abstractcore-2.9.1-py3-none-any.whl → abstractcore-2.11.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83)
  1. abstractcore/__init__.py +7 -27
  2. abstractcore/apps/extractor.py +33 -100
  3. abstractcore/apps/intent.py +19 -0
  4. abstractcore/apps/judge.py +20 -1
  5. abstractcore/apps/summarizer.py +20 -1
  6. abstractcore/architectures/detection.py +34 -1
  7. abstractcore/architectures/response_postprocessing.py +313 -0
  8. abstractcore/assets/architecture_formats.json +38 -8
  9. abstractcore/assets/model_capabilities.json +781 -160
  10. abstractcore/compression/__init__.py +1 -2
  11. abstractcore/compression/glyph_processor.py +6 -4
  12. abstractcore/config/main.py +31 -19
  13. abstractcore/config/manager.py +389 -11
  14. abstractcore/config/vision_config.py +5 -5
  15. abstractcore/core/interface.py +151 -3
  16. abstractcore/core/session.py +16 -10
  17. abstractcore/download.py +1 -1
  18. abstractcore/embeddings/manager.py +20 -6
  19. abstractcore/endpoint/__init__.py +2 -0
  20. abstractcore/endpoint/app.py +458 -0
  21. abstractcore/mcp/client.py +3 -1
  22. abstractcore/media/__init__.py +52 -17
  23. abstractcore/media/auto_handler.py +42 -22
  24. abstractcore/media/base.py +44 -1
  25. abstractcore/media/capabilities.py +12 -33
  26. abstractcore/media/enrichment.py +105 -0
  27. abstractcore/media/handlers/anthropic_handler.py +19 -28
  28. abstractcore/media/handlers/local_handler.py +124 -70
  29. abstractcore/media/handlers/openai_handler.py +19 -31
  30. abstractcore/media/processors/__init__.py +4 -2
  31. abstractcore/media/processors/audio_processor.py +57 -0
  32. abstractcore/media/processors/office_processor.py +8 -3
  33. abstractcore/media/processors/pdf_processor.py +46 -3
  34. abstractcore/media/processors/text_processor.py +22 -24
  35. abstractcore/media/processors/video_processor.py +58 -0
  36. abstractcore/media/types.py +97 -4
  37. abstractcore/media/utils/image_scaler.py +20 -2
  38. abstractcore/media/utils/video_frames.py +219 -0
  39. abstractcore/media/vision_fallback.py +136 -22
  40. abstractcore/processing/__init__.py +32 -3
  41. abstractcore/processing/basic_deepsearch.py +15 -10
  42. abstractcore/processing/basic_intent.py +3 -2
  43. abstractcore/processing/basic_judge.py +3 -2
  44. abstractcore/processing/basic_summarizer.py +1 -1
  45. abstractcore/providers/__init__.py +3 -1
  46. abstractcore/providers/anthropic_provider.py +95 -8
  47. abstractcore/providers/base.py +1516 -81
  48. abstractcore/providers/huggingface_provider.py +546 -69
  49. abstractcore/providers/lmstudio_provider.py +35 -923
  50. abstractcore/providers/mlx_provider.py +382 -35
  51. abstractcore/providers/model_capabilities.py +5 -1
  52. abstractcore/providers/ollama_provider.py +99 -15
  53. abstractcore/providers/openai_compatible_provider.py +406 -180
  54. abstractcore/providers/openai_provider.py +188 -44
  55. abstractcore/providers/openrouter_provider.py +76 -0
  56. abstractcore/providers/registry.py +61 -5
  57. abstractcore/providers/streaming.py +138 -33
  58. abstractcore/providers/vllm_provider.py +92 -817
  59. abstractcore/server/app.py +461 -13
  60. abstractcore/server/audio_endpoints.py +139 -0
  61. abstractcore/server/vision_endpoints.py +1319 -0
  62. abstractcore/structured/handler.py +316 -41
  63. abstractcore/tools/common_tools.py +5501 -2012
  64. abstractcore/tools/comms_tools.py +1641 -0
  65. abstractcore/tools/core.py +37 -7
  66. abstractcore/tools/handler.py +4 -9
  67. abstractcore/tools/parser.py +49 -2
  68. abstractcore/tools/tag_rewriter.py +2 -1
  69. abstractcore/tools/telegram_tdlib.py +407 -0
  70. abstractcore/tools/telegram_tools.py +261 -0
  71. abstractcore/utils/cli.py +1085 -72
  72. abstractcore/utils/token_utils.py +2 -0
  73. abstractcore/utils/truncation.py +29 -0
  74. abstractcore/utils/version.py +3 -4
  75. abstractcore/utils/vlm_token_calculator.py +12 -2
  76. abstractcore-2.11.2.dist-info/METADATA +562 -0
  77. abstractcore-2.11.2.dist-info/RECORD +133 -0
  78. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
  79. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
  80. abstractcore-2.9.1.dist-info/METADATA +0 -1190
  81. abstractcore-2.9.1.dist-info/RECORD +0 -119
  82. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
  83. {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
abstractcore/server/app.py

@@ -33,6 +33,8 @@ import urllib.parse
  import argparse
  import sys
  import logging
+ import threading
+ import httpx
  from typing import List, Dict, Any, Optional, Literal, Union, Iterator, Tuple, Annotated
  from enum import Enum
  from fastapi import FastAPI, HTTPException, Request, Query, Body
@@ -117,6 +119,26 @@ app.add_middleware(
  allow_headers=["*"],
  )

+ # Optional: OpenAI-compatible vision generation endpoints (/v1/images/*).
+ # These are safe-by-default and require explicit configuration; see `vision_endpoints.py`.
+ try:
+ from .vision_endpoints import router as _vision_router
+
+ app.include_router(_vision_router, prefix="/v1")
+ logger.info("🖼️ Vision endpoints enabled at /v1/images/*")
+ except Exception as e:
+ logger.debug(f"Vision endpoints not loaded: {e}")
+
+ # Optional: OpenAI-compatible audio endpoints (/v1/audio/*).
+ # These delegate to capability plugins (e.g. AbstractVoice) and degrade to 501 when unavailable.
+ try:
+ from .audio_endpoints import router as _audio_router
+
+ app.include_router(_audio_router, prefix="/v1")
+ logger.info("🔊 Audio endpoints enabled at /v1/audio/*")
+ except Exception as e:
+ logger.debug(f"Audio endpoints not loaded: {e}")
+
  # ============================================================================
  # Enhanced Error Handling and Logging Middleware
  # ============================================================================
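The two try/except blocks above mount the vision and audio routers best-effort, so a given deployment may expose neither, one, or both groups of routes. Below is a minimal client-side probe sketch, assuming the gateway runs on http://localhost:8000 and keeps FastAPI's default /openapi.json route enabled (both are assumptions, not guaranteed by this diff):

# Illustrative sketch (not part of the diff): discover which optional routers are mounted.
import httpx

def optional_capabilities(base: str = "http://localhost:8000") -> dict:
    # FastAPI publishes mounted routes in its OpenAPI schema by default.
    paths = httpx.get(f"{base}/openapi.json", timeout=10.0).json().get("paths", {})
    return {
        "vision": any(p.startswith("/v1/images") for p in paths),
        "audio": any(p.startswith("/v1/audio") for p in paths),
    }

if __name__ == "__main__":
    print(optional_capabilities())

Note that the audio routes can be mounted and still answer 501 when the capability plugin (e.g. AbstractVoice) is absent, so a 501 from /v1/audio/* should be treated as "unavailable here" rather than a client error.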
@@ -193,9 +215,14 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
  body=body_json
  )
  except json.JSONDecodeError:
+ raw = body.decode("utf-8", errors="replace")
+ body_text = raw
+ if len(body_text) > 1000:
+ #[WARNING:TRUNCATION] bounded request-body preview for debug logs
+ body_text = body_text[:980].rstrip() + "\n… (truncated)"
  logger.debug(
  "📋 Request Body (Validation Error)",
- body_text=body.decode('utf-8', errors='replace')[:1000] # Limit to 1000 chars
+ body_text=body_text,
  )
  except Exception as e:
  logger.debug(f"Could not read request body for debugging: {e}")
@@ -450,6 +477,14 @@ class ChatCompletionRequest(BaseModel):
  example=False
  )

+ # Unified thinking/reasoning control (AbstractCore-specific feature)
+ thinking: Optional[Union[bool, str]] = Field(
+ default=None,
+ description="Unified thinking/reasoning control (best-effort across providers/models). "
+ "Accepted values: null/'auto'/'on'/'off' or 'low'/'medium'/'high' when supported.",
+ example="off",
+ )
+
  # Tool calling
  tools: Optional[List[Dict[str, Any]]] = Field(
  default=None,
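The `thinking` field is later copied into `gen_kwargs` (see the hunk around old line 2103 below), so OpenAI-style clients can toggle reasoning per request without provider-specific flags. A minimal sketch, assuming the server listens on http://localhost:8000 and exposes the usual /v1/chat/completions route; the model string is hypothetical:

# Illustrative sketch (not part of the diff): per-request thinking control.
import httpx

payload = {
    "model": "ollama/qwen3:4b",  # hypothetical provider/model string
    "messages": [{"role": "user", "content": "Summarize RFC 2119 in one sentence."}],
    "thinking": "off",  # also accepts None, 'auto', 'on', or 'low'/'medium'/'high' where supported
}
resp = httpx.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=120.0)
print(resp.json()["choices"][0]["message"]["content"])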
@@ -498,6 +533,13 @@ class ChatCompletionRequest(BaseModel):
  example=0.0
  )

+ # OpenAI prompt caching (2025+): forwarded best-effort by providers that support it.
+ prompt_cache_key: Optional[str] = Field(
+ default=None,
+ description="Provider-specific prompt cache key for prefix caching (best-effort).",
+ example="tenantA:session123"
+ )
+
  # Agent format control (AppV2 feature)
  agent_format: Optional[str] = Field(
  default=None,
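As the forwarding hunk further down shows, `prompt_cache_key` only reaches `gen_kwargs` when it is a non-empty string, so opting in is just a matter of sending a stable, tenant-scoped key alongside an otherwise unchanged request. A brief sketch of the request shape (model string and shared prefix are placeholders):

# Illustrative sketch (not part of the diff): reuse a cached prompt prefix across requests.
SHARED_SYSTEM_PROMPT = "You are a support assistant for ACME Corp. ..."  # long, reused prefix

payload = {
    "model": "openai/gpt-4o-mini",  # hypothetical
    "messages": [
        {"role": "system", "content": SHARED_SYSTEM_PROMPT},
        {"role": "user", "content": "Where is my order?"},
    ],
    "prompt_cache_key": "tenantA:session123",  # matches the field's documented example
}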
@@ -508,10 +550,18 @@ class ChatCompletionRequest(BaseModel):
  )

  # Provider-specific parameters (AbstractCore-specific feature)
+ api_key: Optional[str] = Field(
+ default=None,
+ description="API key for the provider (AbstractCore-specific feature). "
+ "Supports all providers requiring authentication: openai, anthropic, openrouter, openai-compatible, huggingface. "
+ "If not specified, falls back to provider-specific environment variables "
+ "(e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY, OPENROUTER_API_KEY).",
+ example=None
+ )
  base_url: Optional[str] = Field(
  default=None,
  description="Base URL for the provider API endpoint (AbstractCore-specific feature). "
- "Useful for openai-compatible provider to connect to custom endpoints. "
+ "Useful for OpenAI-compatible providers (lmstudio, vllm, openrouter, openai-compatible) and custom/proxied endpoints. "
  "Example: 'http://localhost:1234/v1' for LMStudio, 'http://localhost:8080/v1' for llama.cpp. "
  "If not specified, uses provider's default or environment variable.",
  example="http://localhost:1234/v1"
@@ -526,9 +576,17 @@ class ChatCompletionRequest(BaseModel):
  "Values <= 0 are treated as unlimited.",
  example=7200.0,
  )
+ unload_after: bool = Field(
+ default=False,
+ description="If true, call `llm.unload_model(model)` after the request completes (AbstractCore-specific feature). "
+ "This is useful for explicit memory hygiene in single-tenant or batch scenarios. "
+ "WARNING: for providers that unload shared server state (e.g. Ollama), this can disrupt other "
+ "clients and is disabled by default unless explicitly enabled by the server operator.",
+ example=False,
+ )

  class Config:
- schema_extra = {
+ json_schema_extra = {
  "examples": {
  "basic_text": {
  "summary": "Basic Text Chat",
@@ -729,7 +787,25 @@ class ChatCompletionRequest(BaseModel):
  "seed": 12345,
  "frequency_penalty": 0.0,
  "presence_penalty": 0.0,
- "agent_format": "auto"
+ "agent_format": "auto",
+ "api_key": None,
+ "base_url": None
+ }
+ },
+ "openrouter_with_api_key": {
+ "summary": "OpenRouter with Per-Request API Key",
+ "description": "Use OpenRouter with a per-request API key (useful for multi-tenant scenarios)",
+ "value": {
+ "model": "openrouter/anthropic/claude-3.5-sonnet",
+ "messages": [
+ {
+ "role": "user",
+ "content": "Explain quantum computing in simple terms"
+ }
+ ],
+ "api_key": "sk-or-v1-your-openrouter-key",
+ "temperature": 0.7,
+ "max_tokens": 500
  }
  }
  }
@@ -771,7 +847,7 @@ class EmbeddingRequest(BaseModel):
  )

  class Config:
- schema_extra = {
+ json_schema_extra = {
  "example": {
  "input": "this is the story of starship lost in space",
  "model": "huggingface/sentence-transformers/all-MiniLM-L6-v2",
@@ -792,7 +868,7 @@ class ResponsesAPIRequest(BaseModel):
  The endpoint automatically detects the format based on the presence of 'input' vs 'messages' field.
  """
  class Config:
- schema_extra = {
+ json_schema_extra = {
  "oneOf": [
  {
  "title": "OpenAI Responses API Format",
@@ -896,6 +972,80 @@ def convert_openai_responses_to_chat_completion(openai_request: OpenAIResponsesR
  # Helper Functions
  # ============================================================================

+ def _parse_bool_env(var_name: str) -> bool:
+ """Parse a boolean environment variable (1/true/yes/on)."""
+ val = os.getenv(var_name)
+ if val is None:
+ return False
+ return str(val).strip().lower() in {"1", "true", "yes", "on"}
+
+
+ def _parse_boolish(value: Any) -> bool:
+ """Parse a request-supplied bool-ish value (bool/int/str/None)."""
+ if value is None:
+ return False
+ if isinstance(value, bool):
+ return value
+ if isinstance(value, (int, float)):
+ return bool(value)
+ if isinstance(value, str):
+ normalized = value.strip().lower()
+ if normalized in {"1", "true", "yes", "on"}:
+ return True
+ if normalized in {"0", "false", "no", "off", ""}:
+ return False
+ raise ValueError(f"Expected boolean, got {type(value).__name__}: {value!r}")
+
+
+ _OLLAMA_INFLIGHT_LOCK = threading.Lock()
+ _OLLAMA_INFLIGHT_COUNTS: Dict[Tuple[str, str, str], int] = {}
+ _OLLAMA_UNLOAD_REQUESTED: Dict[Tuple[str, str, str], bool] = {}
+
+
+ def _ollama_inflight_key(provider: str, base_url: Optional[str], model: str) -> Tuple[str, str, str]:
+ """Build a stable key for tracking in-flight Ollama requests."""
+ return (provider.strip().lower(), (base_url or "").strip(), model)
+
+
+ def _ollama_inflight_enter(key: Tuple[str, str, str]) -> None:
+ """Increment in-flight counter for an Ollama (provider/base_url/model) key."""
+ with _OLLAMA_INFLIGHT_LOCK:
+ _OLLAMA_INFLIGHT_COUNTS[key] = _OLLAMA_INFLIGHT_COUNTS.get(key, 0) + 1
+
+
+ def _ollama_inflight_exit(key: Tuple[str, str, str], *, unload_after_requested: bool) -> bool:
+ """Decrement in-flight counter and return True if an unload should happen now."""
+ with _OLLAMA_INFLIGHT_LOCK:
+ if unload_after_requested:
+ _OLLAMA_UNLOAD_REQUESTED[key] = True
+
+ current = _OLLAMA_INFLIGHT_COUNTS.get(key, 0)
+ if current <= 1:
+ _OLLAMA_INFLIGHT_COUNTS.pop(key, None)
+ return bool(_OLLAMA_UNLOAD_REQUESTED.pop(key, False))
+
+ _OLLAMA_INFLIGHT_COUNTS[key] = current - 1
+ return False
+
+
+ def _best_effort_unload(llm: Any, *, request_id: str, provider: str, model: str) -> None:
+ """Unload provider resources without failing the request lifecycle."""
+ try:
+ if not hasattr(llm, "unload_model"):
+ raise AttributeError("Provider does not implement unload_model(model_name)")
+ llm.unload_model(model)
+ logger.info("🧹 Provider Unloaded", request_id=request_id, provider=provider, model=model)
+ except Exception as e:
+ logger.warning(
+ "⚠️ Provider unload failed",
+ request_id=request_id,
+ provider=provider,
+ model=model,
+ error=str(e),
+ error_type=type(e).__name__,
+ )
+
+
  def parse_model_string(model_string: str) -> tuple[str, str]:
  """Parse model string to extract provider and model."""
  if not model_string:
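The bookkeeping above defers the actual unload until the last concurrent request for the same (provider, base_url, model) key finishes, even if an earlier request was the one that asked for it. A small illustration of that contract, treating the module-level helpers above as importable (they are private to the server app, and the base_url/model values are placeholders):

# Illustrative sketch (not part of the diff): deferred unload with two overlapping requests.
key = _ollama_inflight_key("ollama", "http://localhost:11434", "qwen3:4b")

_ollama_inflight_enter(key)  # request A starts
_ollama_inflight_enter(key)  # request B starts on the same model

# Request A finishes and asked for unload_after, but B is still in flight: no unload yet.
assert _ollama_inflight_exit(key, unload_after_requested=True) is False

# Request B finishes without asking for an unload; A's earlier request is honored now.
assert _ollama_inflight_exit(key, unload_after_requested=False) is True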
@@ -997,12 +1147,205 @@ async def health_check():
  ]
  }

+
+ class PromptCacheProxyBase(BaseModel):
+ """Proxy configuration for forwarding AbstractCore prompt-cache control-plane calls."""
+
+ base_url: Optional[str] = Field(
+ default=None,
+ description=(
+ "Upstream base URL for an AbstractEndpoint instance. Can include an OpenAI-style `/v1` suffix "
+ "(it will be stripped when proxying `/acore/prompt_cache/*`)."
+ ),
+ example="http://localhost:8001/v1",
+ )
+ api_key: Optional[str] = Field(
+ default=None,
+ description="Optional upstream API key (sent as Authorization: Bearer ...).",
+ example=None,
+ )
+
+
+ class PromptCacheSetProxyRequest(PromptCacheProxyBase):
+ key: str
+ make_default: bool = True
+ ttl_s: Optional[float] = None
+
+
+ class PromptCacheUpdateProxyRequest(PromptCacheProxyBase):
+ key: str
+ prompt: Optional[str] = None
+ messages: Optional[List[Dict[str, Any]]] = None
+ system_prompt: Optional[str] = None
+ tools: Optional[List[Dict[str, Any]]] = None
+ add_generation_prompt: bool = False
+ ttl_s: Optional[float] = None
+
+
+ class PromptCacheForkProxyRequest(PromptCacheProxyBase):
+ from_key: str
+ to_key: str
+ make_default: bool = False
+ ttl_s: Optional[float] = None
+
+
+ class PromptCacheClearProxyRequest(PromptCacheProxyBase):
+ key: Optional[str] = None
+
+
+ class PromptCachePrepareModulesProxyRequest(PromptCacheProxyBase):
+ namespace: str
+ modules: List[Dict[str, Any]]
+ make_default: bool = False
+ ttl_s: Optional[float] = None
+ version: int = 1
+
+
+ def _normalize_control_plane_base_url(base_url: str) -> str:
+ u = str(base_url or "").strip().rstrip("/")
+ if u.endswith("/v1"):
+ u = u[:-3]
+ return u.rstrip("/")
+
+
+ def _proxy_prompt_cache_request(
+ *,
+ base_url: Optional[str],
+ api_key: Optional[str],
+ method: str,
+ path: str,
+ json_body: Optional[Dict[str, Any]] = None,
+ timeout_s: float = 30.0,
+ ) -> Dict[str, Any]:
+ if not isinstance(base_url, str) or not base_url.strip():
+ return {
+ "supported": False,
+ "error": "base_url is required to proxy prompt cache control plane calls (use AbstractEndpoint)",
+ }
+
+ upstream_root = _normalize_control_plane_base_url(base_url)
+ url = f"{upstream_root}{path}"
+
+ headers: Dict[str, str] = {}
+ if isinstance(api_key, str) and api_key.strip():
+ headers["Authorization"] = f"Bearer {api_key.strip()}"
+
+ try:
+ with httpx.Client(timeout=timeout_s) as client:
+ if method.upper() == "GET":
+ resp = client.get(url, headers=headers)
+ else:
+ resp = client.post(url, headers=headers, json=json_body or {})
+ except Exception as e:
+ return {"supported": False, "error": str(e)}
+
+ try:
+ payload = resp.json()
+ except Exception:
+ payload = {"error": resp.text}
+
+ if resp.status_code >= 400:
+ return {
+ "supported": False,
+ "status_code": int(resp.status_code),
+ "error": payload,
+ "upstream": url,
+ }
+
+ if isinstance(payload, dict):
+ return payload
+ return {"supported": True, "data": payload}
+
+
+ @app.get("/acore/prompt_cache/stats")
+ def acore_prompt_cache_stats(
+ base_url: Optional[str] = Query(None, description="Upstream AbstractEndpoint base_url (optionally including /v1)"),
+ api_key: Optional[str] = Query(None, description="Optional upstream API key"),
+ ):
+ return _proxy_prompt_cache_request(
+ base_url=base_url,
+ api_key=api_key,
+ method="GET",
+ path="/acore/prompt_cache/stats",
+ json_body=None,
+ )
+
+
+ @app.post("/acore/prompt_cache/set")
+ def acore_prompt_cache_set(req: PromptCacheSetProxyRequest):
+ body = req.model_dump(exclude_none=True)
+ base_url = body.pop("base_url", None)
+ api_key = body.pop("api_key", None)
+ return _proxy_prompt_cache_request(
+ base_url=base_url,
+ api_key=api_key,
+ method="POST",
+ path="/acore/prompt_cache/set",
+ json_body=body,
+ )
+
+
+ @app.post("/acore/prompt_cache/update")
+ def acore_prompt_cache_update(req: PromptCacheUpdateProxyRequest):
+ body = req.model_dump(exclude_none=True)
+ base_url = body.pop("base_url", None)
+ api_key = body.pop("api_key", None)
+ return _proxy_prompt_cache_request(
+ base_url=base_url,
+ api_key=api_key,
+ method="POST",
+ path="/acore/prompt_cache/update",
+ json_body=body,
+ )
+
+
+ @app.post("/acore/prompt_cache/fork")
+ def acore_prompt_cache_fork(req: PromptCacheForkProxyRequest):
+ body = req.model_dump(exclude_none=True)
+ base_url = body.pop("base_url", None)
+ api_key = body.pop("api_key", None)
+ return _proxy_prompt_cache_request(
+ base_url=base_url,
+ api_key=api_key,
+ method="POST",
+ path="/acore/prompt_cache/fork",
+ json_body=body,
+ )
+
+
+ @app.post("/acore/prompt_cache/clear")
+ def acore_prompt_cache_clear(req: PromptCacheClearProxyRequest):
+ body = req.model_dump(exclude_none=True)
+ base_url = body.pop("base_url", None)
+ api_key = body.pop("api_key", None)
+ return _proxy_prompt_cache_request(
+ base_url=base_url,
+ api_key=api_key,
+ method="POST",
+ path="/acore/prompt_cache/clear",
+ json_body=body,
+ )
+
+
+ @app.post("/acore/prompt_cache/prepare_modules")
+ def acore_prompt_cache_prepare_modules(req: PromptCachePrepareModulesProxyRequest):
+ body = req.model_dump(exclude_none=True)
+ base_url = body.pop("base_url", None)
+ api_key = body.pop("api_key", None)
+ return _proxy_prompt_cache_request(
+ base_url=base_url,
+ api_key=api_key,
+ method="POST",
+ path="/acore/prompt_cache/prepare_modules",
+ json_body=body,
+ )
+
+
  @app.get("/v1/models")
  async def list_models(
  provider: Optional[str] = Query(
  None,
  description="Filter by provider (e.g., 'ollama', 'openai', 'anthropic', 'lmstudio')",
- example=""
  ),
  input_type: Optional[ModelInputCapability] = Query(
  None,
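These routes make the server a thin control-plane proxy: the cache itself lives in the upstream AbstractEndpoint addressed by `base_url`, whose optional `/v1` suffix is stripped by `_normalize_control_plane_base_url` before the `/acore/prompt_cache/*` path is appended. A short client sketch against the gateway (both hosts are assumptions):

# Illustrative sketch (not part of the diff): drive the prompt-cache proxy endpoints.
import httpx

GATEWAY = "http://localhost:8000"      # this server (assumption)
UPSTREAM = "http://localhost:8001/v1"  # AbstractEndpoint instance to control (assumption)

with httpx.Client(timeout=30.0) as client:
    # Read upstream cache statistics through the proxy.
    stats = client.get(f"{GATEWAY}/acore/prompt_cache/stats", params={"base_url": UPSTREAM}).json()

    # Create a named cache entry upstream and make it the default for subsequent requests.
    created = client.post(
        f"{GATEWAY}/acore/prompt_cache/set",
        json={"base_url": UPSTREAM, "key": "tenantA:session123", "make_default": True, "ttl_s": 600},
    ).json()

print(stats, created)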
@@ -1316,6 +1659,16 @@ async def create_response(
  detail={"error": {"message": "Request must contain either 'input' (OpenAI format) or 'messages' (legacy format)", "type": "invalid_request"}}
  )

+ # AbstractCore extension: allow opt-in unload-after-request even for OpenAI Responses format.
+ if "unload_after" in request_data:
+ try:
+ chat_request = chat_request.model_copy(update={"unload_after": _parse_boolish(request_data.get("unload_after"))})
+ except Exception as e:
+ raise HTTPException(
+ status_code=422,
+ detail={"error": {"message": f"Invalid unload_after value: {e}", "type": "validation_error"}},
+ )
+
  # Respect user's streaming preference (defaults to False)

  # Process using our standard pipeline
@@ -2023,11 +2376,16 @@ async def process_chat_completion(

  # Detect target format for tool call syntax
  target_format = detect_target_format(f"{provider}/{model}", request, http_request)
+ user_agent_raw = http_request.headers.get("user-agent", "")
+ user_agent = str(user_agent_raw or "")
+ if len(user_agent) > 50:
+ #[WARNING:TRUNCATION] bounded user-agent capture for request logs
+ user_agent = user_agent[:50].rstrip() + "…"
  logger.info(
  "🎯 Target Format Detected",
  request_id=request_id,
  target_format=target_format.value,
- user_agent=http_request.headers.get("user-agent", "")[:50]
+ user_agent=user_agent,
  )

  # Process media from messages
@@ -2052,11 +2410,14 @@ async def process_chat_completion(
  # Validate media files if any were found
  if all_media_files:
  validate_media_files(all_media_files)
+ #[WARNING:TRUNCATION] bounded filename preview for request logs
+ files_preview = [os.path.basename(f) for f in all_media_files[:5]]
  logger.info(
  "📎 Media Files Processed",
  request_id=request_id,
  file_count=len(all_media_files),
- files=[os.path.basename(f) for f in all_media_files[:5]] # Log first 5 filenames
+ files=files_preview,
+ files_truncated=len(all_media_files) > 5,
  )

  # Create LLM instance
@@ -2067,6 +2428,13 @@ async def process_chat_completion(
  # Enable trace capture (trace_id) without retaining full trace buffers by default.
  provider_kwargs["enable_tracing"] = True
  provider_kwargs.setdefault("max_traces", 0)
+ if request.api_key:
+ provider_kwargs["api_key"] = request.api_key
+ logger.debug(
+ "🔑 Custom API Key Provided",
+ request_id=request_id,
+ provider=provider
+ )
  if request.base_url:
  provider_kwargs["base_url"] = request.base_url
  logger.info(
@@ -2079,7 +2447,28 @@ async def process_chat_completion(
  # Note: BaseProvider treats non-positive values as "unlimited".
  provider_kwargs["timeout"] = request.timeout_s

+ provider_normalized = provider.strip().lower()
+ unload_after_requested = bool(getattr(request, "unload_after", False))
+ allow_unsafe_unload_after = _parse_bool_env("ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER")
+ if unload_after_requested and provider_normalized == "ollama" and not allow_unsafe_unload_after:
+ raise HTTPException(
+ status_code=403,
+ detail={
+ "error": {
+ "message": (
+ "unload_after=true is disabled for provider 'ollama' because it can unload shared server "
+ "state and disrupt other clients. Set ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER=1 to enable."
+ ),
+ "type": "forbidden",
+ }
+ },
+ )
+
  llm = create_llm(provider, model=model, **provider_kwargs)
+ ollama_key: Optional[Tuple[str, str, str]] = None
+ if provider_normalized == "ollama":
+ ollama_key = _ollama_inflight_key(provider, request.base_url, model)
+ _ollama_inflight_enter(ollama_key)

  # Convert messages
  messages = convert_to_abstractcore_messages(processed_messages)
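Putting the request flag and the server policy together: an `unload_after=true` request against an Ollama-backed model is rejected with 403 unless the operator exports ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER=1 before starting the server. A client-side sketch (host, endpoint path, and model string are assumptions):

# Illustrative sketch (not part of the diff): request an unload after a one-off completion.
import httpx

payload = {
    "model": "ollama/qwen3:4b",  # hypothetical model string
    "messages": [{"role": "user", "content": "One-off batch question"}],
    "unload_after": True,  # ask the server to free the model once the request completes
}
resp = httpx.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=300.0)

if resp.status_code == 403:
    # Server policy: unsafe unload is disabled unless the operator opted in via the env var.
    print(resp.json()["detail"]["error"]["message"])
else:
    print(resp.json()["choices"][0]["message"]["content"])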
@@ -2103,6 +2492,8 @@ async def process_chat_completion(
  gen_kwargs["trace_metadata"] = trace_metadata

  # Add optional parameters
+ if request.thinking is not None:
+ gen_kwargs["thinking"] = request.thinking
  if request.stop:
  gen_kwargs["stop"] = request.stop
  if request.seed:
@@ -2111,6 +2502,8 @@ async def process_chat_completion(
  gen_kwargs["frequency_penalty"] = request.frequency_penalty
  if request.presence_penalty:
  gen_kwargs["presence_penalty"] = request.presence_penalty
+ if isinstance(request.prompt_cache_key, str) and request.prompt_cache_key.strip():
+ gen_kwargs["prompt_cache_key"] = request.prompt_cache_key.strip()

  # Generate response
  # Only cleanup files created by this request (with our specific prefixes)
@@ -2128,7 +2521,16 @@ async def process_chat_completion(
  if request.stream:
  return StreamingResponse(
  generate_streaming_response(
- llm, gen_kwargs, provider, model, syntax_rewriter, request_id, temp_files_to_cleanup
+ llm,
+ gen_kwargs,
+ provider,
+ model,
+ syntax_rewriter,
+ request_id,
+ temp_files_to_cleanup,
+ unload_after=unload_after_requested,
+ ollama_key=ollama_key,
+ allow_unsafe_unload_after=allow_unsafe_unload_after,
  ),
  media_type="text/event-stream",
  headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
@@ -2148,9 +2550,22 @@ async def process_chat_completion(
  )
  return openai_response
  finally:
- # Cleanup temporary files (base64 and downloaded images) with delay to avoid race conditions
- import threading
+ if not request.stream:
+ if provider_normalized == "ollama" and ollama_key is not None:
+ should_unload = _ollama_inflight_exit(ollama_key, unload_after_requested=unload_after_requested)
+ if should_unload and allow_unsafe_unload_after:
+ _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)
+ elif should_unload:
+ logger.warning(
+ "⚠️ Unload requested but disabled by server policy",
+ request_id=request_id,
+ provider=provider,
+ model=model,
+ )
+ elif unload_after_requested:
+ _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)

+ # Cleanup temporary files (base64 and downloaded images) with delay to avoid race conditions
  def delayed_cleanup():
  """Cleanup temporary files after a short delay to avoid race conditions"""
  time.sleep(1) # Short delay to ensure generation is complete
@@ -2170,6 +2585,8 @@ async def process_chat_completion(
  cleanup_thread = threading.Thread(target=delayed_cleanup, daemon=True)
  cleanup_thread.start()

+ except HTTPException:
+ raise
  except Exception as e:
  logger.error(
  "❌ Chat completion failed",
@@ -2189,9 +2606,14 @@ def generate_streaming_response(
  model: str,
  syntax_rewriter: ToolCallSyntaxRewriter,
  request_id: str,
- temp_files_to_cleanup: List[str] = None
+ temp_files_to_cleanup: List[str] = None,
+ *,
+ unload_after: bool = False,
+ ollama_key: Optional[Tuple[str, str, str]] = None,
+ allow_unsafe_unload_after: bool = False,
  ) -> Iterator[str]:
  """Generate OpenAI-compatible streaming response with syntax rewriting."""
+ provider_normalized = provider.strip().lower()
  try:
  chat_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
  created_time = int(time.time())
@@ -2324,6 +2746,32 @@ def generate_streaming_response(
  )
  error_chunk = {"error": {"message": str(e), "type": "server_error"}}
  yield f"data: {json.dumps(error_chunk)}\n\n"
+ finally:
+ if provider_normalized == "ollama" and ollama_key is not None:
+ try:
+ should_unload = _ollama_inflight_exit(ollama_key, unload_after_requested=unload_after)
+ except Exception as e:
+ logger.warning(
+ "⚠️ Failed to update in-flight unload state",
+ request_id=request_id,
+ provider=provider,
+ model=model,
+ error=str(e),
+ error_type=type(e).__name__,
+ )
+ should_unload = False
+
+ if should_unload and allow_unsafe_unload_after:
+ _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)
+ elif should_unload:
+ logger.warning(
+ "⚠️ Unload requested but disabled by server policy",
+ request_id=request_id,
+ provider=provider,
+ model=model,
+ )
+ elif unload_after:
+ _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)

  def convert_to_openai_response(
  response,