abstractcore 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +7 -27
- abstractcore/apps/extractor.py +33 -100
- abstractcore/apps/intent.py +19 -0
- abstractcore/apps/judge.py +20 -1
- abstractcore/apps/summarizer.py +20 -1
- abstractcore/architectures/detection.py +34 -1
- abstractcore/architectures/response_postprocessing.py +313 -0
- abstractcore/assets/architecture_formats.json +38 -8
- abstractcore/assets/model_capabilities.json +781 -160
- abstractcore/compression/__init__.py +1 -2
- abstractcore/compression/glyph_processor.py +6 -4
- abstractcore/config/main.py +31 -19
- abstractcore/config/manager.py +389 -11
- abstractcore/config/vision_config.py +5 -5
- abstractcore/core/interface.py +151 -3
- abstractcore/core/session.py +16 -10
- abstractcore/download.py +1 -1
- abstractcore/embeddings/manager.py +20 -6
- abstractcore/endpoint/__init__.py +2 -0
- abstractcore/endpoint/app.py +458 -0
- abstractcore/mcp/client.py +3 -1
- abstractcore/media/__init__.py +52 -17
- abstractcore/media/auto_handler.py +42 -22
- abstractcore/media/base.py +44 -1
- abstractcore/media/capabilities.py +12 -33
- abstractcore/media/enrichment.py +105 -0
- abstractcore/media/handlers/anthropic_handler.py +19 -28
- abstractcore/media/handlers/local_handler.py +124 -70
- abstractcore/media/handlers/openai_handler.py +19 -31
- abstractcore/media/processors/__init__.py +4 -2
- abstractcore/media/processors/audio_processor.py +57 -0
- abstractcore/media/processors/office_processor.py +8 -3
- abstractcore/media/processors/pdf_processor.py +46 -3
- abstractcore/media/processors/text_processor.py +22 -24
- abstractcore/media/processors/video_processor.py +58 -0
- abstractcore/media/types.py +97 -4
- abstractcore/media/utils/image_scaler.py +20 -2
- abstractcore/media/utils/video_frames.py +219 -0
- abstractcore/media/vision_fallback.py +136 -22
- abstractcore/processing/__init__.py +32 -3
- abstractcore/processing/basic_deepsearch.py +15 -10
- abstractcore/processing/basic_intent.py +3 -2
- abstractcore/processing/basic_judge.py +3 -2
- abstractcore/processing/basic_summarizer.py +1 -1
- abstractcore/providers/__init__.py +3 -1
- abstractcore/providers/anthropic_provider.py +95 -8
- abstractcore/providers/base.py +1516 -81
- abstractcore/providers/huggingface_provider.py +546 -69
- abstractcore/providers/lmstudio_provider.py +35 -923
- abstractcore/providers/mlx_provider.py +382 -35
- abstractcore/providers/model_capabilities.py +5 -1
- abstractcore/providers/ollama_provider.py +99 -15
- abstractcore/providers/openai_compatible_provider.py +406 -180
- abstractcore/providers/openai_provider.py +188 -44
- abstractcore/providers/openrouter_provider.py +76 -0
- abstractcore/providers/registry.py +61 -5
- abstractcore/providers/streaming.py +138 -33
- abstractcore/providers/vllm_provider.py +92 -817
- abstractcore/server/app.py +461 -13
- abstractcore/server/audio_endpoints.py +139 -0
- abstractcore/server/vision_endpoints.py +1319 -0
- abstractcore/structured/handler.py +316 -41
- abstractcore/tools/common_tools.py +5501 -2012
- abstractcore/tools/comms_tools.py +1641 -0
- abstractcore/tools/core.py +37 -7
- abstractcore/tools/handler.py +4 -9
- abstractcore/tools/parser.py +49 -2
- abstractcore/tools/tag_rewriter.py +2 -1
- abstractcore/tools/telegram_tdlib.py +407 -0
- abstractcore/tools/telegram_tools.py +261 -0
- abstractcore/utils/cli.py +1085 -72
- abstractcore/utils/token_utils.py +2 -0
- abstractcore/utils/truncation.py +29 -0
- abstractcore/utils/version.py +3 -4
- abstractcore/utils/vlm_token_calculator.py +12 -2
- abstractcore-2.11.2.dist-info/METADATA +562 -0
- abstractcore-2.11.2.dist-info/RECORD +133 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
- abstractcore-2.9.1.dist-info/METADATA +0 -1190
- abstractcore-2.9.1.dist-info/RECORD +0 -119
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
abstractcore/server/app.py
CHANGED
@@ -33,6 +33,8 @@ import urllib.parse
 import argparse
 import sys
 import logging
+import threading
+import httpx
 from typing import List, Dict, Any, Optional, Literal, Union, Iterator, Tuple, Annotated
 from enum import Enum
 from fastapi import FastAPI, HTTPException, Request, Query, Body
@@ -117,6 +119,26 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+# Optional: OpenAI-compatible vision generation endpoints (/v1/images/*).
+# These are safe-by-default and require explicit configuration; see `vision_endpoints.py`.
+try:
+    from .vision_endpoints import router as _vision_router
+
+    app.include_router(_vision_router, prefix="/v1")
+    logger.info("🖼️ Vision endpoints enabled at /v1/images/*")
+except Exception as e:
+    logger.debug(f"Vision endpoints not loaded: {e}")
+
+# Optional: OpenAI-compatible audio endpoints (/v1/audio/*).
+# These delegate to capability plugins (e.g. AbstractVoice) and degrade to 501 when unavailable.
+try:
+    from .audio_endpoints import router as _audio_router
+
+    app.include_router(_audio_router, prefix="/v1")
+    logger.info("🔊 Audio endpoints enabled at /v1/audio/*")
+except Exception as e:
+    logger.debug(f"Audio endpoints not loaded: {e}")
+
 # ============================================================================
 # Enhanced Error Handling and Logging Middleware
 # ============================================================================
@@ -193,9 +215,14 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
                 body=body_json
             )
         except json.JSONDecodeError:
+            raw = body.decode("utf-8", errors="replace")
+            body_text = raw
+            if len(body_text) > 1000:
+                #[WARNING:TRUNCATION] bounded request-body preview for debug logs
+                body_text = body_text[:980].rstrip() + "\n… (truncated)"
             logger.debug(
                 "📋 Request Body (Validation Error)",
-                body_text=
+                body_text=body_text,
             )
     except Exception as e:
         logger.debug(f"Could not read request body for debugging: {e}")
@@ -450,6 +477,14 @@ class ChatCompletionRequest(BaseModel):
         example=False
     )
 
+    # Unified thinking/reasoning control (AbstractCore-specific feature)
+    thinking: Optional[Union[bool, str]] = Field(
+        default=None,
+        description="Unified thinking/reasoning control (best-effort across providers/models). "
+                    "Accepted values: null/'auto'/'on'/'off' or 'low'/'medium'/'high' when supported.",
+        example="off",
+    )
+
     # Tool calling
     tools: Optional[List[Dict[str, Any]]] = Field(
         default=None,
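A minimal usage sketch for the new `thinking` field (illustration only, not part of the diff): it assumes an AbstractCore server listening on http://localhost:8000 and an Ollama-served model name, both hypothetical.

    import httpx

    # Ask the server to suppress thinking/reasoning traces for this request (best-effort).
    resp = httpx.post(
        "http://localhost:8000/v1/chat/completions",
        json={
            "model": "ollama/qwen3:4b",
            "messages": [{"role": "user", "content": "Give a one-line summary of HTTP/2."}],
            "thinking": "off",
        },
        timeout=120.0,
    )
    print(resp.json()["choices"][0]["message"]["content"])
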
@@ -498,6 +533,13 @@ class ChatCompletionRequest(BaseModel):
         example=0.0
     )
 
+    # OpenAI prompt caching (2025+): forwarded best-effort by providers that support it.
+    prompt_cache_key: Optional[str] = Field(
+        default=None,
+        description="Provider-specific prompt cache key for prefix caching (best-effort).",
+        example="tenantA:session123"
+    )
+
     # Agent format control (AppV2 feature)
     agent_format: Optional[str] = Field(
         default=None,
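The new `prompt_cache_key` field is forwarded as a generation kwarg (see the `gen_kwargs["prompt_cache_key"]` hunk later in this diff). A hedged sketch of reusing one cache key across requests, with hypothetical host and model names:

    import httpx

    payload = {
        "model": "openai/gpt-4o-mini",
        "messages": [{"role": "user", "content": "Summarize our refund policy."}],
        # Reuse the same key so the provider can reuse the cached shared prefix (best-effort).
        "prompt_cache_key": "tenantA:session123",
    }
    r = httpx.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=120.0)
    print(r.json().get("usage"))
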
@@ -508,10 +550,18 @@ class ChatCompletionRequest(BaseModel):
     )
 
     # Provider-specific parameters (AbstractCore-specific feature)
+    api_key: Optional[str] = Field(
+        default=None,
+        description="API key for the provider (AbstractCore-specific feature). "
+                    "Supports all providers requiring authentication: openai, anthropic, openrouter, openai-compatible, huggingface. "
+                    "If not specified, falls back to provider-specific environment variables "
+                    "(e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY, OPENROUTER_API_KEY).",
+        example=None
+    )
     base_url: Optional[str] = Field(
         default=None,
         description="Base URL for the provider API endpoint (AbstractCore-specific feature). "
-                    "Useful for
+                    "Useful for OpenAI-compatible providers (lmstudio, vllm, openrouter, openai-compatible) and custom/proxied endpoints. "
                     "Example: 'http://localhost:1234/v1' for LMStudio, 'http://localhost:8080/v1' for llama.cpp. "
                     "If not specified, uses provider's default or environment variable.",
         example="http://localhost:1234/v1"
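A sketch combining the per-request `api_key` and `base_url` fields; the gateway address, target server, model name, and key value are assumptions for illustration.

    import httpx

    r = httpx.post(
        "http://localhost:8000/v1/chat/completions",
        json={
            "model": "openai-compatible/my-local-model",
            "messages": [{"role": "user", "content": "Hello"}],
            # Route this single request to a local OpenAI-compatible server.
            "base_url": "http://localhost:1234/v1",
            # Per-request key; when omitted, provider env vars (e.g. OPENAI_API_KEY) are used.
            "api_key": "sk-example-not-a-real-key",
        },
        timeout=60.0,
    )
    print(r.status_code)
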
@@ -526,9 +576,17 @@ class ChatCompletionRequest(BaseModel):
                     "Values <= 0 are treated as unlimited.",
         example=7200.0,
     )
+    unload_after: bool = Field(
+        default=False,
+        description="If true, call `llm.unload_model(model)` after the request completes (AbstractCore-specific feature). "
+                    "This is useful for explicit memory hygiene in single-tenant or batch scenarios. "
+                    "WARNING: for providers that unload shared server state (e.g. Ollama), this can disrupt other "
+                    "clients and is disabled by default unless explicitly enabled by the server operator.",
+        example=False,
+    )
 
     class Config:
-
+        json_schema_extra = {
             "examples": {
                 "basic_text": {
                     "summary": "Basic Text Chat",
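The `unload_after` flag pairs with the server-side policy gate added later in this diff (`ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER`). A hedged sketch, assuming a local AbstractCore server and an Ollama model:

    import httpx

    # For Ollama this is rejected with 403 unless the operator has set
    # ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER=1 on the server.
    r = httpx.post(
        "http://localhost:8000/v1/chat/completions",
        json={
            "model": "ollama/llama3.1:8b",
            "messages": [{"role": "user", "content": "One sentence on RAID 5."}],
            "unload_after": True,
        },
        timeout=300.0,
    )
    print(r.status_code)
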
@@ -729,7 +787,25 @@ class ChatCompletionRequest(BaseModel):
                         "seed": 12345,
                         "frequency_penalty": 0.0,
                         "presence_penalty": 0.0,
-                        "agent_format": "auto"
+                        "agent_format": "auto",
+                        "api_key": None,
+                        "base_url": None
+                    }
+                },
+                "openrouter_with_api_key": {
+                    "summary": "OpenRouter with Per-Request API Key",
+                    "description": "Use OpenRouter with a per-request API key (useful for multi-tenant scenarios)",
+                    "value": {
+                        "model": "openrouter/anthropic/claude-3.5-sonnet",
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": "Explain quantum computing in simple terms"
+                            }
+                        ],
+                        "api_key": "sk-or-v1-your-openrouter-key",
+                        "temperature": 0.7,
+                        "max_tokens": 500
                     }
                 }
             }
@@ -771,7 +847,7 @@ class EmbeddingRequest(BaseModel):
     )
 
     class Config:
-
+        json_schema_extra = {
             "example": {
                 "input": "this is the story of starship lost in space",
                 "model": "huggingface/sentence-transformers/all-MiniLM-L6-v2",
@@ -792,7 +868,7 @@ class ResponsesAPIRequest(BaseModel):
     The endpoint automatically detects the format based on the presence of 'input' vs 'messages' field.
     """
     class Config:
-
+        json_schema_extra = {
             "oneOf": [
                 {
                     "title": "OpenAI Responses API Format",
@@ -896,6 +972,80 @@ def convert_openai_responses_to_chat_completion(openai_request: OpenAIResponsesR
 # Helper Functions
 # ============================================================================
 
+def _parse_bool_env(var_name: str) -> bool:
+    """Parse a boolean environment variable (1/true/yes/on)."""
+    val = os.getenv(var_name)
+    if val is None:
+        return False
+    return str(val).strip().lower() in {"1", "true", "yes", "on"}
+
+
+def _parse_boolish(value: Any) -> bool:
+    """Parse a request-supplied bool-ish value (bool/int/str/None)."""
+    if value is None:
+        return False
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, (int, float)):
+        return bool(value)
+    if isinstance(value, str):
+        normalized = value.strip().lower()
+        if normalized in {"1", "true", "yes", "on"}:
+            return True
+        if normalized in {"0", "false", "no", "off", ""}:
+            return False
+    raise ValueError(f"Expected boolean, got {type(value).__name__}: {value!r}")
+
+
+_OLLAMA_INFLIGHT_LOCK = threading.Lock()
+_OLLAMA_INFLIGHT_COUNTS: Dict[Tuple[str, str, str], int] = {}
+_OLLAMA_UNLOAD_REQUESTED: Dict[Tuple[str, str, str], bool] = {}
+
+
+def _ollama_inflight_key(provider: str, base_url: Optional[str], model: str) -> Tuple[str, str, str]:
+    """Build a stable key for tracking in-flight Ollama requests."""
+    return (provider.strip().lower(), (base_url or "").strip(), model)
+
+
+def _ollama_inflight_enter(key: Tuple[str, str, str]) -> None:
+    """Increment in-flight counter for an Ollama (provider/base_url/model) key."""
+    with _OLLAMA_INFLIGHT_LOCK:
+        _OLLAMA_INFLIGHT_COUNTS[key] = _OLLAMA_INFLIGHT_COUNTS.get(key, 0) + 1
+
+
+def _ollama_inflight_exit(key: Tuple[str, str, str], *, unload_after_requested: bool) -> bool:
+    """Decrement in-flight counter and return True if an unload should happen now."""
+    with _OLLAMA_INFLIGHT_LOCK:
+        if unload_after_requested:
+            _OLLAMA_UNLOAD_REQUESTED[key] = True
+
+        current = _OLLAMA_INFLIGHT_COUNTS.get(key, 0)
+        if current <= 1:
+            _OLLAMA_INFLIGHT_COUNTS.pop(key, None)
+            return bool(_OLLAMA_UNLOAD_REQUESTED.pop(key, False))
+
+        _OLLAMA_INFLIGHT_COUNTS[key] = current - 1
+        return False
+
+
+def _best_effort_unload(llm: Any, *, request_id: str, provider: str, model: str) -> None:
+    """Unload provider resources without failing the request lifecycle."""
+    try:
+        if not hasattr(llm, "unload_model"):
+            raise AttributeError("Provider does not implement unload_model(model_name)")
+        llm.unload_model(model)
+        logger.info("🧹 Provider Unloaded", request_id=request_id, provider=provider, model=model)
+    except Exception as e:
+        logger.warning(
+            "⚠️ Provider unload failed",
+            request_id=request_id,
+            provider=provider,
+            model=model,
+            error=str(e),
+            error_type=type(e).__name__,
+        )
+
+
 def parse_model_string(model_string: str) -> tuple[str, str]:
     """Parse model string to extract provider and model."""
     if not model_string:
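The in-flight helpers above implement "unload only when the last overlapping request finishes" semantics. A small illustration of that behaviour (calling the module-private helpers directly, purely for explanation; the base URL and model are assumptions):

    # Illustration only: these helpers are private to abstractcore/server/app.py.
    key = _ollama_inflight_key("ollama", "http://localhost:11434", "llama3.1:8b")

    _ollama_inflight_enter(key)   # request A starts
    _ollama_inflight_enter(key)   # request B starts (same provider/base_url/model)

    # Request A asked for unload_after: the wish is recorded, but nothing is
    # unloaded yet because request B is still in flight.
    assert _ollama_inflight_exit(key, unload_after_requested=True) is False

    # Request B finishes last, so the recorded wish is returned and the caller
    # may now invoke _best_effort_unload(...).
    assert _ollama_inflight_exit(key, unload_after_requested=False) is True
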
@@ -997,12 +1147,205 @@ async def health_check():
         ]
     }
 
+
+class PromptCacheProxyBase(BaseModel):
+    """Proxy configuration for forwarding AbstractCore prompt-cache control-plane calls."""
+
+    base_url: Optional[str] = Field(
+        default=None,
+        description=(
+            "Upstream base URL for an AbstractEndpoint instance. Can include an OpenAI-style `/v1` suffix "
+            "(it will be stripped when proxying `/acore/prompt_cache/*`)."
+        ),
+        example="http://localhost:8001/v1",
+    )
+    api_key: Optional[str] = Field(
+        default=None,
+        description="Optional upstream API key (sent as Authorization: Bearer ...).",
+        example=None,
+    )
+
+
+class PromptCacheSetProxyRequest(PromptCacheProxyBase):
+    key: str
+    make_default: bool = True
+    ttl_s: Optional[float] = None
+
+
+class PromptCacheUpdateProxyRequest(PromptCacheProxyBase):
+    key: str
+    prompt: Optional[str] = None
+    messages: Optional[List[Dict[str, Any]]] = None
+    system_prompt: Optional[str] = None
+    tools: Optional[List[Dict[str, Any]]] = None
+    add_generation_prompt: bool = False
+    ttl_s: Optional[float] = None
+
+
+class PromptCacheForkProxyRequest(PromptCacheProxyBase):
+    from_key: str
+    to_key: str
+    make_default: bool = False
+    ttl_s: Optional[float] = None
+
+
+class PromptCacheClearProxyRequest(PromptCacheProxyBase):
+    key: Optional[str] = None
+
+
+class PromptCachePrepareModulesProxyRequest(PromptCacheProxyBase):
+    namespace: str
+    modules: List[Dict[str, Any]]
+    make_default: bool = False
+    ttl_s: Optional[float] = None
+    version: int = 1
+
+
+def _normalize_control_plane_base_url(base_url: str) -> str:
+    u = str(base_url or "").strip().rstrip("/")
+    if u.endswith("/v1"):
+        u = u[:-3]
+    return u.rstrip("/")
+
+
+def _proxy_prompt_cache_request(
+    *,
+    base_url: Optional[str],
+    api_key: Optional[str],
+    method: str,
+    path: str,
+    json_body: Optional[Dict[str, Any]] = None,
+    timeout_s: float = 30.0,
+) -> Dict[str, Any]:
+    if not isinstance(base_url, str) or not base_url.strip():
+        return {
+            "supported": False,
+            "error": "base_url is required to proxy prompt cache control plane calls (use AbstractEndpoint)",
+        }
+
+    upstream_root = _normalize_control_plane_base_url(base_url)
+    url = f"{upstream_root}{path}"
+
+    headers: Dict[str, str] = {}
+    if isinstance(api_key, str) and api_key.strip():
+        headers["Authorization"] = f"Bearer {api_key.strip()}"
+
+    try:
+        with httpx.Client(timeout=timeout_s) as client:
+            if method.upper() == "GET":
+                resp = client.get(url, headers=headers)
+            else:
+                resp = client.post(url, headers=headers, json=json_body or {})
+    except Exception as e:
+        return {"supported": False, "error": str(e)}
+
+    try:
+        payload = resp.json()
+    except Exception:
+        payload = {"error": resp.text}
+
+    if resp.status_code >= 400:
+        return {
+            "supported": False,
+            "status_code": int(resp.status_code),
+            "error": payload,
+            "upstream": url,
+        }
+
+    if isinstance(payload, dict):
+        return payload
+    return {"supported": True, "data": payload}
+
+
+@app.get("/acore/prompt_cache/stats")
+def acore_prompt_cache_stats(
+    base_url: Optional[str] = Query(None, description="Upstream AbstractEndpoint base_url (optionally including /v1)"),
+    api_key: Optional[str] = Query(None, description="Optional upstream API key"),
+):
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="GET",
+        path="/acore/prompt_cache/stats",
+        json_body=None,
+    )
+
+
+@app.post("/acore/prompt_cache/set")
+def acore_prompt_cache_set(req: PromptCacheSetProxyRequest):
+    body = req.model_dump(exclude_none=True)
+    base_url = body.pop("base_url", None)
+    api_key = body.pop("api_key", None)
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="POST",
+        path="/acore/prompt_cache/set",
+        json_body=body,
+    )
+
+
+@app.post("/acore/prompt_cache/update")
+def acore_prompt_cache_update(req: PromptCacheUpdateProxyRequest):
+    body = req.model_dump(exclude_none=True)
+    base_url = body.pop("base_url", None)
+    api_key = body.pop("api_key", None)
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="POST",
+        path="/acore/prompt_cache/update",
+        json_body=body,
+    )
+
+
+@app.post("/acore/prompt_cache/fork")
+def acore_prompt_cache_fork(req: PromptCacheForkProxyRequest):
+    body = req.model_dump(exclude_none=True)
+    base_url = body.pop("base_url", None)
+    api_key = body.pop("api_key", None)
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="POST",
+        path="/acore/prompt_cache/fork",
+        json_body=body,
+    )
+
+
+@app.post("/acore/prompt_cache/clear")
+def acore_prompt_cache_clear(req: PromptCacheClearProxyRequest):
+    body = req.model_dump(exclude_none=True)
+    base_url = body.pop("base_url", None)
+    api_key = body.pop("api_key", None)
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="POST",
+        path="/acore/prompt_cache/clear",
+        json_body=body,
+    )
+
+
+@app.post("/acore/prompt_cache/prepare_modules")
+def acore_prompt_cache_prepare_modules(req: PromptCachePrepareModulesProxyRequest):
+    body = req.model_dump(exclude_none=True)
+    base_url = body.pop("base_url", None)
+    api_key = body.pop("api_key", None)
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="POST",
+        path="/acore/prompt_cache/prepare_modules",
+        json_body=body,
+    )
+
+
 @app.get("/v1/models")
 async def list_models(
     provider: Optional[str] = Query(
         None,
         description="Filter by provider (e.g., 'ollama', 'openai', 'anthropic', 'lmstudio')",
-        example=""
     ),
     input_type: Optional[ModelInputCapability] = Query(
         None,
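These `/acore/prompt_cache/*` routes are thin proxies: they strip `base_url`/`api_key` from the body and forward the rest to the upstream AbstractEndpoint. A hedged usage sketch with hypothetical addresses:

    import httpx

    # The AbstractCore server (this file) is assumed on :8000; the upstream
    # AbstractEndpoint that actually owns the prompt cache is assumed on :8001.
    r = httpx.post(
        "http://localhost:8000/acore/prompt_cache/set",
        json={
            "base_url": "http://localhost:8001/v1",  # /v1 suffix is stripped when proxying
            "key": "tenantA:session123",
            "make_default": True,
            "ttl_s": 600,
        },
        timeout=30.0,
    )
    print(r.json())  # e.g. {"supported": false, "error": ...} when no upstream is reachable
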
@@ -1316,6 +1659,16 @@ async def create_response(
             detail={"error": {"message": "Request must contain either 'input' (OpenAI format) or 'messages' (legacy format)", "type": "invalid_request"}}
         )
 
+    # AbstractCore extension: allow opt-in unload-after-request even for OpenAI Responses format.
+    if "unload_after" in request_data:
+        try:
+            chat_request = chat_request.model_copy(update={"unload_after": _parse_boolish(request_data.get("unload_after"))})
+        except Exception as e:
+            raise HTTPException(
+                status_code=422,
+                detail={"error": {"message": f"Invalid unload_after value: {e}", "type": "validation_error"}},
+            )
+
     # Respect user's streaming preference (defaults to False)
 
     # Process using our standard pipeline
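Since `unload_after` is not part of the OpenAI Responses schema, the handler reads it from the raw body and copies it onto the internal chat request via `_parse_boolish`. A sketch of a Responses-format call; the route path `/v1/responses`, host, and model are assumptions for illustration:

    import httpx

    r = httpx.post(
        "http://localhost:8000/v1/responses",
        json={
            "model": "ollama/llama3.1:8b",
            "input": "Name three uses of a heat pump.",
            "unload_after": "true",  # bool-ish strings are accepted via _parse_boolish
        },
        timeout=300.0,
    )
    print(r.status_code)
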
@@ -2023,11 +2376,16 @@ async def process_chat_completion(
 
     # Detect target format for tool call syntax
     target_format = detect_target_format(f"{provider}/{model}", request, http_request)
+    user_agent_raw = http_request.headers.get("user-agent", "")
+    user_agent = str(user_agent_raw or "")
+    if len(user_agent) > 50:
+        #[WARNING:TRUNCATION] bounded user-agent capture for request logs
+        user_agent = user_agent[:50].rstrip() + "…"
     logger.info(
         "🎯 Target Format Detected",
         request_id=request_id,
         target_format=target_format.value,
-        user_agent=
+        user_agent=user_agent,
     )
 
     # Process media from messages
@@ -2052,11 +2410,14 @@ async def process_chat_completion(
     # Validate media files if any were found
     if all_media_files:
         validate_media_files(all_media_files)
+        #[WARNING:TRUNCATION] bounded filename preview for request logs
+        files_preview = [os.path.basename(f) for f in all_media_files[:5]]
         logger.info(
             "📎 Media Files Processed",
             request_id=request_id,
             file_count=len(all_media_files),
-            files=
+            files=files_preview,
+            files_truncated=len(all_media_files) > 5,
         )
 
     # Create LLM instance
@@ -2067,6 +2428,13 @@ async def process_chat_completion(
         # Enable trace capture (trace_id) without retaining full trace buffers by default.
         provider_kwargs["enable_tracing"] = True
         provider_kwargs.setdefault("max_traces", 0)
+        if request.api_key:
+            provider_kwargs["api_key"] = request.api_key
+            logger.debug(
+                "🔑 Custom API Key Provided",
+                request_id=request_id,
+                provider=provider
+            )
         if request.base_url:
             provider_kwargs["base_url"] = request.base_url
             logger.info(
@@ -2079,7 +2447,28 @@ async def process_chat_completion(
             # Note: BaseProvider treats non-positive values as "unlimited".
             provider_kwargs["timeout"] = request.timeout_s
 
+        provider_normalized = provider.strip().lower()
+        unload_after_requested = bool(getattr(request, "unload_after", False))
+        allow_unsafe_unload_after = _parse_bool_env("ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER")
+        if unload_after_requested and provider_normalized == "ollama" and not allow_unsafe_unload_after:
+            raise HTTPException(
+                status_code=403,
+                detail={
+                    "error": {
+                        "message": (
+                            "unload_after=true is disabled for provider 'ollama' because it can unload shared server "
+                            "state and disrupt other clients. Set ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER=1 to enable."
+                        ),
+                        "type": "forbidden",
+                    }
+                },
+            )
+
         llm = create_llm(provider, model=model, **provider_kwargs)
+        ollama_key: Optional[Tuple[str, str, str]] = None
+        if provider_normalized == "ollama":
+            ollama_key = _ollama_inflight_key(provider, request.base_url, model)
+            _ollama_inflight_enter(ollama_key)
 
         # Convert messages
         messages = convert_to_abstractcore_messages(processed_messages)
@@ -2103,6 +2492,8 @@ async def process_chat_completion(
             gen_kwargs["trace_metadata"] = trace_metadata
 
         # Add optional parameters
+        if request.thinking is not None:
+            gen_kwargs["thinking"] = request.thinking
         if request.stop:
             gen_kwargs["stop"] = request.stop
         if request.seed:
|
|
|
2111
2502
|
gen_kwargs["frequency_penalty"] = request.frequency_penalty
|
|
2112
2503
|
if request.presence_penalty:
|
|
2113
2504
|
gen_kwargs["presence_penalty"] = request.presence_penalty
|
|
2505
|
+
if isinstance(request.prompt_cache_key, str) and request.prompt_cache_key.strip():
|
|
2506
|
+
gen_kwargs["prompt_cache_key"] = request.prompt_cache_key.strip()
|
|
2114
2507
|
|
|
2115
2508
|
# Generate response
|
|
2116
2509
|
# Only cleanup files created by this request (with our specific prefixes)
|
|
@@ -2128,7 +2521,16 @@ async def process_chat_completion(
|
|
|
2128
2521
|
if request.stream:
|
|
2129
2522
|
return StreamingResponse(
|
|
2130
2523
|
generate_streaming_response(
|
|
2131
|
-
llm,
|
|
2524
|
+
llm,
|
|
2525
|
+
gen_kwargs,
|
|
2526
|
+
provider,
|
|
2527
|
+
model,
|
|
2528
|
+
syntax_rewriter,
|
|
2529
|
+
request_id,
|
|
2530
|
+
temp_files_to_cleanup,
|
|
2531
|
+
unload_after=unload_after_requested,
|
|
2532
|
+
ollama_key=ollama_key,
|
|
2533
|
+
allow_unsafe_unload_after=allow_unsafe_unload_after,
|
|
2132
2534
|
),
|
|
2133
2535
|
media_type="text/event-stream",
|
|
2134
2536
|
headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
|
|
@@ -2148,9 +2550,22 @@ async def process_chat_completion(
             )
             return openai_response
     finally:
-
-
+        if not request.stream:
+            if provider_normalized == "ollama" and ollama_key is not None:
+                should_unload = _ollama_inflight_exit(ollama_key, unload_after_requested=unload_after_requested)
+                if should_unload and allow_unsafe_unload_after:
+                    _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)
+                elif should_unload:
+                    logger.warning(
+                        "⚠️ Unload requested but disabled by server policy",
+                        request_id=request_id,
+                        provider=provider,
+                        model=model,
+                    )
+            elif unload_after_requested:
+                _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)
 
+        # Cleanup temporary files (base64 and downloaded images) with delay to avoid race conditions
         def delayed_cleanup():
             """Cleanup temporary files after a short delay to avoid race conditions"""
             time.sleep(1)  # Short delay to ensure generation is complete
|
|
|
2170
2585
|
cleanup_thread = threading.Thread(target=delayed_cleanup, daemon=True)
|
|
2171
2586
|
cleanup_thread.start()
|
|
2172
2587
|
|
|
2588
|
+
except HTTPException:
|
|
2589
|
+
raise
|
|
2173
2590
|
except Exception as e:
|
|
2174
2591
|
logger.error(
|
|
2175
2592
|
"❌ Chat completion failed",
|
|
@@ -2189,9 +2606,14 @@ def generate_streaming_response(
     model: str,
     syntax_rewriter: ToolCallSyntaxRewriter,
     request_id: str,
-    temp_files_to_cleanup: List[str] = None
+    temp_files_to_cleanup: List[str] = None,
+    *,
+    unload_after: bool = False,
+    ollama_key: Optional[Tuple[str, str, str]] = None,
+    allow_unsafe_unload_after: bool = False,
 ) -> Iterator[str]:
     """Generate OpenAI-compatible streaming response with syntax rewriting."""
+    provider_normalized = provider.strip().lower()
     try:
         chat_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
         created_time = int(time.time())
@@ -2324,6 +2746,32 @@ def generate_streaming_response(
             )
             error_chunk = {"error": {"message": str(e), "type": "server_error"}}
             yield f"data: {json.dumps(error_chunk)}\n\n"
+    finally:
+        if provider_normalized == "ollama" and ollama_key is not None:
+            try:
+                should_unload = _ollama_inflight_exit(ollama_key, unload_after_requested=unload_after)
+            except Exception as e:
+                logger.warning(
+                    "⚠️ Failed to update in-flight unload state",
+                    request_id=request_id,
+                    provider=provider,
+                    model=model,
+                    error=str(e),
+                    error_type=type(e).__name__,
+                )
+                should_unload = False
+
+            if should_unload and allow_unsafe_unload_after:
+                _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)
+            elif should_unload:
+                logger.warning(
+                    "⚠️ Unload requested but disabled by server policy",
+                    request_id=request_id,
+                    provider=provider,
+                    model=model,
+                )
+            elif unload_after:
+                _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)
 
 def convert_to_openai_response(
     response,