abstractcore 2.9.1__py3-none-any.whl → 2.11.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +7 -27
- abstractcore/apps/deepsearch.py +9 -4
- abstractcore/apps/extractor.py +33 -100
- abstractcore/apps/intent.py +19 -0
- abstractcore/apps/judge.py +20 -1
- abstractcore/apps/summarizer.py +20 -1
- abstractcore/architectures/detection.py +34 -1
- abstractcore/architectures/response_postprocessing.py +313 -0
- abstractcore/assets/architecture_formats.json +38 -8
- abstractcore/assets/model_capabilities.json +882 -160
- abstractcore/compression/__init__.py +1 -2
- abstractcore/compression/glyph_processor.py +6 -4
- abstractcore/config/main.py +52 -20
- abstractcore/config/manager.py +390 -12
- abstractcore/config/vision_config.py +5 -5
- abstractcore/core/interface.py +151 -3
- abstractcore/core/session.py +16 -10
- abstractcore/download.py +1 -1
- abstractcore/embeddings/manager.py +20 -6
- abstractcore/endpoint/__init__.py +2 -0
- abstractcore/endpoint/app.py +458 -0
- abstractcore/mcp/client.py +3 -1
- abstractcore/media/__init__.py +52 -17
- abstractcore/media/auto_handler.py +42 -22
- abstractcore/media/base.py +44 -1
- abstractcore/media/capabilities.py +12 -33
- abstractcore/media/enrichment.py +105 -0
- abstractcore/media/handlers/anthropic_handler.py +19 -28
- abstractcore/media/handlers/local_handler.py +124 -70
- abstractcore/media/handlers/openai_handler.py +19 -31
- abstractcore/media/processors/__init__.py +4 -2
- abstractcore/media/processors/audio_processor.py +57 -0
- abstractcore/media/processors/office_processor.py +8 -3
- abstractcore/media/processors/pdf_processor.py +46 -3
- abstractcore/media/processors/text_processor.py +22 -24
- abstractcore/media/processors/video_processor.py +58 -0
- abstractcore/media/types.py +97 -4
- abstractcore/media/utils/image_scaler.py +20 -2
- abstractcore/media/utils/video_frames.py +219 -0
- abstractcore/media/vision_fallback.py +136 -22
- abstractcore/processing/__init__.py +32 -3
- abstractcore/processing/basic_deepsearch.py +15 -10
- abstractcore/processing/basic_intent.py +3 -2
- abstractcore/processing/basic_judge.py +3 -2
- abstractcore/processing/basic_summarizer.py +1 -1
- abstractcore/providers/__init__.py +3 -1
- abstractcore/providers/anthropic_provider.py +95 -8
- abstractcore/providers/base.py +1516 -81
- abstractcore/providers/huggingface_provider.py +546 -69
- abstractcore/providers/lmstudio_provider.py +30 -916
- abstractcore/providers/mlx_provider.py +382 -35
- abstractcore/providers/model_capabilities.py +5 -1
- abstractcore/providers/ollama_provider.py +99 -15
- abstractcore/providers/openai_compatible_provider.py +406 -180
- abstractcore/providers/openai_provider.py +188 -44
- abstractcore/providers/openrouter_provider.py +76 -0
- abstractcore/providers/registry.py +61 -5
- abstractcore/providers/streaming.py +138 -33
- abstractcore/providers/vllm_provider.py +92 -817
- abstractcore/server/app.py +478 -28
- abstractcore/server/audio_endpoints.py +139 -0
- abstractcore/server/vision_endpoints.py +1319 -0
- abstractcore/structured/handler.py +316 -41
- abstractcore/tools/common_tools.py +5501 -2012
- abstractcore/tools/comms_tools.py +1641 -0
- abstractcore/tools/core.py +37 -7
- abstractcore/tools/handler.py +4 -9
- abstractcore/tools/parser.py +49 -2
- abstractcore/tools/tag_rewriter.py +2 -1
- abstractcore/tools/telegram_tdlib.py +407 -0
- abstractcore/tools/telegram_tools.py +261 -0
- abstractcore/utils/cli.py +1085 -72
- abstractcore/utils/structured_logging.py +29 -8
- abstractcore/utils/token_utils.py +2 -0
- abstractcore/utils/truncation.py +29 -0
- abstractcore/utils/version.py +3 -4
- abstractcore/utils/vlm_token_calculator.py +12 -2
- abstractcore-2.11.4.dist-info/METADATA +562 -0
- abstractcore-2.11.4.dist-info/RECORD +133 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/WHEEL +1 -1
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/entry_points.txt +1 -0
- abstractcore-2.9.1.dist-info/METADATA +0 -1190
- abstractcore-2.9.1.dist-info/RECORD +0 -119
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.4.dist-info}/top_level.txt +0 -0
abstractcore/server/app.py
CHANGED
@@ -33,6 +33,8 @@ import urllib.parse
 import argparse
 import sys
 import logging
+import threading
+import httpx
 from typing import List, Dict, Any, Optional, Literal, Union, Iterator, Tuple, Annotated
 from enum import Enum
 from fastapi import FastAPI, HTTPException, Request, Query, Body
@@ -60,25 +62,27 @@ from ..tools.syntax_rewriter import (
 # Configuration
 # ============================================================================

-# Initialize with default logging configuration (can be overridden later)
+# Initialize with default logging configuration (can be overridden later).
+#
+# IMPORTANT: default console verbosity is controlled by AbstractCore's centralized logging defaults
+# (and env overrides like ABSTRACTCORE_CONSOLE_LOG_LEVEL). The server must not force INFO-level
+# console logs on startup.
 debug_mode = os.getenv("ABSTRACTCORE_DEBUG", "false").lower() == "true"

-
-
-
-
-
-
-
-
-
-    file_json=True
-)
+if debug_mode:
+    configure_logging(
+        console_level=logging.DEBUG,
+        file_level=logging.DEBUG,
+        log_dir="logs",
+        verbatim_enabled=True,
+        console_json=False,
+        file_json=True,
+    )

 # Get initial logger
 logger = get_logger("server")

-# Log initial startup with debug mode status
+# Log initial startup with debug mode status (may be suppressed by console level).
 logger.info("🚀 AbstractCore Server Initializing", version=__version__, debug_mode=debug_mode)

 def reconfigure_for_debug():
@@ -117,6 +121,26 @@ app.add_middleware(
     allow_headers=["*"],
 )

+# Optional: OpenAI-compatible vision generation endpoints (/v1/images/*).
+# These are safe-by-default and require explicit configuration; see `vision_endpoints.py`.
+try:
+    from .vision_endpoints import router as _vision_router
+
+    app.include_router(_vision_router, prefix="/v1")
+    logger.info("🖼️ Vision endpoints enabled at /v1/images/*")
+except Exception as e:
+    logger.debug(f"Vision endpoints not loaded: {e}")
+
+# Optional: OpenAI-compatible audio endpoints (/v1/audio/*).
+# These delegate to capability plugins (e.g. AbstractVoice) and degrade to 501 when unavailable.
+try:
+    from .audio_endpoints import router as _audio_router
+
+    app.include_router(_audio_router, prefix="/v1")
+    logger.info("🔊 Audio endpoints enabled at /v1/audio/*")
+except Exception as e:
+    logger.debug(f"Audio endpoints not loaded: {e}")
+
 # ============================================================================
 # Enhanced Error Handling and Logging Middleware
 # ============================================================================
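The two router blocks above are mounted opportunistically: if `vision_endpoints` or `audio_endpoints` cannot be imported, the server simply runs without them, and the audio routes themselves degrade to HTTP 501 when no capability plugin is available. A rough client-side probe of a deployment, assuming a server on http://localhost:8000 and OpenAI-style sub-paths under the documented `/v1/images/*` and `/v1/audio/*` prefixes (both assumptions, not taken from this diff):

```python
import httpx

BASE = "http://localhost:8000"  # assumed local deployment

def endpoint_status(path: str) -> str:
    """Classify an optional endpoint by status code: 404 -> not mounted, 501 -> mounted but unavailable."""
    try:
        resp = httpx.post(f"{BASE}{path}", json={}, timeout=10.0)
    except httpx.HTTPError as exc:
        return f"unreachable ({exc})"
    if resp.status_code == 404:
        return "router not mounted"
    if resp.status_code == 501:
        return "mounted, capability plugin unavailable"
    return f"mounted (HTTP {resp.status_code})"

# Illustrative OpenAI-style paths under the /v1/images/* and /v1/audio/* prefixes.
for path in ("/v1/images/generations", "/v1/audio/speech"):
    print(path, "->", endpoint_status(path))
```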
@@ -193,9 +217,14 @@ async def validation_exception_handler(request: Request, exc: RequestValidationE
                 body=body_json
             )
         except json.JSONDecodeError:
+            raw = body.decode("utf-8", errors="replace")
+            body_text = raw
+            if len(body_text) > 1000:
+                #[WARNING:TRUNCATION] bounded request-body preview for debug logs
+                body_text = body_text[:980].rstrip() + "\n… (truncated)"
             logger.debug(
                 "📋 Request Body (Validation Error)",
-                body_text=
+                body_text=body_text,
             )
     except Exception as e:
         logger.debug(f"Could not read request body for debugging: {e}")
@@ -450,6 +479,14 @@ class ChatCompletionRequest(BaseModel):
         example=False
     )

+    # Unified thinking/reasoning control (AbstractCore-specific feature)
+    thinking: Optional[Union[bool, str]] = Field(
+        default=None,
+        description="Unified thinking/reasoning control (best-effort across providers/models). "
+                    "Accepted values: null/'auto'/'on'/'off' or 'low'/'medium'/'high' when supported.",
+        example="off",
+    )
+
     # Tool calling
     tools: Optional[List[Dict[str, Any]]] = Field(
         default=None,
@@ -498,6 +535,13 @@ class ChatCompletionRequest(BaseModel):
         example=0.0
     )

+    # OpenAI prompt caching (2025+): forwarded best-effort by providers that support it.
+    prompt_cache_key: Optional[str] = Field(
+        default=None,
+        description="Provider-specific prompt cache key for prefix caching (best-effort).",
+        example="tenantA:session123"
+    )
+
     # Agent format control (AppV2 feature)
     agent_format: Optional[str] = Field(
         default=None,
@@ -508,10 +552,18 @@ class ChatCompletionRequest(BaseModel):
     )

     # Provider-specific parameters (AbstractCore-specific feature)
+    api_key: Optional[str] = Field(
+        default=None,
+        description="API key for the provider (AbstractCore-specific feature). "
+                    "Supports all providers requiring authentication: openai, anthropic, openrouter, openai-compatible, huggingface. "
+                    "If not specified, falls back to provider-specific environment variables "
+                    "(e.g., OPENAI_API_KEY, ANTHROPIC_API_KEY, OPENROUTER_API_KEY).",
+        example=None
+    )
     base_url: Optional[str] = Field(
         default=None,
         description="Base URL for the provider API endpoint (AbstractCore-specific feature). "
-                    "Useful for
+                    "Useful for OpenAI-compatible providers (lmstudio, vllm, openrouter, openai-compatible) and custom/proxied endpoints. "
                     "Example: 'http://localhost:1234/v1' for LMStudio, 'http://localhost:8080/v1' for llama.cpp. "
                     "If not specified, uses provider's default or environment variable.",
         example="http://localhost:1234/v1"
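Together these hunks expose several AbstractCore-specific knobs through the OpenAI-compatible request schema (`thinking`, `prompt_cache_key`, `api_key`, `base_url`). A minimal sketch of a request exercising them against a local deployment; the host, port, endpoint path, and model string are assumptions, while the field names and accepted values come from the schema in this diff:

```python
import httpx

# Assumed local AbstractCore server and a placeholder model string.
payload = {
    "model": "lmstudio/qwen2.5-7b-instruct",
    "messages": [{"role": "user", "content": "Say hello."}],
    "thinking": "off",                          # unified control: null/'auto'/'on'/'off' or 'low'/'medium'/'high'
    "prompt_cache_key": "tenantA:session123",   # forwarded best-effort for prefix caching
    "base_url": "http://localhost:1234/v1",     # per-request provider endpoint override
    "max_tokens": 128,
}

# Assumed OpenAI-compatible chat completions path on the server.
resp = httpx.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=120.0)
resp.raise_for_status()
print(resp.json()["choices"][0]["message"]["content"])  # OpenAI-style response shape
```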
@@ -526,9 +578,17 @@ class ChatCompletionRequest(BaseModel):
                     "Values <= 0 are treated as unlimited.",
         example=7200.0,
     )
+    unload_after: bool = Field(
+        default=False,
+        description="If true, call `llm.unload_model(model)` after the request completes (AbstractCore-specific feature). "
+                    "This is useful for explicit memory hygiene in single-tenant or batch scenarios. "
+                    "WARNING: for providers that unload shared server state (e.g. Ollama), this can disrupt other "
+                    "clients and is disabled by default unless explicitly enabled by the server operator.",
+        example=False,
+    )

     class Config:
-
+        json_schema_extra = {
             "examples": {
                 "basic_text": {
                     "summary": "Basic Text Chat",
@@ -729,7 +789,25 @@ class ChatCompletionRequest(BaseModel):
                         "seed": 12345,
                         "frequency_penalty": 0.0,
                         "presence_penalty": 0.0,
-                        "agent_format": "auto"
+                        "agent_format": "auto",
+                        "api_key": None,
+                        "base_url": None
+                    }
+                },
+                "openrouter_with_api_key": {
+                    "summary": "OpenRouter with Per-Request API Key",
+                    "description": "Use OpenRouter with a per-request API key (useful for multi-tenant scenarios)",
+                    "value": {
+                        "model": "openrouter/anthropic/claude-3.5-sonnet",
+                        "messages": [
+                            {
+                                "role": "user",
+                                "content": "Explain quantum computing in simple terms"
+                            }
+                        ],
+                        "api_key": "sk-or-v1-your-openrouter-key",
+                        "temperature": 0.7,
+                        "max_tokens": 500
                     }
                 }
             }
@@ -771,7 +849,7 @@ class EmbeddingRequest(BaseModel):
     )

     class Config:
-
+        json_schema_extra = {
             "example": {
                 "input": "this is the story of starship lost in space",
                 "model": "huggingface/sentence-transformers/all-MiniLM-L6-v2",
@@ -792,7 +870,7 @@ class ResponsesAPIRequest(BaseModel):
     The endpoint automatically detects the format based on the presence of 'input' vs 'messages' field.
     """
     class Config:
-
+        json_schema_extra = {
             "oneOf": [
                 {
                     "title": "OpenAI Responses API Format",
@@ -896,6 +974,80 @@ def convert_openai_responses_to_chat_completion(openai_request: OpenAIResponsesR
 # Helper Functions
 # ============================================================================

+def _parse_bool_env(var_name: str) -> bool:
+    """Parse a boolean environment variable (1/true/yes/on)."""
+    val = os.getenv(var_name)
+    if val is None:
+        return False
+    return str(val).strip().lower() in {"1", "true", "yes", "on"}
+
+
+def _parse_boolish(value: Any) -> bool:
+    """Parse a request-supplied bool-ish value (bool/int/str/None)."""
+    if value is None:
+        return False
+    if isinstance(value, bool):
+        return value
+    if isinstance(value, (int, float)):
+        return bool(value)
+    if isinstance(value, str):
+        normalized = value.strip().lower()
+        if normalized in {"1", "true", "yes", "on"}:
+            return True
+        if normalized in {"0", "false", "no", "off", ""}:
+            return False
+    raise ValueError(f"Expected boolean, got {type(value).__name__}: {value!r}")
+
+
+_OLLAMA_INFLIGHT_LOCK = threading.Lock()
+_OLLAMA_INFLIGHT_COUNTS: Dict[Tuple[str, str, str], int] = {}
+_OLLAMA_UNLOAD_REQUESTED: Dict[Tuple[str, str, str], bool] = {}
+
+
+def _ollama_inflight_key(provider: str, base_url: Optional[str], model: str) -> Tuple[str, str, str]:
+    """Build a stable key for tracking in-flight Ollama requests."""
+    return (provider.strip().lower(), (base_url or "").strip(), model)
+
+
+def _ollama_inflight_enter(key: Tuple[str, str, str]) -> None:
+    """Increment in-flight counter for an Ollama (provider/base_url/model) key."""
+    with _OLLAMA_INFLIGHT_LOCK:
+        _OLLAMA_INFLIGHT_COUNTS[key] = _OLLAMA_INFLIGHT_COUNTS.get(key, 0) + 1
+
+
+def _ollama_inflight_exit(key: Tuple[str, str, str], *, unload_after_requested: bool) -> bool:
+    """Decrement in-flight counter and return True if an unload should happen now."""
+    with _OLLAMA_INFLIGHT_LOCK:
+        if unload_after_requested:
+            _OLLAMA_UNLOAD_REQUESTED[key] = True
+
+        current = _OLLAMA_INFLIGHT_COUNTS.get(key, 0)
+        if current <= 1:
+            _OLLAMA_INFLIGHT_COUNTS.pop(key, None)
+            return bool(_OLLAMA_UNLOAD_REQUESTED.pop(key, False))
+
+        _OLLAMA_INFLIGHT_COUNTS[key] = current - 1
+        return False
+
+
+def _best_effort_unload(llm: Any, *, request_id: str, provider: str, model: str) -> None:
+    """Unload provider resources without failing the request lifecycle."""
+    try:
+        if not hasattr(llm, "unload_model"):
+            raise AttributeError("Provider does not implement unload_model(model_name)")
+        llm.unload_model(model)
+        logger.info("🧹 Provider Unloaded", request_id=request_id, provider=provider, model=model)
+    except Exception as e:
+        logger.warning(
+            "⚠️ Provider unload failed",
+            request_id=request_id,
+            provider=provider,
+            model=model,
+            error=str(e),
+            error_type=type(e).__name__,
+        )
+
+
 def parse_model_string(model_string: str) -> tuple[str, str]:
     """Parse model string to extract provider and model."""
     if not model_string:
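The in-flight bookkeeping above is a plain lock-guarded reference count: each Ollama request for a given (provider, base_url, model) key increments the count on entry, and a requested unload is remembered but only honoured when the last concurrent request exits. A standalone sketch of that pattern (simplified names, independent of the server module):

```python
import threading
from typing import Dict, Tuple

Key = Tuple[str, str, str]

_lock = threading.Lock()
_counts: Dict[Key, int] = {}
_unload_requested: Dict[Key, bool] = {}

def enter(key: Key) -> None:
    with _lock:
        _counts[key] = _counts.get(key, 0) + 1

def leave(key: Key, unload_after: bool) -> bool:
    """Return True only when the last in-flight request leaves and an unload was requested."""
    with _lock:
        if unload_after:
            _unload_requested[key] = True
        current = _counts.get(key, 0)
        if current <= 1:
            _counts.pop(key, None)
            return _unload_requested.pop(key, False)
        _counts[key] = current - 1
        return False

key = ("ollama", "http://localhost:11434", "llama3")   # illustrative key
enter(key); enter(key)                  # two concurrent requests for the same model
print(leave(key, unload_after=True))    # False: another request is still running
print(leave(key, unload_after=False))   # True: last one out performs the deferred unload
```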
@@ -997,12 +1149,205 @@ async def health_check():
         ]
     }

+
+class PromptCacheProxyBase(BaseModel):
+    """Proxy configuration for forwarding AbstractCore prompt-cache control-plane calls."""
+
+    base_url: Optional[str] = Field(
+        default=None,
+        description=(
+            "Upstream base URL for an AbstractEndpoint instance. Can include an OpenAI-style `/v1` suffix "
+            "(it will be stripped when proxying `/acore/prompt_cache/*`)."
+        ),
+        example="http://localhost:8001/v1",
+    )
+    api_key: Optional[str] = Field(
+        default=None,
+        description="Optional upstream API key (sent as Authorization: Bearer ...).",
+        example=None,
+    )
+
+
+class PromptCacheSetProxyRequest(PromptCacheProxyBase):
+    key: str
+    make_default: bool = True
+    ttl_s: Optional[float] = None
+
+
+class PromptCacheUpdateProxyRequest(PromptCacheProxyBase):
+    key: str
+    prompt: Optional[str] = None
+    messages: Optional[List[Dict[str, Any]]] = None
+    system_prompt: Optional[str] = None
+    tools: Optional[List[Dict[str, Any]]] = None
+    add_generation_prompt: bool = False
+    ttl_s: Optional[float] = None
+
+
+class PromptCacheForkProxyRequest(PromptCacheProxyBase):
+    from_key: str
+    to_key: str
+    make_default: bool = False
+    ttl_s: Optional[float] = None
+
+
+class PromptCacheClearProxyRequest(PromptCacheProxyBase):
+    key: Optional[str] = None
+
+
+class PromptCachePrepareModulesProxyRequest(PromptCacheProxyBase):
+    namespace: str
+    modules: List[Dict[str, Any]]
+    make_default: bool = False
+    ttl_s: Optional[float] = None
+    version: int = 1
+
+
+def _normalize_control_plane_base_url(base_url: str) -> str:
+    u = str(base_url or "").strip().rstrip("/")
+    if u.endswith("/v1"):
+        u = u[:-3]
+    return u.rstrip("/")
+
+
+def _proxy_prompt_cache_request(
+    *,
+    base_url: Optional[str],
+    api_key: Optional[str],
+    method: str,
+    path: str,
+    json_body: Optional[Dict[str, Any]] = None,
+    timeout_s: float = 30.0,
+) -> Dict[str, Any]:
+    if not isinstance(base_url, str) or not base_url.strip():
+        return {
+            "supported": False,
+            "error": "base_url is required to proxy prompt cache control plane calls (use AbstractEndpoint)",
+        }
+
+    upstream_root = _normalize_control_plane_base_url(base_url)
+    url = f"{upstream_root}{path}"
+
+    headers: Dict[str, str] = {}
+    if isinstance(api_key, str) and api_key.strip():
+        headers["Authorization"] = f"Bearer {api_key.strip()}"
+
+    try:
+        with httpx.Client(timeout=timeout_s) as client:
+            if method.upper() == "GET":
+                resp = client.get(url, headers=headers)
+            else:
+                resp = client.post(url, headers=headers, json=json_body or {})
+    except Exception as e:
+        return {"supported": False, "error": str(e)}
+
+    try:
+        payload = resp.json()
+    except Exception:
+        payload = {"error": resp.text}
+
+    if resp.status_code >= 400:
+        return {
+            "supported": False,
+            "status_code": int(resp.status_code),
+            "error": payload,
+            "upstream": url,
+        }
+
+    if isinstance(payload, dict):
+        return payload
+    return {"supported": True, "data": payload}
+
+
+@app.get("/acore/prompt_cache/stats")
+def acore_prompt_cache_stats(
+    base_url: Optional[str] = Query(None, description="Upstream AbstractEndpoint base_url (optionally including /v1)"),
+    api_key: Optional[str] = Query(None, description="Optional upstream API key"),
+):
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="GET",
+        path="/acore/prompt_cache/stats",
+        json_body=None,
+    )
+
+
+@app.post("/acore/prompt_cache/set")
+def acore_prompt_cache_set(req: PromptCacheSetProxyRequest):
+    body = req.model_dump(exclude_none=True)
+    base_url = body.pop("base_url", None)
+    api_key = body.pop("api_key", None)
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="POST",
+        path="/acore/prompt_cache/set",
+        json_body=body,
+    )
+
+
+@app.post("/acore/prompt_cache/update")
+def acore_prompt_cache_update(req: PromptCacheUpdateProxyRequest):
+    body = req.model_dump(exclude_none=True)
+    base_url = body.pop("base_url", None)
+    api_key = body.pop("api_key", None)
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="POST",
+        path="/acore/prompt_cache/update",
+        json_body=body,
+    )
+
+
+@app.post("/acore/prompt_cache/fork")
+def acore_prompt_cache_fork(req: PromptCacheForkProxyRequest):
+    body = req.model_dump(exclude_none=True)
+    base_url = body.pop("base_url", None)
+    api_key = body.pop("api_key", None)
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="POST",
+        path="/acore/prompt_cache/fork",
+        json_body=body,
+    )
+
+
+@app.post("/acore/prompt_cache/clear")
+def acore_prompt_cache_clear(req: PromptCacheClearProxyRequest):
+    body = req.model_dump(exclude_none=True)
+    base_url = body.pop("base_url", None)
+    api_key = body.pop("api_key", None)
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="POST",
+        path="/acore/prompt_cache/clear",
+        json_body=body,
+    )
+
+
+@app.post("/acore/prompt_cache/prepare_modules")
+def acore_prompt_cache_prepare_modules(req: PromptCachePrepareModulesProxyRequest):
+    body = req.model_dump(exclude_none=True)
+    base_url = body.pop("base_url", None)
+    api_key = body.pop("api_key", None)
+    return _proxy_prompt_cache_request(
+        base_url=base_url,
+        api_key=api_key,
+        method="POST",
+        path="/acore/prompt_cache/prepare_modules",
+        json_body=body,
+    )
+
+
 @app.get("/v1/models")
 async def list_models(
     provider: Optional[str] = Query(
         None,
         description="Filter by provider (e.g., 'ollama', 'openai', 'anthropic', 'lmstudio')",
-        example=""
     ),
     input_type: Optional[ModelInputCapability] = Query(
         None,
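These `/acore/prompt_cache/*` routes are thin proxies: the gateway forwards each call to the upstream AbstractEndpoint named by `base_url` (an OpenAI-style `/v1` suffix is stripped) and passes `api_key`, when given, as a bearer token. A minimal sketch of seeding and inspecting a cache entry through the gateway, assuming the gateway on port 8000 and an AbstractEndpoint upstream on port 8001 (hosts, ports, and the cache key are illustrative):

```python
import httpx

GATEWAY = "http://localhost:8000"        # this server
UPSTREAM = "http://localhost:8001/v1"    # AbstractEndpoint instance (the /v1 suffix is stripped when proxied)

# Register a prompt-cache key on the upstream via the gateway proxy.
set_resp = httpx.post(
    f"{GATEWAY}/acore/prompt_cache/set",
    json={"base_url": UPSTREAM, "key": "tenantA:session123", "make_default": True},
    timeout=30.0,
)
print(set_resp.json())

# Read back upstream cache statistics through the same proxy.
stats = httpx.get(
    f"{GATEWAY}/acore/prompt_cache/stats",
    params={"base_url": UPSTREAM},
    timeout=30.0,
)
print(stats.json())
```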
@@ -1316,6 +1661,16 @@ async def create_response(
             detail={"error": {"message": "Request must contain either 'input' (OpenAI format) or 'messages' (legacy format)", "type": "invalid_request"}}
         )

+    # AbstractCore extension: allow opt-in unload-after-request even for OpenAI Responses format.
+    if "unload_after" in request_data:
+        try:
+            chat_request = chat_request.model_copy(update={"unload_after": _parse_boolish(request_data.get("unload_after"))})
+        except Exception as e:
+            raise HTTPException(
+                status_code=422,
+                detail={"error": {"message": f"Invalid unload_after value: {e}", "type": "validation_error"}},
+            )
+
     # Respect user's streaming preference (defaults to False)

     # Process using our standard pipeline
@@ -2023,11 +2378,16 @@ async def process_chat_completion(

         # Detect target format for tool call syntax
         target_format = detect_target_format(f"{provider}/{model}", request, http_request)
+        user_agent_raw = http_request.headers.get("user-agent", "")
+        user_agent = str(user_agent_raw or "")
+        if len(user_agent) > 50:
+            #[WARNING:TRUNCATION] bounded user-agent capture for request logs
+            user_agent = user_agent[:50].rstrip() + "…"
         logger.info(
             "🎯 Target Format Detected",
             request_id=request_id,
             target_format=target_format.value,
-            user_agent=
+            user_agent=user_agent,
         )

         # Process media from messages
@@ -2052,11 +2412,14 @@ async def process_chat_completion(
         # Validate media files if any were found
         if all_media_files:
             validate_media_files(all_media_files)
+            #[WARNING:TRUNCATION] bounded filename preview for request logs
+            files_preview = [os.path.basename(f) for f in all_media_files[:5]]
             logger.info(
                 "📎 Media Files Processed",
                 request_id=request_id,
                 file_count=len(all_media_files),
-                files=
+                files=files_preview,
+                files_truncated=len(all_media_files) > 5,
             )

         # Create LLM instance
@@ -2067,6 +2430,13 @@ async def process_chat_completion(
         # Enable trace capture (trace_id) without retaining full trace buffers by default.
         provider_kwargs["enable_tracing"] = True
         provider_kwargs.setdefault("max_traces", 0)
+        if request.api_key:
+            provider_kwargs["api_key"] = request.api_key
+            logger.debug(
+                "🔑 Custom API Key Provided",
+                request_id=request_id,
+                provider=provider
+            )
         if request.base_url:
             provider_kwargs["base_url"] = request.base_url
             logger.info(
@@ -2079,7 +2449,28 @@ async def process_chat_completion(
             # Note: BaseProvider treats non-positive values as "unlimited".
             provider_kwargs["timeout"] = request.timeout_s

+        provider_normalized = provider.strip().lower()
+        unload_after_requested = bool(getattr(request, "unload_after", False))
+        allow_unsafe_unload_after = _parse_bool_env("ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER")
+        if unload_after_requested and provider_normalized == "ollama" and not allow_unsafe_unload_after:
+            raise HTTPException(
+                status_code=403,
+                detail={
+                    "error": {
+                        "message": (
+                            "unload_after=true is disabled for provider 'ollama' because it can unload shared server "
+                            "state and disrupt other clients. Set ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER=1 to enable."
+                        ),
+                        "type": "forbidden",
+                    }
+                },
+            )
+
         llm = create_llm(provider, model=model, **provider_kwargs)
+        ollama_key: Optional[Tuple[str, str, str]] = None
+        if provider_normalized == "ollama":
+            ollama_key = _ollama_inflight_key(provider, request.base_url, model)
+            _ollama_inflight_enter(ollama_key)

         # Convert messages
         messages = convert_to_abstractcore_messages(processed_messages)
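For Ollama the `unload_after` flag is therefore double-gated: the client must send `unload_after: true` and the operator must export `ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER=1`, otherwise the request is rejected with HTTP 403 before the model is ever created. A minimal sketch of handling both outcomes from the client side (host, port, endpoint path, and model string are assumptions):

```python
import httpx

payload = {
    "model": "ollama/llama3",   # placeholder model string
    "messages": [{"role": "user", "content": "One-line summary of RAII?"}],
    "unload_after": True,       # ask the server to call llm.unload_model(...) after the request
}

resp = httpx.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=300.0)

if resp.status_code == 403:
    # Server policy: Ollama unloads touch shared state, so the operator must opt in with
    #   ABSTRACTCORE_ALLOW_UNSAFE_UNLOAD_AFTER=1   (set before starting the server).
    # FastAPI wraps the HTTPException payload under "detail".
    print("unload_after rejected:", resp.json()["detail"]["error"]["message"])
else:
    resp.raise_for_status()
    print(resp.json()["choices"][0]["message"]["content"])
```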
@@ -2103,6 +2494,8 @@ async def process_chat_completion(
             gen_kwargs["trace_metadata"] = trace_metadata

         # Add optional parameters
+        if request.thinking is not None:
+            gen_kwargs["thinking"] = request.thinking
         if request.stop:
             gen_kwargs["stop"] = request.stop
         if request.seed:
@@ -2111,6 +2504,8 @@ async def process_chat_completion(
             gen_kwargs["frequency_penalty"] = request.frequency_penalty
         if request.presence_penalty:
             gen_kwargs["presence_penalty"] = request.presence_penalty
+        if isinstance(request.prompt_cache_key, str) and request.prompt_cache_key.strip():
+            gen_kwargs["prompt_cache_key"] = request.prompt_cache_key.strip()

         # Generate response
         # Only cleanup files created by this request (with our specific prefixes)
@@ -2128,7 +2523,16 @@ async def process_chat_completion(
             if request.stream:
                 return StreamingResponse(
                     generate_streaming_response(
-                        llm,
+                        llm,
+                        gen_kwargs,
+                        provider,
+                        model,
+                        syntax_rewriter,
+                        request_id,
+                        temp_files_to_cleanup,
+                        unload_after=unload_after_requested,
+                        ollama_key=ollama_key,
+                        allow_unsafe_unload_after=allow_unsafe_unload_after,
                     ),
                     media_type="text/event-stream",
                     headers={"Cache-Control": "no-cache", "Connection": "keep-alive"}
@@ -2148,9 +2552,22 @@ async def process_chat_completion(
                 )
                 return openai_response
         finally:
-
-
+            if not request.stream:
+                if provider_normalized == "ollama" and ollama_key is not None:
+                    should_unload = _ollama_inflight_exit(ollama_key, unload_after_requested=unload_after_requested)
+                    if should_unload and allow_unsafe_unload_after:
+                        _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)
+                    elif should_unload:
+                        logger.warning(
+                            "⚠️ Unload requested but disabled by server policy",
+                            request_id=request_id,
+                            provider=provider,
+                            model=model,
+                        )
+                elif unload_after_requested:
+                    _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)

+            # Cleanup temporary files (base64 and downloaded images) with delay to avoid race conditions
             def delayed_cleanup():
                 """Cleanup temporary files after a short delay to avoid race conditions"""
                 time.sleep(1)  # Short delay to ensure generation is complete
@@ -2170,6 +2587,8 @@ async def process_chat_completion(
             cleanup_thread = threading.Thread(target=delayed_cleanup, daemon=True)
             cleanup_thread.start()

+    except HTTPException:
+        raise
     except Exception as e:
         logger.error(
             "❌ Chat completion failed",
@@ -2189,9 +2608,14 @@ def generate_streaming_response(
     model: str,
     syntax_rewriter: ToolCallSyntaxRewriter,
     request_id: str,
-    temp_files_to_cleanup: List[str] = None
+    temp_files_to_cleanup: List[str] = None,
+    *,
+    unload_after: bool = False,
+    ollama_key: Optional[Tuple[str, str, str]] = None,
+    allow_unsafe_unload_after: bool = False,
 ) -> Iterator[str]:
     """Generate OpenAI-compatible streaming response with syntax rewriting."""
+    provider_normalized = provider.strip().lower()
     try:
         chat_id = f"chatcmpl-{uuid.uuid4().hex[:8]}"
         created_time = int(time.time())
@@ -2324,6 +2748,32 @@ def generate_streaming_response(
         )
         error_chunk = {"error": {"message": str(e), "type": "server_error"}}
         yield f"data: {json.dumps(error_chunk)}\n\n"
+    finally:
+        if provider_normalized == "ollama" and ollama_key is not None:
+            try:
+                should_unload = _ollama_inflight_exit(ollama_key, unload_after_requested=unload_after)
+            except Exception as e:
+                logger.warning(
+                    "⚠️ Failed to update in-flight unload state",
+                    request_id=request_id,
+                    provider=provider,
+                    model=model,
+                    error=str(e),
+                    error_type=type(e).__name__,
+                )
+                should_unload = False
+
+            if should_unload and allow_unsafe_unload_after:
+                _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)
+            elif should_unload:
+                logger.warning(
+                    "⚠️ Unload requested but disabled by server policy",
+                    request_id=request_id,
+                    provider=provider,
+                    model=model,
+                )
+        elif unload_after:
+            _best_effort_unload(llm, request_id=request_id, provider=provider, model=model)

 def convert_to_openai_response(
     response,
@@ -2407,7 +2857,7 @@ def convert_to_openai_response(
 def run_server(host: str = "0.0.0.0", port: int = 8000):
     """Run the server"""
     import uvicorn
-    uvicorn.run(app, host=host, port=port)
+    uvicorn.run(app, host=host, port=port, log_level="error")

 # ============================================================================
 # Server Runner Function
@@ -2476,7 +2926,7 @@ Debug Mode:
         "app": app,
         "host": args.host,
         "port": args.port,
-        "log_level": "debug" if debug_mode else "
+        "log_level": "debug" if debug_mode else "error",
     }

     # In debug mode, enable more detailed uvicorn logging