abstractcore 2.9.1__py3-none-any.whl → 2.11.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/__init__.py +7 -27
- abstractcore/apps/extractor.py +33 -100
- abstractcore/apps/intent.py +19 -0
- abstractcore/apps/judge.py +20 -1
- abstractcore/apps/summarizer.py +20 -1
- abstractcore/architectures/detection.py +34 -1
- abstractcore/architectures/response_postprocessing.py +313 -0
- abstractcore/assets/architecture_formats.json +38 -8
- abstractcore/assets/model_capabilities.json +781 -160
- abstractcore/compression/__init__.py +1 -2
- abstractcore/compression/glyph_processor.py +6 -4
- abstractcore/config/main.py +31 -19
- abstractcore/config/manager.py +389 -11
- abstractcore/config/vision_config.py +5 -5
- abstractcore/core/interface.py +151 -3
- abstractcore/core/session.py +16 -10
- abstractcore/download.py +1 -1
- abstractcore/embeddings/manager.py +20 -6
- abstractcore/endpoint/__init__.py +2 -0
- abstractcore/endpoint/app.py +458 -0
- abstractcore/mcp/client.py +3 -1
- abstractcore/media/__init__.py +52 -17
- abstractcore/media/auto_handler.py +42 -22
- abstractcore/media/base.py +44 -1
- abstractcore/media/capabilities.py +12 -33
- abstractcore/media/enrichment.py +105 -0
- abstractcore/media/handlers/anthropic_handler.py +19 -28
- abstractcore/media/handlers/local_handler.py +124 -70
- abstractcore/media/handlers/openai_handler.py +19 -31
- abstractcore/media/processors/__init__.py +4 -2
- abstractcore/media/processors/audio_processor.py +57 -0
- abstractcore/media/processors/office_processor.py +8 -3
- abstractcore/media/processors/pdf_processor.py +46 -3
- abstractcore/media/processors/text_processor.py +22 -24
- abstractcore/media/processors/video_processor.py +58 -0
- abstractcore/media/types.py +97 -4
- abstractcore/media/utils/image_scaler.py +20 -2
- abstractcore/media/utils/video_frames.py +219 -0
- abstractcore/media/vision_fallback.py +136 -22
- abstractcore/processing/__init__.py +32 -3
- abstractcore/processing/basic_deepsearch.py +15 -10
- abstractcore/processing/basic_intent.py +3 -2
- abstractcore/processing/basic_judge.py +3 -2
- abstractcore/processing/basic_summarizer.py +1 -1
- abstractcore/providers/__init__.py +3 -1
- abstractcore/providers/anthropic_provider.py +95 -8
- abstractcore/providers/base.py +1516 -81
- abstractcore/providers/huggingface_provider.py +546 -69
- abstractcore/providers/lmstudio_provider.py +35 -923
- abstractcore/providers/mlx_provider.py +382 -35
- abstractcore/providers/model_capabilities.py +5 -1
- abstractcore/providers/ollama_provider.py +99 -15
- abstractcore/providers/openai_compatible_provider.py +406 -180
- abstractcore/providers/openai_provider.py +188 -44
- abstractcore/providers/openrouter_provider.py +76 -0
- abstractcore/providers/registry.py +61 -5
- abstractcore/providers/streaming.py +138 -33
- abstractcore/providers/vllm_provider.py +92 -817
- abstractcore/server/app.py +461 -13
- abstractcore/server/audio_endpoints.py +139 -0
- abstractcore/server/vision_endpoints.py +1319 -0
- abstractcore/structured/handler.py +316 -41
- abstractcore/tools/common_tools.py +5501 -2012
- abstractcore/tools/comms_tools.py +1641 -0
- abstractcore/tools/core.py +37 -7
- abstractcore/tools/handler.py +4 -9
- abstractcore/tools/parser.py +49 -2
- abstractcore/tools/tag_rewriter.py +2 -1
- abstractcore/tools/telegram_tdlib.py +407 -0
- abstractcore/tools/telegram_tools.py +261 -0
- abstractcore/utils/cli.py +1085 -72
- abstractcore/utils/token_utils.py +2 -0
- abstractcore/utils/truncation.py +29 -0
- abstractcore/utils/version.py +3 -4
- abstractcore/utils/vlm_token_calculator.py +12 -2
- abstractcore-2.11.2.dist-info/METADATA +562 -0
- abstractcore-2.11.2.dist-info/RECORD +133 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/WHEEL +1 -1
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/entry_points.txt +1 -0
- abstractcore-2.9.1.dist-info/METADATA +0 -1190
- abstractcore-2.9.1.dist-info/RECORD +0 -119
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.9.1.dist-info → abstractcore-2.11.2.dist-info}/top_level.txt +0 -0
abstractcore/providers/base.py
CHANGED
@@ -9,8 +9,10 @@ import warnings
 import json
 import re
 import socket
-
-from
+import hashlib
+from collections import deque, OrderedDict
+from dataclasses import dataclass, field
+from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type, TYPE_CHECKING, Tuple
 from abc import ABC, abstractmethod
 
 try:
@@ -26,14 +28,20 @@ from ..events import EventType, Event
 from datetime import datetime
 from ..utils.structured_logging import get_logger
 from ..utils.jsonish import loads_dict_like
+from ..utils.truncation import preview_text
 from ..exceptions import (
     ProviderAPIError,
     AuthenticationError,
     RateLimitError,
     InvalidRequestError,
+    UnsupportedFeatureError,
     ModelNotFoundError
 )
 from ..architectures import detect_architecture, get_architecture_format, get_model_capabilities
+from ..architectures.response_postprocessing import (
+    normalize_assistant_text,
+    strip_output_wrappers,
+)
 from ..tools import execute_tools
 from ..core.retry import RetryManager, RetryConfig
 
@@ -42,6 +50,178 @@ if TYPE_CHECKING: # pragma: no cover
     from ..media.types import MediaContent
 
 
+@dataclass
+class _PromptCacheEntry:
+    value: Any
+    created_at_s: float
+    last_accessed_at_s: float
+    ttl_s: Optional[float] = None
+    meta: Dict[str, Any] = field(default_factory=dict)
+
+
+class PromptCacheStore:
+    """Best-effort in-process prompt cache store (LRU + optional TTL).
+
+    Providers can store arbitrary backend-specific cache objects keyed by a caller-provided string
+    (`prompt_cache_key`). This is primarily useful for local inference backends (MLX, llama.cpp).
+
+    Notes:
+    - This store is intentionally simple and in-process only.
+    - Callers should treat prompt caches as potentially sensitive (they contain user prompt state).
+    """
+
+    def __init__(self, *, max_entries: int = 32, default_ttl_s: Optional[float] = None):
+        self._max_entries = int(max_entries) if max_entries and int(max_entries) > 0 else 32
+        self._default_ttl_s = default_ttl_s if default_ttl_s is None else float(default_ttl_s)
+        self._entries: "OrderedDict[str, _PromptCacheEntry]" = OrderedDict()
+
+    def _is_expired(self, entry: _PromptCacheEntry) -> bool:
+        ttl_s = entry.ttl_s if entry.ttl_s is not None else self._default_ttl_s
+        if ttl_s is None:
+            return False
+        return (time.time() - entry.last_accessed_at_s) > float(ttl_s)
+
+    def get(self, key: str) -> Optional[Any]:
+        if not isinstance(key, str) or not key.strip():
+            return None
+        key = key.strip()
+        entry = self._entries.get(key)
+        if entry is None:
+            return None
+        if self._is_expired(entry):
+            self.delete(key)
+            return None
+        entry.last_accessed_at_s = time.time()
+        self._entries.move_to_end(key)
+        return entry.value
+
+    def set(
+        self,
+        key: str,
+        value: Any,
+        *,
+        ttl_s: Optional[float] = None,
+        meta: Optional[Dict[str, Any]] = None,
+    ) -> None:
+        if not isinstance(key, str) or not key.strip():
+            raise ValueError("prompt cache key must be a non-empty string")
+        key = key.strip()
+        now = time.time()
+        self._entries[key] = _PromptCacheEntry(
+            value=value,
+            created_at_s=now,
+            last_accessed_at_s=now,
+            ttl_s=ttl_s,
+            meta=dict(meta or {}),
+        )
+        self._entries.move_to_end(key)
+        while len(self._entries) > self._max_entries:
+            self._entries.popitem(last=False)
+
+    def delete(self, key: str) -> bool:
+        if not isinstance(key, str) or not key.strip():
+            return False
+        key = key.strip()
+        return self._entries.pop(key, None) is not None
+
+    def clear(self) -> None:
+        self._entries.clear()
+
+    def stats(self) -> Dict[str, Any]:
+        # Opportunistically purge expired entries.
+        expired = []
+        for k, v in self._entries.items():
+            if self._is_expired(v):
+                expired.append(k)
+        for k in expired:
+            self.delete(k)
+
+        return {
+            "entries": len(self._entries),
+            "max_entries": self._max_entries,
+            "default_ttl_s": self._default_ttl_s,
+        }
+
+    def keys(self) -> List[str]:
+        return list(self._entries.keys())
+
+    def meta(self, key: str) -> Optional[Dict[str, Any]]:
+        if not isinstance(key, str) or not key.strip():
+            return None
+        entry = self._entries.get(key.strip())
+        if entry is None:
+            return None
+        return dict(entry.meta or {})
+
+
+@dataclass(frozen=True)
+class PromptCacheModule:
+    """A single cacheable module of prompt context.
+
+    This is intentionally generic and JSON-serializable so higher-level layers (runtime/agent/memory)
+    can express cache intent without hard-coding provider-specific prompt formats.
+    """
+
+    module_id: str
+    system_prompt: Optional[str] = None
+    prompt: Optional[str] = None
+    messages: Optional[List[Dict[str, Any]]] = None
+    tools: Optional[List[Dict[str, Any]]] = None
+    add_generation_prompt: bool = False
+    scope: str = "private"  # "private" | "shared" (advisory; enforcement is host-dependent)
+    meta: Dict[str, Any] = field(default_factory=dict)
+
+    def normalized(self) -> "PromptCacheModule":
+        module_id = str(self.module_id or "").strip()
+        system_prompt = str(self.system_prompt).strip() if isinstance(self.system_prompt, str) and self.system_prompt else None
+        prompt = str(self.prompt).strip() if isinstance(self.prompt, str) and self.prompt else None
+        messages = None
+        if isinstance(self.messages, list) and self.messages:
+            out: List[Dict[str, Any]] = []
+            for m in self.messages:
+                if isinstance(m, dict):
+                    out.append(dict(m))
+            messages = out or None
+        tools = None
+        if isinstance(self.tools, list) and self.tools:
+            out_tools: List[Dict[str, Any]] = []
+            for t in self.tools:
+                if isinstance(t, dict):
+                    out_tools.append(dict(t))
+            tools = out_tools or None
+        add_generation_prompt = bool(self.add_generation_prompt)
+        scope = str(self.scope or "private").strip().lower() or "private"
+        if scope not in {"private", "shared"}:
+            scope = "private"
+        meta = dict(self.meta or {})
+        return PromptCacheModule(
+            module_id=module_id,
+            system_prompt=system_prompt,
+            prompt=prompt,
+            messages=messages,
+            tools=tools,
+            add_generation_prompt=add_generation_prompt,
+            scope=scope,
+            meta=meta,
+        )
+
+    def fingerprint(self, *, version: int = 1) -> str:
+        """Stable module fingerprint for hierarchical cache keys (hex sha256)."""
+        mod = self.normalized()
+        payload = {
+            "v": int(version),
+            "module_id": mod.module_id,
+            "system_prompt": mod.system_prompt,
+            "prompt": mod.prompt,
+            "messages": mod.messages,
+            "tools": mod.tools,
+            "add_generation_prompt": bool(mod.add_generation_prompt),
+            "scope": mod.scope,
+        }
+        raw = json.dumps(payload, sort_keys=True, ensure_ascii=False, separators=(",", ":"))
+        return hashlib.sha256(raw.encode("utf-8")).hexdigest()
+
+
 class BaseProvider(AbstractCoreInterface, ABC):
     """
     Base provider class with integrated telemetry and events.
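
The `PromptCacheStore` added in this hunk is a plain LRU map with optional idle-TTL expiry, and `PromptCacheModule.fingerprint()` derives a stable key for a chunk of prompt context. A minimal usage sketch, assuming abstractcore 2.11.2 is installed and these names are importable from `abstractcore.providers.base` exactly as added above:

    # Illustrative only: exercises the classes introduced in the hunk above.
    from abstractcore.providers.base import PromptCacheStore, PromptCacheModule

    store = PromptCacheStore(max_entries=2, default_ttl_s=None)
    store.set("session-a", {"kv": "opaque backend state"}, meta={"model": "demo"})
    store.set("session-b", {"kv": "..."})
    store.set("session-c", {"kv": "..."})       # exceeds max_entries: LRU evicts "session-a"
    assert store.get("session-a") is None
    print(store.stats())                        # {'entries': 2, 'max_entries': 2, 'default_ttl_s': None}

    # Stable content-addressed key for a cacheable module of prompt context.
    module = PromptCacheModule(module_id="system", system_prompt="You are a helpful assistant.")
    print(module.fingerprint())                 # sha256 hex digest; same inputs -> same key
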
@@ -60,6 +240,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
         self.architecture_config = get_architecture_format(self.architecture)
         self.model_capabilities = get_model_capabilities(model)
 
+        # #[WARNING:TIMEOUT]
         # Setup timeout configuration (centralized defaults).
         #
         # Semantics:
@@ -136,12 +317,33 @@ class BaseProvider(AbstractCoreInterface, ABC):
         self.enable_tracing = kwargs.get('enable_tracing', False)
         self._traces = deque(maxlen=kwargs.get('max_traces', 100))  # Ring buffer for memory efficiency
 
+        # Prompt caching (best-effort; provider-specific behavior).
+        #
+        # - Remote providers (OpenAI): supports `prompt_cache_key` pass-through (server-managed caching).
+        # - Local runtimes (MLX / llama.cpp): can store KV/prefix caches in-process keyed by `prompt_cache_key`.
+        self._default_prompt_cache_key: Optional[str] = None
+        prompt_cache_max_entries = kwargs.get("prompt_cache_max_entries", kwargs.get("prompt_cache_max_items", 32))
+        prompt_cache_ttl_s = kwargs.get("prompt_cache_ttl_s", None)
+        self._prompt_cache_store = PromptCacheStore(
+            max_entries=int(prompt_cache_max_entries) if prompt_cache_max_entries is not None else 32,
+            default_ttl_s=prompt_cache_ttl_s,
+        )
+
         # Provider created successfully - no event emission needed
         # (The simplified event system focuses on generation and tool events only)
 
         # Set default token limits if not provided
         self._initialize_token_limits()
 
+    def __init_subclass__(cls, **kwargs):  # pragma: no cover
+        super().__init_subclass__(**kwargs)
+        # Enforce a single unload path: providers must implement `unload_model()` and must not define `unload()`.
+        if "unload" in cls.__dict__:
+            raise TypeError(
+                f"{cls.__name__} defines unload(). "
+                "Providers must implement unload_model(model_name) and must not provide any other unload entrypoint."
+            )
+
     def _track_generation(self, prompt: str, response: Optional[GenerateResponse],
                           start_time: float, success: bool = True,
                           error: Optional[Exception] = None, stream: bool = False):
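
The `__init_subclass__` hook above rejects any provider that defines `unload()` as soon as the subclass is created, which is cheaper than catching the mistake at call time. A self-contained sketch of the same pattern using a hypothetical stand-in base class (not the real `BaseProvider`):

    # Hypothetical stand-in; mirrors the guard added in the hunk above.
    class _GuardedBase:
        def __init_subclass__(cls, **kwargs):
            super().__init_subclass__(**kwargs)
            if "unload" in cls.__dict__:
                raise TypeError(f"{cls.__name__} defines unload(); implement unload_model() instead.")

    class GoodProvider(_GuardedBase):
        def unload_model(self, model_name):     # the single supported unload path
            return True

    try:
        class BadProvider(_GuardedBase):
            def unload(self):                   # rejected while the class is being defined
                return True
    except TypeError as exc:
        print(exc)
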
@@ -174,7 +376,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
 
         # Emit comprehensive event with all data in one dict
         event_data = {
-            "prompt":
+            "prompt": preview_text(prompt, max_chars=100),
             "success": success,
             "error": str(error) if error else None,
             "response_length": len(response.content) if response and response.content else 0,
@@ -222,7 +424,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
         event_data = {
             "tool_name": tool_name,
             "arguments": arguments,
-            "result":
+            "result": preview_text(result, max_chars=100) if result else None,
             "error": str(error) if error else None,
             "success": success
         }
@@ -268,9 +470,11 @@ class BaseProvider(AbstractCoreInterface, ABC):
 
         # Extract generation parameters
         temperature = kwargs.get('temperature', self.temperature)
+        if temperature is None:
+            temperature = self.temperature
         max_tokens = kwargs.get('max_tokens', self.max_tokens)
         max_output_tokens = kwargs.get('max_output_tokens', self.max_output_tokens)
-        seed = kwargs.get('seed', self.seed)
+        seed = self._normalize_seed(kwargs.get('seed', self.seed))
         top_p = kwargs.get('top_p', getattr(self, 'top_p', None))
         top_k = kwargs.get('top_k', getattr(self, 'top_k', None))
 
@@ -393,7 +597,10 @@ class BaseProvider(AbstractCoreInterface, ABC):
             if _looks_like_timeout(error) and not _has_explicit_duration(msg):
                 t = _configured_timeout_s()
                 if t is not None:
-                    return ProviderAPIError(
+                    return ProviderAPIError(
+                        f"{_provider_label()} API error: timed out after {t}s "
+                        "(configured timeout; set timeout=None or default_timeout=0 for unlimited)"
+                    )
                 return ProviderAPIError(f"{_provider_label()} API error: timed out")
             return error
 
@@ -404,7 +611,10 @@ class BaseProvider(AbstractCoreInterface, ABC):
         if _looks_like_timeout(error):
             t = _configured_timeout_s()
             if t is not None:
-                return ProviderAPIError(
+                return ProviderAPIError(
+                    f"{_provider_label()} API error: timed out after {t}s "
+                    "(configured timeout; set timeout=None or default_timeout=0 for unlimited)"
+                )
             return ProviderAPIError(f"{_provider_label()} API error: timed out")
 
         error_str = str(error).lower()
@@ -418,6 +628,233 @@ class BaseProvider(AbstractCoreInterface, ABC):
         else:
             return ProviderAPIError(f"API error: {error}")
 
+    @staticmethod
+    def _normalize_thinking_request(thinking: Optional[Union[bool, str]]) -> Tuple[Optional[bool], Optional[str]]:
+        """Normalize `thinking=` into (enabled, level).
+
+        - enabled: True/False/None (None == "auto")
+        - level: Optional[str] in {"low","medium","high"} when requested
+        """
+        if thinking is None:
+            return None, None
+
+        if isinstance(thinking, bool):
+            return thinking, None
+
+        if isinstance(thinking, str):
+            s = thinking.strip().lower()
+            if not s or s == "auto":
+                return None, None
+            if s in {"on", "true", "yes"}:
+                return True, None
+            if s in {"off", "false", "no"}:
+                return False, None
+            if s in {"low", "medium", "high"}:
+                return True, s
+
+        raise ValueError('thinking must be one of: None, bool, "auto", "on", "off", "low", "medium", "high"')
+
+    def _model_reasoning_levels(self) -> List[str]:
+        levels = None
+        for src in (self.model_capabilities, self.architecture_config):
+            if not isinstance(src, dict):
+                continue
+            value = src.get("reasoning_levels")
+            if isinstance(value, list) and value:
+                levels = value
+                break
+        if not isinstance(levels, list):
+            return []
+        out: List[str] = []
+        for x in levels:
+            if isinstance(x, str) and x.strip():
+                out.append(x.strip().lower())
+        # Deduplicate while preserving order.
+        seen: set[str] = set()
+        uniq: List[str] = []
+        for x in out:
+            if x in seen:
+                continue
+            seen.add(x)
+            uniq.append(x)
+        return uniq
+
+    def _model_supports_thinking_control(self) -> bool:
+        caps = self.model_capabilities if isinstance(self.model_capabilities, dict) else {}
+        arch = self.architecture_config if isinstance(self.architecture_config, dict) else {}
+
+        if caps.get("thinking_support") is True:
+            return True
+        if isinstance(caps.get("thinking_tags"), (list, tuple)) and len(caps.get("thinking_tags")) == 2:
+            return True
+        if isinstance(caps.get("thinking_output_field"), str) and caps.get("thinking_output_field").strip():
+            return True
+        if self._model_reasoning_levels():
+            return True
+
+        if isinstance(arch.get("thinking_tags"), (list, tuple)) and len(arch.get("thinking_tags")) == 2:
+            return True
+        if isinstance(arch.get("thinking_control"), str) and arch.get("thinking_control").strip():
+            return True
+        if arch.get("reasoning_support") is True:
+            return True
+        if isinstance(arch.get("reasoning_levels"), list) and arch.get("reasoning_levels"):
+            return True
+
+        return False
+
+    def _apply_thinking_request(
+        self,
+        *,
+        thinking: Optional[Union[bool, str]],
+        prompt: str,
+        messages: Optional[List[Dict[str, str]]],
+        system_prompt: Optional[str],
+        kwargs: Dict[str, Any],
+    ) -> Tuple[str, Optional[List[Dict[str, str]]], Optional[str], Dict[str, Any]]:
+        """Apply unified thinking controls to the request."""
+        enabled, level = self._normalize_thinking_request(thinking)
+        if enabled is None and level is None:
+            return prompt, messages, system_prompt, kwargs
+
+        supports_control = self._model_supports_thinking_control()
+        reasoning_levels = self._model_reasoning_levels()
+
+        if level is not None and reasoning_levels and level not in reasoning_levels:
+            warnings.warn(
+                f"thinking level '{level}' requested but not supported for model '{self.model}' "
+                f"(supported: {reasoning_levels}); falling back to thinking='on'.",
+                RuntimeWarning,
+                stacklevel=3,
+            )
+            level = None
+            enabled = True
+
+        if level is not None and not reasoning_levels:
+            warnings.warn(
+                f"thinking level '{level}' requested but model '{self.model}' has no configured reasoning_levels; "
+                "falling back to thinking='on'.",
+                RuntimeWarning,
+                stacklevel=3,
+            )
+            level = None
+            enabled = True
+
+        handled_by_model_prompt = False
+
+        # Harmony (GPT-OSS): control via system message `Reasoning: low|medium|high`.
+        msg_fmt = str((self.architecture_config or {}).get("message_format") or "").strip().lower()
+        resp_fmt = str((self.model_capabilities or {}).get("response_format") or "").strip().lower()
+        is_harmony = msg_fmt == "harmony" or resp_fmt == "harmony"
+        if is_harmony:
+            target_level: Optional[str] = None
+            if level is not None:
+                target_level = level
+            elif enabled is False:
+                warnings.warn(
+                    f"thinking='off' requested for Harmony model '{self.model}', but GPT-OSS reasoning traces "
+                    "cannot be fully disabled; using Reasoning: low.",
+                    RuntimeWarning,
+                    stacklevel=3,
+                )
+                target_level = "low"
+            elif enabled is True:
+                # Make the default explicit when the caller opts-in.
+                target_level = "medium"
+
+            if target_level:
+                line = f"Reasoning: {target_level}"
+                if isinstance(system_prompt, str) and system_prompt.strip():
+                    # Replace any existing Reasoning line; otherwise prepend.
+                    if re.search(r"(?mi)^\\s*Reasoning\\s*:\\s*(low|medium|high)\\s*$", system_prompt):
+                        system_prompt = re.sub(
+                            r"(?mi)^\\s*Reasoning\\s*:\\s*(low|medium|high)\\s*$",
+                            line,
+                            system_prompt,
+                            count=1,
+                        )
+                    else:
+                        system_prompt = f"{line}\n{system_prompt}"
+                else:
+                    system_prompt = line
+                handled_by_model_prompt = True
+
+        # Model-level control token for disabling thinking (e.g., GLM `/nothink`).
+        thinking_control = None
+        for src in (self.model_capabilities, self.architecture_config):
+            if not isinstance(src, dict):
+                continue
+            token = src.get("thinking_control")
+            if isinstance(token, str) and token.strip():
+                thinking_control = token.strip()
+
+        if enabled is False and thinking_control:
+            handled_by_model_prompt = True
+
+            def _append_control(text: str) -> str:
+                if thinking_control in text:
+                    return text
+                return f"{text.rstrip()}\n{thinking_control}".strip()
+
+            if isinstance(prompt, str) and prompt.strip():
+                prompt = _append_control(prompt)
+            elif isinstance(messages, list) and messages:
+                # Append to the most recent user turn, if possible.
+                new_messages: List[Dict[str, str]] = []
+                appended = False
+                for m in messages:
+                    if not isinstance(m, dict):
+                        continue
+                    new_messages.append(dict(m))
+                for m in reversed(new_messages):
+                    if m.get("role") == "user" and isinstance(m.get("content"), str) and m["content"].strip():
+                        m["content"] = _append_control(m["content"])
+                        appended = True
+                        break
+                messages = new_messages
+                if not appended:
+                    warnings.warn(
+                        f"thinking='off' requested for model '{self.model}', but no user prompt was available "
+                        f"to append thinking_control='{thinking_control}'.",
+                        RuntimeWarning,
+                        stacklevel=3,
+                    )
+
+        kwargs, handled_by_provider = self._apply_provider_thinking_kwargs(
+            enabled=enabled,
+            level=level,
+            kwargs=kwargs,
+        )
+
+        if not supports_control and thinking is not None:
+            warnings.warn(
+                f"thinking={thinking!r} requested but model '{self.model}' is not marked as thinking-capable "
+                "in model_capabilities.json; the request may be ignored.",
+                RuntimeWarning,
+                stacklevel=3,
+            )
+
+        if not handled_by_model_prompt and not handled_by_provider and (enabled is False or level is not None):
+            warnings.warn(
+                f"thinking={thinking!r} requested but provider '{self.provider or self.__class__.__name__}' "
+                "does not implement a thinking control mapping for this model; the request may be ignored.",
+                RuntimeWarning,
+                stacklevel=3,
+            )
+
+        return prompt, messages, system_prompt, kwargs
+
+    def _apply_provider_thinking_kwargs(
+        self,
+        *,
+        enabled: Optional[bool],
+        level: Optional[str],
+        kwargs: Dict[str, Any],
+    ) -> Tuple[Dict[str, Any], bool]:
+        """Provider-specific thinking knob hook (default: unsupported)."""
+        _ = (enabled, level)
+        return kwargs, False
+
     def generate_with_telemetry(self,
                                 prompt: str,
                                 messages: Optional[List[Dict[str, str]]] = None,
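
The `thinking=` values accepted above normalize to an (enabled, level) pair before any provider-specific mapping is applied. A short sketch calling the private static helper directly, for illustration only (assumes abstractcore 2.11.2 is installed):

    # Mapping of thinking= inputs per _normalize_thinking_request above.
    from abstractcore.providers.base import BaseProvider

    norm = BaseProvider._normalize_thinking_request
    print(norm(None))      # (None, None)  -> "auto": leave the model's default behavior alone
    print(norm("auto"))    # (None, None)
    print(norm(True))      # (True, None)
    print(norm("off"))     # (False, None)
    print(norm("medium"))  # (True, 'medium')
    # Anything else (e.g. norm("maximum")) raises ValueError.
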
@@ -430,6 +867,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
                                 tool_call_tags: Optional[str] = None,  # Tool call tag rewriting
                                 execute_tools: Optional[bool] = None,  # Tool execution control
                                 glyph_compression: Optional[str] = None,  # Glyph compression preference
+                                thinking: Optional[Union[bool, str]] = None,  # Unified reasoning/thinking control
                                 **kwargs) -> Union[GenerateResponse, Iterator[GenerateResponse], BaseModel]:
         """
         Generate with integrated telemetry and error handling.
@@ -447,6 +885,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
             tool_call_tags: Optional tool call tag format for rewriting
             execute_tools: Whether to execute tools automatically (True) or let agent handle execution (False)
             glyph_compression: Glyph compression preference ("auto", "always", "never")
+            thinking: Unified reasoning/thinking control (auto/on/off or low/medium/high when supported)
         """
         # Normalize token limit naming at the provider boundary.
         #
@@ -458,6 +897,18 @@ class BaseProvider(AbstractCoreInterface, ABC):
         if "max_output_tokens" not in kwargs and "max_tokens" in kwargs and kwargs.get("max_tokens") is not None:
             kwargs["max_output_tokens"] = kwargs.pop("max_tokens")
 
+        # Prompt caching: apply a default `prompt_cache_key` if configured.
+        self._apply_default_prompt_cache_key(kwargs)
+
+        # Apply unified thinking controls (provider-agnostic + provider-specific mappings).
+        prompt, messages, system_prompt, kwargs = self._apply_thinking_request(
+            thinking=thinking,
+            prompt=prompt,
+            messages=messages,
+            system_prompt=system_prompt,
+            kwargs=kwargs,
+        )
+
         # Handle structured output request
         if response_model is not None:
             if not PYDANTIC_AVAILABLE:
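
At the call site, the new keyword simply flows through `generate_with_telemetry()`. A hedged sketch, given some already-constructed provider instance `llm` (provider construction and the public `generate()` pass-through are not shown in this diff and are assumed here):

    # Assumption: `llm` is a configured provider; only generate_with_telemetry() is shown in this hunk.
    resp = llm.generate_with_telemetry(
        "Summarize the trade-offs of KV prefix caching.",
        thinking="low",   # None/"auto", True/False, "on"/"off", or "low"/"medium"/"high"
    )
    print(resp.content)
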
@@ -466,8 +917,12 @@ class BaseProvider(AbstractCoreInterface, ABC):
                     "Install with: pip install pydantic>=2.0.0"
                 )
 
-            # Handle hybrid case: tools + structured output
-
+            # Handle hybrid case: tools + structured output.
+            #
+            # NOTE: `tools=[]` should behave like "no tools". Treating an empty list as
+            # "tools present" triggers the hybrid 2-pass flow (unstructured call + structured
+            # follow-up) which is both slower and can cause provider-side timeouts/unloads.
+            if isinstance(tools, list) and len(tools) > 0:
                 return self._handle_tools_with_structured_output(
                     prompt=prompt,
                     messages=messages,
@@ -500,6 +955,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
         # Process media content if provided
         processed_media = None
         media_metadata = None
+        media_enrichment = None
         if media:
             compression_pref = glyph_compression or kwargs.get('glyph_compression', 'auto')
             processed_media = self._process_media_content(media, compression_pref)
@@ -511,6 +967,639 @@ class BaseProvider(AbstractCoreInterface, ABC):
                     if hasattr(media_content, 'metadata') and media_content.metadata:
                         media_metadata.append(media_content.metadata)
 
+        # Audio input policy (v0): avoid placeholder degradation and require explicit fallbacks.
+        if processed_media:
+            try:
+                from ..media.types import ContentFormat, MediaType
+                from ..media.enrichment import build_enrichment_item
+                from ..capabilities.errors import CapabilityUnavailableError
+            except Exception:
+                ContentFormat = None  # type: ignore[assignment]
+                MediaType = None  # type: ignore[assignment]
+                build_enrichment_item = None  # type: ignore[assignment]
+                CapabilityUnavailableError = Exception  # type: ignore[assignment]
+
+            if MediaType is not None:
+                audio_items = [mc for mc in processed_media if getattr(mc, "media_type", None) == MediaType.AUDIO]
+            else:
+                audio_items = []
+
+            if audio_items:
+                # Resolve policy: per-call kwarg > config default.
+                policy_raw = kwargs.pop("audio_policy", None)
+                if policy_raw is None:
+                    policy_raw = kwargs.pop("audio_handling_policy", None)
+                if policy_raw is None:
+                    try:
+                        from ..config.manager import get_config_manager
+
+                        policy_raw = getattr(get_config_manager().config, "audio", None).strategy  # type: ignore[union-attr]
+                    except Exception:
+                        policy_raw = "native_only"
+
+                policy = str(policy_raw or "native_only").strip().lower()
+                model_supports_audio = bool(getattr(self, "model_capabilities", {}).get("audio_support", False))
+
+                if policy in ("native_only", "native", "disabled"):
+                    if not model_supports_audio:
+                        raise UnsupportedFeatureError(
+                            f"Audio input is not supported by model '{self.model}'. "
+                            "Choose an audio-capable model, or pass audio_policy='speech_to_text' "
+                            "(requires an STT capability plugin, e.g. install abstractvoice)."
+                        )
+                    # Keep audio media for provider-native handling (provider support may still vary).
+
+                elif policy in ("speech_to_text", "stt"):
+                    stt_language = kwargs.pop("audio_language", None)
+                    if stt_language is None:
+                        stt_language = kwargs.pop("stt_language", None)
+                    if stt_language is None:
+                        try:
+                            from ..config.manager import get_config_manager
+
+                            stt_language = getattr(get_config_manager().config, "audio", None).stt_language  # type: ignore[union-attr]
+                        except Exception:
+                            stt_language = None
+
+                    audio_context_parts: List[str] = []
+                    enrichments: List[Dict[str, Any]] = []
+
+                    # Resolve backend id (best-effort) for transparency metadata.
+                    backend_id = getattr(getattr(self, "audio", None), "backend_id", None)
+                    backend = {"kind": "plugin"}
+                    if isinstance(backend_id, str) and backend_id.strip():
+                        backend["backend_id"] = backend_id.strip()
+
+                    for idx, mc in enumerate(audio_items):
+                        name = None
+                        try:
+                            name = mc.metadata.get("file_name") if hasattr(mc, "metadata") and isinstance(mc.metadata, dict) else None
+                        except Exception:
+                            name = None
+                        if not isinstance(name, str) or not name.strip():
+                            name = mc.file_path if getattr(mc, "file_path", None) else f"audio_{idx+1}"
+
+                        # Prefer a file path when available.
+                        audio_input: Any = None
+                        try:
+                            if getattr(mc, "file_path", None):
+                                audio_input = str(mc.file_path)
+                            elif getattr(mc, "content_format", None) == ContentFormat.FILE_PATH and isinstance(getattr(mc, "content", None), str):
+                                audio_input = str(mc.content)
+                            elif isinstance(getattr(mc, "content", None), (bytes, bytearray)):
+                                audio_input = bytes(mc.content)
+                        except Exception:
+                            audio_input = None
+
+                        if audio_input is None:
+                            raise UnsupportedFeatureError("Audio STT fallback requires a file path or raw bytes for the audio input.")
+
+                        try:
+                            transcript = self.audio.transcribe(audio_input, language=stt_language)
+                        except CapabilityUnavailableError as e:  # type: ignore[misc]
+                            raise UnsupportedFeatureError(str(e))
+
+                        transcript = str(transcript or "").strip()
+                        audio_context_parts.append(f"Audio {idx+1} ({name}): {transcript}")
+                        if build_enrichment_item is not None:
+                            enrichments.append(
+                                build_enrichment_item(
+                                    status="used",
+                                    input_modality="audio",
+                                    summary_kind="transcript",
+                                    policy="speech_to_text",
+                                    backend=backend,
+                                    input_index=idx + 1,
+                                    input_name=str(name),
+                                    injected_text=transcript,
+                                )
+                            )
+
+                    # Remove audio media from the provider call (we injected text context instead).
+                    processed_media = [mc for mc in processed_media if getattr(mc, "media_type", None) != MediaType.AUDIO]
+
+                    # Inject audio context into the prompt (similar recency semantics as vision fallback).
+                    original_prompt = prompt.strip() if isinstance(prompt, str) else ""
+                    parts: List[str] = []
+                    parts.append(
+                        "Audio context from attached audio file(s) "
+                        "(treat as directly observed; do not mention this section):"
+                    )
+                    parts.extend(audio_context_parts)
+                    if original_prompt:
+                        parts.append("Now answer the user's request:")
+                        parts.append(original_prompt)
+                    prompt = "\n\n".join(parts) if parts else original_prompt
+
+                    media_enrichment = enrichments
+
+                elif policy == "auto":
+                    if model_supports_audio:
+                        pass  # provider-native path
+                    else:
+                        # Explicit "auto" allows fallback, but never silently for default policy.
+                        # Re-enter through the explicit STT path by recursion is risky; inline minimal.
+                        stt_language = kwargs.pop("audio_language", None) or kwargs.pop("stt_language", None)
+                        audio_context_parts: List[str] = []
+                        enrichments: List[Dict[str, Any]] = []
+                        backend_id = getattr(getattr(self, "audio", None), "backend_id", None)
+                        backend = {"kind": "plugin"}
+                        if isinstance(backend_id, str) and backend_id.strip():
+                            backend["backend_id"] = backend_id.strip()
+                        for idx, mc in enumerate(audio_items):
+                            name = None
+                            try:
+                                name = mc.metadata.get("file_name") if hasattr(mc, "metadata") and isinstance(mc.metadata, dict) else None
+                            except Exception:
+                                name = None
+                            if not isinstance(name, str) or not name.strip():
+                                name = mc.file_path if getattr(mc, "file_path", None) else f"audio_{idx+1}"
+                            audio_input: Any = None
+                            try:
+                                if getattr(mc, "file_path", None):
+                                    audio_input = str(mc.file_path)
+                                elif getattr(mc, "content_format", None) == ContentFormat.FILE_PATH and isinstance(getattr(mc, "content", None), str):
+                                    audio_input = str(mc.content)
+                                elif isinstance(getattr(mc, "content", None), (bytes, bytearray)):
+                                    audio_input = bytes(mc.content)
+                            except Exception:
+                                audio_input = None
+                            if audio_input is None:
+                                raise UnsupportedFeatureError("Audio STT fallback requires a file path or raw bytes for the audio input.")
+                            try:
+                                transcript = self.audio.transcribe(audio_input, language=stt_language)
+                            except CapabilityUnavailableError as e:  # type: ignore[misc]
+                                raise UnsupportedFeatureError(str(e))
+                            transcript = str(transcript or "").strip()
+                            audio_context_parts.append(f"Audio {idx+1} ({name}): {transcript}")
+                            if build_enrichment_item is not None:
+                                enrichments.append(
+                                    build_enrichment_item(
+                                        status="used",
+                                        input_modality="audio",
+                                        summary_kind="transcript",
+                                        policy="auto",
+                                        backend=backend,
+                                        input_index=idx + 1,
+                                        input_name=str(name),
+                                        injected_text=transcript,
+                                    )
+                                )
+                        processed_media = [mc for mc in processed_media if getattr(mc, "media_type", None) != MediaType.AUDIO]
+                        original_prompt = prompt.strip() if isinstance(prompt, str) else ""
+                        parts: List[str] = []
+                        parts.append(
+                            "Audio context from attached audio file(s) "
+                            "(treat as directly observed; do not mention this section):"
+                        )
+                        parts.extend(audio_context_parts)
+                        if original_prompt:
+                            parts.append("Now answer the user's request:")
+                            parts.append(original_prompt)
+                        prompt = "\n\n".join(parts) if parts else original_prompt
+                        media_enrichment = enrichments
+
+                elif policy == "caption":
+                    raise UnsupportedFeatureError(
+                        "audio_policy='caption' is not configured in v0. "
+                        "Use audio_policy='speech_to_text' for speech, or configure a future audio caption backend."
+                    )
+                else:
+                    raise ValueError(f"Unknown audio_policy '{policy}'. Expected one of: native_only, speech_to_text, auto, caption.")
+
+        # Video input policy (v0): allow native video where supported; otherwise fall back to sampled frames.
+        # Note: most providers do not accept native video inputs; frame sampling provides a portable path.
+        if processed_media:
+            try:
+                from ..media.types import MediaType
+                from ..media.enrichment import build_enrichment_item
+            except Exception:
+                MediaType = None  # type: ignore[assignment]
+                build_enrichment_item = None  # type: ignore[assignment]
+
+            if MediaType is not None:
+                video_items = [mc for mc in processed_media if getattr(mc, "media_type", None) == MediaType.VIDEO]
+            else:
+                video_items = []
+
+            if video_items:
+                policy_raw = kwargs.pop("video_policy", None)
+                if policy_raw is None:
+                    policy_raw = kwargs.pop("video_handling_policy", None)
+                if policy_raw is None:
+                    try:
+                        from ..config.manager import get_config_manager
+
+                        policy_raw = getattr(get_config_manager().config, "video", None).strategy  # type: ignore[union-attr]
+                    except Exception:
+                        policy_raw = "native_only"
+
+                policy = str(policy_raw or "native_only").strip().lower()
+
+                provider_name = str(getattr(self, "provider", "") or "").strip().lower()
+                model_supports_native_video = bool(
+                    provider_name == "huggingface"
+                    and isinstance(getattr(self, "model_capabilities", None), dict)
+                    and getattr(self, "model_capabilities", {}).get("video_support", False)
+                )
+
+                cfg_video = None
+                try:
+                    from ..config.manager import get_config_manager
+
+                    cfg_video = getattr(get_config_manager().config, "video", None)
+                except Exception:
+                    cfg_video = None
+
+                # Sampling controls (best-effort; keep small by default).
+                # NOTE: do not `pop` here: native video backends may also need the resolved values.
+                max_frames_raw = kwargs.get("video_max_frames", None)
+                if max_frames_raw is None:
+                    max_frames_raw = kwargs.get("max_video_frames", None)
+                if max_frames_raw is None:
+                    fallback_default = getattr(cfg_video, "max_frames", 3) if cfg_video is not None else 3
+                    native_default = getattr(cfg_video, "max_frames_native", None) if cfg_video is not None else None
+                    if native_default is None:
+                        native_default = fallback_default
+
+                    use_native_default = bool(
+                        model_supports_native_video and policy in ("native_only", "native", "disabled", "auto")
+                    )
+                    max_frames_raw = native_default if use_native_default else fallback_default
+                try:
+                    max_frames = max(1, int(max_frames_raw))
+                except Exception:
+                    max_frames = 3
+
+                frame_format_raw = kwargs.get("video_frame_format", None)
+                if frame_format_raw is None:
+                    try:
+                        from ..config.manager import get_config_manager
+
+                        frame_format_raw = getattr(get_config_manager().config, "video", None).frame_format  # type: ignore[union-attr]
+                    except Exception:
+                        frame_format_raw = "jpg"
+                frame_format = str(frame_format_raw or "jpg").strip().lower()
+                if frame_format not in {"jpg", "jpeg", "png"}:
+                    frame_format = "jpg"
+                if frame_format == "jpeg":
+                    frame_format = "jpg"
+
+                sampling_strategy_raw = kwargs.get("video_sampling_strategy", None)
+                if sampling_strategy_raw is None:
+                    try:
+                        from ..config.manager import get_config_manager
+
+                        sampling_strategy_raw = getattr(get_config_manager().config, "video", None).sampling_strategy  # type: ignore[union-attr]
+                    except Exception:
+                        sampling_strategy_raw = "uniform"
+                sampling_strategy = str(sampling_strategy_raw or "uniform").strip().lower()
+                if sampling_strategy not in {"uniform", "keyframes"}:
+                    sampling_strategy = "uniform"
+
+                max_frame_side_raw = kwargs.get("video_max_frame_side", None)
+                if max_frame_side_raw is None:
+                    max_frame_side_raw = kwargs.get("video_frame_max_side", None)
+                if max_frame_side_raw is None:
+                    max_frame_side_raw = getattr(cfg_video, "max_frame_side", 1024) if cfg_video is not None else 1024
+                try:
+                    max_frame_side = int(max_frame_side_raw) if max_frame_side_raw is not None else None
+                except Exception:
+                    max_frame_side = 1024
+                if isinstance(max_frame_side, int) and max_frame_side <= 0:
+                    max_frame_side = None
+
+                # Expose normalized sampling values to provider-native implementations.
+                kwargs["video_max_frames"] = max_frames
+                kwargs["video_frame_format"] = frame_format
+                kwargs["video_sampling_strategy"] = sampling_strategy
+                kwargs["video_max_frame_side"] = max_frame_side
+
+                if policy in ("native_only", "native", "disabled"):
+                    if not model_supports_native_video:
+                        raise UnsupportedFeatureError(
+                            f"Video input is not supported by model '{self.model}'. "
+                            "Choose a video-capable model, or pass video_policy='frames_caption' "
+                            "(samples frames and uses vision/image handling)."
+                        )
+                    # Keep video media for provider-native handling.
+                    try:
+                        from pathlib import Path
+
+                        from ..media.utils.video_frames import probe_duration_s
+
+                        for idx, mc in enumerate(video_items):
+                            video_path_raw = getattr(mc, "file_path", None) or getattr(mc, "content", None)
+                            if not isinstance(video_path_raw, str) or not video_path_raw.strip():
+                                continue
+                            vp = Path(video_path_raw)
+                            duration_s = probe_duration_s(vp)
+                            file_bytes = None
+                            try:
+                                file_bytes = int(vp.stat().st_size)
+                            except Exception:
+                                file_bytes = None
+
+                            avg_gap_s = None
+                            try:
+                                if isinstance(duration_s, (int, float)) and duration_s > 0 and max_frames > 0:
+                                    avg_gap_s = float(duration_s) / float(max_frames + 1)
+                            except Exception:
+                                avg_gap_s = None
+
+                            self.logger.info(
+                                "Video input policy: native video enabled (video will be sampled/budgeted for model input).",
+                                provider=provider_name,
+                                model=self.model,
+                                video_policy=policy,
+                                video_index=idx + 1,
+                                video_name=vp.name,
+                                video_duration_s=duration_s,
+                                video_bytes=file_bytes,
+                                video_max_frames=max_frames,
+                                video_sampling_strategy=sampling_strategy,
+                                video_max_frame_side=max_frame_side,
+                                video_avg_gap_s=avg_gap_s,
+                            )
+                            if isinstance(avg_gap_s, float) and avg_gap_s >= 10.0:
+                                self.logger.warning(
+                                    "Video sampling is sparse; important events may be missed. "
+                                    "Consider increasing video_max_frames/video.max_frames_native or using keyframes sampling.",
+                                    provider=provider_name,
+                                    model=self.model,
+                                    video_policy=policy,
+                                    video_name=vp.name,
+                                    video_duration_s=duration_s,
+                                    video_max_frames=max_frames,
+                                    video_avg_gap_s=avg_gap_s,
+                                )
+                    except Exception:
+                        pass
+
+                    # Insert a short marker to disambiguate native-video inputs across turns.
+                    #
+                    # Without this, follow-ups like "and this one?" can be brittle for native
+                    # video VLMs (they may over-weight the previous text-only answer and ignore
+                    # that a *new* video is attached in the current call).
+                    try:
+                        from ..media.types import MediaContent, ContentFormat
+                    except Exception:
+                        MediaContent = None  # type: ignore[assignment]
+                        ContentFormat = None  # type: ignore[assignment]
+
+                    if MediaContent is not None and ContentFormat is not None:
+                        try:
+                            from pathlib import Path
+
+                            from ..media.utils.video_frames import probe_duration_s
+                        except Exception:
+                            Path = None  # type: ignore[assignment]
+                            probe_duration_s = None  # type: ignore[assignment]
+
+                        new_media: List[Any] = []
+                        video_group_index = 0
+                        for mc in processed_media:
+                            if getattr(mc, "media_type", None) != MediaType.VIDEO:  # type: ignore[operator]
+                                new_media.append(mc)
+                                continue
+
+                            video_group_index += 1
+                            video_path_raw = getattr(mc, "file_path", None) or getattr(mc, "content", None)
+
+                            video_name = f"video_{video_group_index}"
+                            duration_s = None
+                            file_bytes = None
+                            try:
+                                if Path is not None and isinstance(video_path_raw, str) and video_path_raw.strip():
+                                    vp = Path(video_path_raw)
+                                    video_name = vp.name or video_name
+                                    try:
+                                        file_bytes = int(vp.stat().st_size)
+                                    except Exception:
+                                        file_bytes = None
+                                    if callable(probe_duration_s):
+                                        try:
+                                            duration_s = probe_duration_s(vp)
+                                        except Exception:
+                                            duration_s = None
+                            except Exception:
+                                duration_s = None
+                                file_bytes = None
+
+                            marker = MediaContent(
+                                media_type=MediaType.TEXT,
+                                content=(
+                                    f"Video {video_group_index} ({video_name}) is attached below. "
+                                    "This is the current video for this user message. "
+                                    "Answer the user's question about this video as if you watched it. "
+                                    "If earlier turns mention other videos, images, or audio, ignore them unless the user explicitly asks you to compare. "
+                                    "Do not mention tool activity, attachments lists, sampling, frames, extraction, or this marker."
+                                ),
+                                content_format=ContentFormat.TEXT,
+                                mime_type="text/plain",
+                                file_path=None,
+                                metadata={
+                                    "processor": "VideoNativeInputMarker",
+                                    "source_video": video_name,
+                                    "duration_s": duration_s,
+                                    "bytes": file_bytes,
+                                    "max_frames": max_frames,
+                                    "sampling_strategy": sampling_strategy,
+                                    "max_frame_side": max_frame_side,
+                                },
+                            )
+                            new_media.append(marker)
+                            new_media.append(mc)
+
+                        processed_media = new_media
+
+                elif policy in ("frames_caption", "frames", "frame_caption"):
+                    # Convert each video into a small set of sampled frames (images).
+                    try:
+                        from pathlib import Path
+                        import tempfile
+
+                        from ..media import AutoMediaHandler
+                        from ..media.utils.video_frames import extract_video_frames, probe_duration_s
+                    except Exception as e:
+                        raise UnsupportedFeatureError(f"Video frame fallback is not available: {e}")
+
+                    enrichments: List[Dict[str, Any]] = []
+                    new_media: List[Any] = []
+
+                    video_group_index = 0
+                    for idx, mc in enumerate(processed_media):
+                        if getattr(mc, "media_type", None) != MediaType.VIDEO:  # type: ignore[operator]
+                            new_media.append(mc)
+                            continue
+
+                        video_group_index += 1
+                        video_path_raw = getattr(mc, "file_path", None) or getattr(mc, "content", None)
+                        if not isinstance(video_path_raw, str) or not video_path_raw.strip():
+                            raise UnsupportedFeatureError("Video frame fallback requires a video file path.")
+                        video_path = Path(video_path_raw)
+                        if not video_path.exists():
+                            raise UnsupportedFeatureError(f"Video file not found: {video_path}")
+
+                        out_dir = Path(tempfile.mkdtemp(prefix="abstractcore_video_frames_"))
+                        duration_s = probe_duration_s(video_path)
+                        file_bytes = None
+                        try:
+                            file_bytes = int(video_path.stat().st_size)
+                        except Exception:
+                            file_bytes = None
+                        frames, timestamps_s = extract_video_frames(
+                            video_path,
+                            max_frames=max_frames,
+                            frame_format=frame_format,
+                            sampling_strategy=sampling_strategy,
+                            max_side=max_frame_side,
+                            output_dir=out_dir,
+                        )
+                        if not frames:
+                            raise UnsupportedFeatureError("Video frame fallback failed: no frames extracted.")
+
+                        handler = AutoMediaHandler(enable_glyph_compression=False)
+                        frame_media: List[Any] = []
+                        max_res = None
+                        if isinstance(max_frame_side, int) and max_frame_side > 0:
+                            max_res = (max_frame_side, max_frame_side)
+                        for fp in frames:
+                            res = handler.process_file(
+                                fp,
+                                provider=self.provider,
+                                model=self.model,
+                                glyph_compression="never",
+                                max_resolution=max_res,
+                            )
+                            if res and getattr(res, "success", False) and getattr(res, "media_content", None) is not None:
+                                frame_media.append(res.media_content)
+
+                        if not frame_media:
+                            raise UnsupportedFeatureError("Video frame fallback failed: extracted frames could not be processed as images.")
+
+                        avg_gap_s = None
+                        try:
+                            if isinstance(duration_s, (int, float)) and duration_s > 0 and max_frames > 0:
+                                avg_gap_s = float(duration_s) / float(max_frames + 1)
+                        except Exception:
+                            avg_gap_s = None
+
+                        self.logger.info(
+                            "Video input policy: frames_caption (sampling frames for downstream vision handling).",
+                            provider=provider_name,
+                            model=self.model,
+                            video_policy="frames_caption",
+                            video_index=video_group_index,
+                            video_name=video_path.name,
+                            video_duration_s=duration_s,
+                            video_bytes=file_bytes,
+                            extracted_frames=len(frame_media),
+                            video_max_frames=max_frames,
+                            video_sampling_strategy=sampling_strategy,
+                            video_max_frame_side=max_frame_side,
+                            video_avg_gap_s=avg_gap_s,
+                        )
+                        if isinstance(avg_gap_s, float) and avg_gap_s >= 10.0:
+                            self.logger.warning(
+                                "Video sampling is sparse; important events may be missed. "
+                                "Consider increasing video_max_frames/video.max_frames or using keyframes sampling.",
+                                provider=provider_name,
+                                model=self.model,
+                                video_policy="frames_caption",
+                                video_name=video_path.name,
+                                video_duration_s=duration_s,
+                                extracted_frames=len(frame_media),
+                                video_max_frames=max_frames,
+                                video_avg_gap_s=avg_gap_s,
+                            )
+
+                        # Insert a short text marker to avoid the model treating sampled frames as
+                        # unrelated standalone images (especially in follow-up prompts like "and this one?").
+                        try:
+                            from ..media.types import MediaContent, ContentFormat
+                        except Exception:
+                            MediaContent = None  # type: ignore[assignment]
+                            ContentFormat = None  # type: ignore[assignment]
+
+                        if MediaContent is not None and ContentFormat is not None:
+                            marker = MediaContent(
+                                media_type=MediaType.TEXT,
+                                content=(
+                                    f"Video {video_group_index} ({video_path.name}) — "
+                                    "the following images belong to this video in chronological order. "
+                                    "Answer the user's question about this video as if you watched it. "
+                                    "Do not mention frames, timestamps, sampling, extraction, or this marker."
+                                ),
+                                content_format=ContentFormat.TEXT,
+                                mime_type="text/plain",
+                                file_path=None,
+                                metadata={
+                                    "processor": "VideoFrameFallback",
+                                    "source_video": video_path.name,
+                                    "frame_count": len(frame_media),
+                                    "timestamps_s": timestamps_s,
+                                    "duration_s": duration_s,
+                                    "bytes": file_bytes,
+                                },
+                            )
+                            new_media.append(marker)
+
+                        new_media.extend(frame_media)
+
+                        if build_enrichment_item is not None:
+                            enrichments.append(
+                                build_enrichment_item(
+                                    status="used",
+                                    input_modality="video",
+                                    summary_kind="frames",
+                                    policy="frames_caption",
+                                    backend={"kind": "unknown", "source": "ffmpeg"},
+                                    input_index=idx + 1,
+                                    input_name=str(video_path.name),
+                                    artifact={
+                                        "frame_count": len(frame_media),
+                                        "timestamps_s": timestamps_s,
+                                        "duration_s": duration_s,
+                                        "bytes": file_bytes,
+                                    },
+                                )
+                            )
+
+                    processed_media = new_media
+                    if enrichments:
+                        if media_enrichment is None:
+                            media_enrichment = enrichments
+                        else:
+                            media_enrichment.extend(enrichments)
+
+                elif policy == "auto":
+                    if model_supports_native_video:
+                        # Use native video when available.
+                        pass
+                    else:
+                        # Auto fallback: sample frames and proceed with existing image pipeline.
+                        # This works well for vision-capable models; for text-only models it requires a vision fallback.
+                        policy_to_use = "frames_caption"
+                        kwargs["video_policy"] = policy_to_use
+                        # Re-run this branch once with explicit policy.
+                        return self.generate_with_telemetry(
+                            prompt=prompt,
+                            messages=messages,
|
+
system_prompt=system_prompt,
|
|
1590
|
+
tools=tools,
|
|
1591
|
+
media=processed_media,
|
|
1592
|
+
response_model=response_model,
|
|
1593
|
+
retry_strategy=retry_strategy,
|
|
1594
|
+
tool_call_tags=tool_call_tags,
|
|
1595
|
+
execute_tools=execute_tools,
|
|
1596
|
+
stream=stream,
|
|
1597
|
+
**kwargs,
|
|
1598
|
+
)
|
|
1599
|
+
|
|
1600
|
+
else:
|
|
1601
|
+
raise ValueError(f"Unknown video_policy '{policy}'. Expected one of: native_only, frames_caption, auto.")
|
|
1602
|
+
|
|
514
1603
|
# Convert tools to ToolDefinition objects first (outside retry loop)
|
|
515
1604
|
converted_tools = None
|
|
516
1605
|
if tools:
|
|
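For context, a minimal caller-side sketch of the video policy branch above. It assumes the public `generate()` entry point forwards `media`, `video_policy`, and `video_max_frames` keyword arguments into `generate_with_telemetry`, and that `create_llm` is the usual factory; provider, model, and file names are placeholders.

```python
# Hypothetical usage sketch (provider/model/file names are placeholders, not from this diff).
# video_policy accepts "native_only", "frames_caption", or "auto" per the branch above;
# "auto" falls back to frame sampling when the model has no native video support.
from abstractcore import create_llm  # assumed import path

llm = create_llm("ollama", model="qwen2.5-vl:7b")
response = llm.generate(
    "What happens in this clip?",
    media=["clip.mp4"],
    video_policy="auto",      # native video if supported, else sampled frames
    video_max_frames=16,      # denser sampling narrows the gap the warning above describes
)
print(response.content)
```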
@@ -545,7 +1634,10 @@ class BaseProvider(AbstractCoreInterface, ABC):
545 1634   if not should_execute_tools and converted_tools:
546 1635   # If tools are provided but execution is disabled,
547 1636   # we still pass them to the provider for generation but won't execute them
548 - self.logger.
1637 + self.logger.debug(
1638 + "Provider-side tool execution disabled (expected for runtime/host tool execution); "
1639 + "tools will be sent for generation only."
1640 + )
549 1641
550 1642   # Define generation function for retry wrapper
551 1643   def _execute_generation():
@@ -554,7 +1646,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
554 1646
555 1647   # Emit generation started event (covers request received)
556 1648   event_data = {
557 - "prompt":
1649 + "prompt": preview_text(prompt, max_chars=100),
558 1650   "has_tools": bool(tools),
559 1651   "stream": stream,
560 1652   "model": self.model,
@@ -613,7 +1705,11 @@ class BaseProvider(AbstractCoreInterface, ABC):
613 1705   ttft_ms: Optional[float] = None
614 1706   for processed_chunk in processor.process_stream(response, converted_tools):
615 1707   if isinstance(processed_chunk.content, str) and processed_chunk.content:
616 - processed_chunk.content =
1708 + processed_chunk.content = strip_output_wrappers(
1709 + processed_chunk.content,
1710 + architecture_format=self.architecture_config,
1711 + model_capabilities=self.model_capabilities,
1712 + )
617 1713   if ttft_ms is None:
618 1714   has_content = isinstance(processed_chunk.content, str) and bool(processed_chunk.content)
619 1715   has_tools = isinstance(processed_chunk.tool_calls, list) and bool(processed_chunk.tool_calls)
@@ -651,9 +1747,29 @@ class BaseProvider(AbstractCoreInterface, ABC):
651 1747   if tool_call_tags and response.content and not self._should_clean_tool_call_markup(tool_call_tags):
652 1748   response = self._apply_non_streaming_tag_rewriting(response, tool_call_tags)
653 1749
654 - #
1750 + # Normalize provider output (wrapper tokens, Harmony transcripts, think tags).
655 1751   if response and isinstance(response.content, str) and response.content:
656 -
1752 + cleaned, reasoning = normalize_assistant_text(
1753 + response.content,
1754 + architecture_format=self.architecture_config,
1755 + model_capabilities=self.model_capabilities,
1756 + )
1757 + response.content = cleaned
1758 + if isinstance(reasoning, str) and reasoning.strip():
1759 + if response.metadata is None or not isinstance(response.metadata, dict):
1760 + response.metadata = {}
1761 + existing = response.metadata.get("reasoning")
1762 + if isinstance(existing, str) and existing.strip():
1763 + if reasoning.strip() not in existing:
1764 + response.metadata["reasoning"] = f"{existing.strip()}\n\n{reasoning.strip()}"
1765 + else:
1766 + response.metadata["reasoning"] = reasoning.strip()
1767 +
1768 + # Attach media enrichment transparency metadata (caption/STT/etc.).
1769 + if media_enrichment and response:
1770 + from ..media.enrichment import merge_enrichment_metadata
1771 +
1772 + response.metadata = merge_enrichment_metadata(response.metadata, media_enrichment)
657 1773
658 1774   # Add visual token calculation if media metadata is available
659 1775   if media_metadata and response:
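Because the normalization above starts writing extracted reasoning into `response.metadata`, a small sketch of how a caller might read it follows. Only the `"reasoning"` key is shown in this hunk; any further metadata layout is an assumption.

```python
# Minimal sketch, assuming the "reasoning" metadata key set in the hunk above.
# `llm` is the instance from the earlier sketch; prompt and printing are illustrative.
response = llm.generate("Why is the sky blue?")
print(response.content)  # cleaned assistant text (wrapper tokens / think tags removed)

reasoning = (response.metadata or {}).get("reasoning")
if reasoning:
    print("--- extracted model reasoning ---")
    print(reasoning)
```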
@@ -689,7 +1805,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
689 1805   emit_global(EventType.ERROR, {
690 1806   "error": str(e),
691 1807   "error_type": type(e).__name__,
692 - "prompt":
1808 + "prompt": preview_text(prompt, max_chars=100),
693 1809   "model": self.model,
694 1810   "provider": self.__class__.__name__
695 1811   }, source=self.__class__.__name__)
@@ -980,12 +2096,37 @@ class BaseProvider(AbstractCoreInterface, ABC):
980 2096   result_kwargs["max_output_tokens"] = effective_max_output_i
981 2097
982 2098   # Add unified generation parameters with fallback hierarchy: kwargs → instance → defaults
983 -
984 - if
985 -
2099 + temperature = result_kwargs.get("temperature", self.temperature)
2100 + if temperature is None:
2101 + temperature = self.temperature
2102 + result_kwargs["temperature"] = temperature
2103 +
2104 + seed_value = self._normalize_seed(result_kwargs.get("seed", self.seed))
2105 + if seed_value is not None:
2106 + result_kwargs["seed"] = seed_value
2107 + else:
2108 + # Do not forward seed when unset/random (None or negative sentinel like -1).
2109 + result_kwargs.pop("seed", None)
986 2110
987 2111   return result_kwargs
988 2112
2113 + @staticmethod
2114 + def _normalize_seed(seed: Any) -> Optional[int]:
2115 + """Normalize seed semantics across providers.
2116 +
2117 + - None or any negative value -> None (meaning: don't send a provider seed / random).
2118 + - Non-bool numeric-ish values -> int(seed) if >= 0.
2119 + """
2120 + try:
2121 + if seed is None:
2122 + return None
2123 + if isinstance(seed, bool):
2124 + return None
2125 + seed_i = int(seed)
2126 + return seed_i if seed_i >= 0 else None
2127 + except Exception:
2128 + return None
2129 +
989 2130   def _extract_generation_params(self, **kwargs) -> Dict[str, Any]:
990 2131   """
991 2132   Extract generation parameters with consistent fallback hierarchy.
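A quick doctest-style illustration of the seed normalization rules added above; the input values are illustrative, and the expectations follow directly from the `_normalize_seed` body shown in this hunk.

```python
# Expected behaviour of BaseProvider._normalize_seed per its docstring and body:
# unset or negative seeds mean "random / don't forward", booleans are rejected.
assert BaseProvider._normalize_seed(None) is None   # unset -> no seed sent
assert BaseProvider._normalize_seed(-1) is None     # random sentinel -> no seed sent
assert BaseProvider._normalize_seed(True) is None   # bools are not seeds
assert BaseProvider._normalize_seed(42) == 42       # non-negative int passes through
assert BaseProvider._normalize_seed("7") == 7       # numeric-ish strings are coerced
```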
@@ -996,10 +2137,13 @@ class BaseProvider(AbstractCoreInterface, ABC):
996 2137   params = {}
997 2138
998 2139   # Temperature (always present)
999 -
2140 + temperature = kwargs.get("temperature", self.temperature)
2141 + if temperature is None:
2142 + temperature = self.temperature
2143 + params["temperature"] = temperature
1000 2144
1001 2145   # Seed (only if not None)
1002 - seed_value = kwargs.get("seed", self.seed)
2146 + seed_value = self._normalize_seed(kwargs.get("seed", self.seed))
1003 2147   if seed_value is not None:
1004 2148   params["seed"] = seed_value
1005 2149
@@ -1041,7 +2185,10 @@ class BaseProvider(AbstractCoreInterface, ABC):
1041 2185
1042 2186   if not should_execute:
1043 2187   # Tool execution disabled - return response with tool calls but don't execute
1044 - self.logger.
2188 + self.logger.debug(
2189 + "Provider-side tool execution disabled (expected for runtime/host tool execution); "
2190 + "returning response with tool calls."
2191 + )
1045 2192   return response
1046 2193
1047 2194   # Emit tool started event
@@ -1098,7 +2245,8 @@ class BaseProvider(AbstractCoreInterface, ABC):
1098 2245   finish_reason=response.finish_reason,
1099 2246   raw_response=response.raw_response,
1100 2247   usage=response.usage,
1101 - tool_calls=response.tool_calls  # Keep original format
2248 + tool_calls=response.tool_calls,  # Keep original format
2249 + metadata=response.metadata,
1102 2250   )
1103 2251
1104 2252   def _format_tool_results(self, tool_calls: List, tool_results: List) -> str:
@@ -1106,9 +2254,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
1106 2254   results_text = "\n\nTool Results:\n"
1107 2255   for call, result in zip(tool_calls, tool_results):
1108 2256   # Format parameters for display (limit size)
1109 - params_str = str(call.arguments) if call.arguments else "{}"
1110 - if len(params_str) > 100:
1111 - params_str = params_str[:97] + "..."
2257 + params_str = preview_text(str(call.arguments) if call.arguments else "{}", max_chars=100)
1112 2258
1113 2259   # Show tool name and parameters for transparency
1114 2260   results_text += f"🔧 Tool: {call.name}({params_str})\n"
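The inline truncation removed here (and in the event payloads earlier in this diff) is replaced by a shared `preview_text` helper. Its implementation is not part of this hunk, so the following is only a sketch of the behaviour implied by the code it replaces; details such as the ellipsis marker may differ in the real helper.

```python
# Sketch only: approximates the removed inline logic (truncate to max_chars with an ellipsis).
def preview_text(text: str, max_chars: int = 100) -> str:
    text = str(text)
    if len(text) <= max_chars:
        return text
    return text[: max_chars - 3] + "..."
```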
@@ -1174,26 +2320,341 @@ class BaseProvider(AbstractCoreInterface, ABC):
1174 2320   """Update HTTP client timeout if the provider has one. Override in subclasses."""
1175 2321   pass
1176 2322
1177 - #
1178 - def
2323 + # Prompt cache management methods
2324 + def supports_prompt_cache(self) -> bool:
2325 + """Return True if this provider supports best-effort prompt caching.
2326 +
2327 + Semantics differ by provider:
2328 + - Remote providers (OpenAI): `prompt_cache_key` is forwarded; cache is managed server-side.
2329 + - Local providers (MLX / llama.cpp): in-process KV/prefix caches can be retained across calls.
1179 2330   """
1180 -
2331 + return False
2332 +
2333 + # Provider-specific prompt cache backend hooks (optional)
2334 + #
2335 + # Providers that implement in-process KV caching (MLX, llama.cpp, etc.) can override these to enable
2336 + # `prompt_cache_update`, `prompt_cache_fork`, and `prompt_cache_prepare_modules`.
2337 + def _prompt_cache_backend_create(self) -> Optional[Any]:
2338 + return None
2339 +
2340 + def _prompt_cache_backend_clone(self, cache_value: Any) -> Optional[Any]:
2341 + _ = cache_value
2342 + return None
1181 2343
1182 -
1183 -
1184 -
2344 + def _prompt_cache_backend_append(
2345 + self,
2346 + cache_value: Any,
2347 + *,
2348 + prompt: str = "",
2349 + messages: Optional[List[Dict[str, Any]]] = None,
2350 + system_prompt: Optional[str] = None,
2351 + tools: Optional[List[Dict[str, Any]]] = None,
2352 + add_generation_prompt: bool = False,
2353 + **kwargs,
2354 + ) -> bool:
2355 + _ = (cache_value, prompt, messages, system_prompt, tools, add_generation_prompt, kwargs)
2356 + return False
2357 +
2358 + def _prompt_cache_backend_token_count(self, cache_value: Any) -> Optional[int]:
2359 + _ = cache_value
2360 + return None
2361 +
2362 + def _normalize_prompt_cache_key(self, key: Any) -> Optional[str]:
2363 + if not isinstance(key, str):
2364 + return None
2365 + key = key.strip()
2366 + return key if key else None
2367 +
2368 + def _apply_default_prompt_cache_key(self, kwargs: Dict[str, Any]) -> None:
2369 + # Explicit caller override wins (even if None / empty to disable).
2370 + if "prompt_cache_key" in kwargs:
2371 + kwargs["prompt_cache_key"] = self._normalize_prompt_cache_key(kwargs.get("prompt_cache_key"))
2372 + return
2373 +
2374 + if self._default_prompt_cache_key and self.supports_prompt_cache():
2375 + kwargs["prompt_cache_key"] = self._default_prompt_cache_key
2376 +
2377 + def get_prompt_cache_stats(self) -> Dict[str, Any]:
2378 + """Return basic prompt cache stats (in-process store only)."""
2379 + stats = self._prompt_cache_store.stats()
2380 + stats["default_key"] = self._default_prompt_cache_key
2381 + try:
2382 + keys = self._prompt_cache_store.keys()
2383 + if isinstance(keys, list):
2384 + stats["keys"] = list(keys)
2385 + meta_by_key: Dict[str, Any] = {}
2386 + for k in keys:
2387 + meta = self._prompt_cache_store.meta(k)
2388 + if isinstance(meta, dict) and meta:
2389 + meta_by_key[str(k)] = dict(meta)
2390 + if meta_by_key:
2391 + stats["meta_by_key"] = meta_by_key
2392 + except Exception:
2393 + pass
2394 + return stats
1185 2395
1186 -
1187 -
2396 + def prompt_cache_set(self, key: str, *, make_default: bool = True, **kwargs) -> bool:
2397 + """Set the default prompt cache key for this provider instance.
1188 2398
1189 -
1190 -
1191 -
1192 -
1193 -
2399 + Provider-specific cache allocation/warming is implemented by subclasses when applicable.
2400 + """
2401 + normalized = self._normalize_prompt_cache_key(key)
2402 + if normalized is None:
2403 + return False
2404 + if not self.supports_prompt_cache():
2405 + return False
2406 + _ = kwargs
2407 + # Best-effort: allocate backend cache if the provider supports it.
2408 + if self._prompt_cache_store.get(normalized) is None:
2409 + created = self._prompt_cache_backend_create()
2410 + if created is not None:
2411 + try:
2412 + self._prompt_cache_store.set(normalized, created, meta={"backend": "provider"})
2413 + except Exception:
2414 + pass
2415 + if make_default:
2416 + self._default_prompt_cache_key = normalized
2417 + return True
2418 +
2419 + def prompt_cache_update(
2420 + self,
2421 + key: str,
2422 + *,
2423 + prompt: str = "",
2424 + messages: Optional[List[Dict[str, Any]]] = None,
2425 + system_prompt: Optional[str] = None,
2426 + tools: Optional[List[Dict[str, Any]]] = None,
2427 + add_generation_prompt: bool = False,
2428 + ttl_s: Optional[float] = None,
2429 + **kwargs,
2430 + ) -> bool:
2431 + """Append new prompt context into an existing cache key (best-effort).
2432 +
2433 + Semantics:
2434 + - Local runtimes can implement true KV prefill updates (append-only).
2435 + - Remote providers typically cannot be “pre-filled” explicitly; they may ignore this.
2436 +
2437 + Arguments are intentionally similar to `generate()` so higher-level code can reuse its own
2438 + prompt/module construction logic.
2439 + """
2440 + normalized = self._normalize_prompt_cache_key(key)
2441 + if normalized is None:
2442 + return False
2443 + if not self.supports_prompt_cache():
2444 + return False
2445 +
2446 + # Ensure the cache exists if the provider can allocate a backend cache object.
2447 + cache_value = self._prompt_cache_store.get(normalized)
2448 + if cache_value is None:
2449 + if not self.prompt_cache_set(normalized, make_default=False):
2450 + return False
2451 + cache_value = self._prompt_cache_store.get(normalized)
2452 + if cache_value is None:
2453 + return False
2454 +
2455 + ok = self._prompt_cache_backend_append(
2456 + cache_value,
2457 + prompt=str(prompt or ""),
2458 + messages=messages,
2459 + system_prompt=system_prompt,
2460 + tools=tools,
2461 + add_generation_prompt=bool(add_generation_prompt),
2462 + **kwargs,
2463 + )
2464 + if not ok:
2465 + return False
2466 +
2467 + # Update TTL/metadata best-effort.
2468 + if ttl_s is not None:
2469 + try:
2470 + meta = self._prompt_cache_store.meta(normalized) or {}
2471 + self._prompt_cache_store.set(normalized, cache_value, ttl_s=ttl_s, meta=meta)
2472 + except Exception:
2473 + pass
2474 + return True
2475 +
2476 + def prompt_cache_fork(
2477 + self,
2478 + from_key: str,
2479 + to_key: str,
2480 + *,
2481 + make_default: bool = False,
2482 + ttl_s: Optional[float] = None,
2483 + **kwargs,
2484 + ) -> bool:
2485 + """Create a new cache key by cloning another cache (best-effort).
2486 +
2487 + This is the primitive needed for hierarchical/module caches:
2488 + - build stable shared prefixes (persona, memory blueprints, tool schemas)
2489 + - fork them into per-session caches that can be appended/mutated safely.
2490 + """
2491 + _ = kwargs
2492 + src = self._normalize_prompt_cache_key(from_key)
2493 + dst = self._normalize_prompt_cache_key(to_key)
2494 + if src is None or dst is None:
2495 + return False
2496 + if not self.supports_prompt_cache():
2497 + return False
2498 +
2499 + src_value = self._prompt_cache_store.get(src)
2500 + if src_value is None:
2501 + return False
2502 +
2503 + cloned = self._prompt_cache_backend_clone(src_value)
2504 + if cloned is None:
2505 + return False
2506 +
2507 + try:
2508 + meta = self._prompt_cache_store.meta(src) or {}
2509 + meta = dict(meta)
2510 + meta.setdefault("forked_from", src)
2511 + self._prompt_cache_store.set(dst, cloned, ttl_s=ttl_s, meta=meta)
2512 + except Exception:
2513 + return False
2514 +
2515 + if make_default:
2516 + self._default_prompt_cache_key = dst
2517 + return True
2518 +
2519 + def prompt_cache_prepare_modules(
2520 + self,
2521 + *,
2522 + namespace: str,
2523 + modules: List[Union[PromptCacheModule, Dict[str, Any]]],
2524 + make_default: bool = False,
2525 + ttl_s: Optional[float] = None,
2526 + version: int = 1,
2527 + ) -> Dict[str, Any]:
2528 + """Ensure hierarchical prefix caches exist for an ordered module list (best-effort).
2529 +
2530 + This builds immutable prefix caches (by derived keys) so callers can:
2531 + - reuse stable sub-prefixes (persona, memory blueprints, etc.)
2532 + - fork the final prefix into a per-session cache for incremental chat
2533 +
2534 + Returns a JSON-serializable dict containing per-module derived keys.
2535 + """
2536 + ns = str(namespace or "").strip()
2537 + if not ns:
2538 + return {"supported": False, "error": "namespace required"}
2539 + if not self.supports_prompt_cache():
2540 + return {"supported": False, "error": "provider does not support prompt caching"}
2541 +
2542 + normalized_modules: List[PromptCacheModule] = []
2543 + for m in modules or []:
2544 + if isinstance(m, PromptCacheModule):
2545 + normalized_modules.append(m.normalized())
2546 + elif isinstance(m, dict):
2547 + try:
2548 + normalized_modules.append(PromptCacheModule(**m).normalized())
2549 + except Exception:
2550 + continue
2551 +
2552 + if not normalized_modules:
2553 + return {"supported": False, "error": "no modules provided"}
2554 +
2555 + # Derive deterministic prefix keys per module boundary.
2556 + prefix_hash = hashlib.sha256(f"acore-prompt-cache:{int(version)}".encode("utf-8")).hexdigest()
2557 + derived: List[Dict[str, Any]] = []
2558 + keys: List[str] = []
2559 + for mod in normalized_modules:
2560 + prefix_hash = hashlib.sha256((prefix_hash + mod.fingerprint(version=version)).encode("utf-8")).hexdigest()
2561 + key = f"{ns}:{prefix_hash[:16]}"
2562 + keys.append(key)
2563 + derived.append({"module_id": mod.module_id, "cache_key": key, "module_hash": mod.fingerprint(version=version)})
2564 +
2565 + # Find the longest existing prefix cache.
2566 + start_idx = -1
2567 + for i, key in enumerate(keys):
2568 + if self._prompt_cache_store.get(key) is None:
2569 + break
2570 + start_idx = i
2571 +
2572 + # Start from existing prefix (clone to avoid mutating the stored snapshot).
2573 + current_cache: Optional[Any] = None
2574 + if start_idx >= 0:
2575 + existing = self._prompt_cache_store.get(keys[start_idx])
2576 + if existing is not None:
2577 + current_cache = self._prompt_cache_backend_clone(existing) or None
2578 +
2579 + # If we have no starting cache, start from empty backend cache.
2580 + if current_cache is None:
2581 + current_cache = self._prompt_cache_backend_create()
2582 + if current_cache is None:
2583 + return {"supported": False, "error": "provider does not implement in-process cache backend"}
2584 +
2585 + # Build missing caches.
2586 + for j in range(start_idx + 1, len(keys)):
2587 + mod = normalized_modules[j]
2588 + ok = self._prompt_cache_backend_append(
2589 + current_cache,
2590 + prompt=str(mod.prompt or ""),
2591 + messages=mod.messages,
2592 + system_prompt=mod.system_prompt,
2593 + tools=mod.tools,
2594 + add_generation_prompt=bool(mod.add_generation_prompt),
2595 + )
2596 + if not ok:
2597 + return {"supported": False, "error": f"failed to append module '{mod.module_id}'"}
2598 +
2599 + snapshot = self._prompt_cache_backend_clone(current_cache) or None
2600 + if snapshot is None:
2601 + return {"supported": False, "error": "provider does not support cache cloning"}
2602 +
2603 + meta = {
2604 + "namespace": ns,
2605 + "module_id": mod.module_id,
2606 + "module_hash": mod.fingerprint(version=version),
2607 + "index": j,
2608 + "backend": "provider",
2609 + }
2610 + tok = self._prompt_cache_backend_token_count(snapshot)
2611 + if isinstance(tok, int) and tok >= 0:
2612 + meta["token_count"] = tok
2613 +
2614 + self._prompt_cache_store.set(keys[j], snapshot, ttl_s=ttl_s, meta=meta)
2615 +
2616 + if make_default:
2617 + self._default_prompt_cache_key = keys[-1]
2618 +
2619 + return {
2620 + "supported": True,
2621 + "namespace": ns,
2622 + "version": int(version),
2623 + "modules": derived,
2624 + "final_cache_key": keys[-1],
2625 + }
2626 +
2627 + def prompt_cache_clear(self, key: Optional[str] = None) -> bool:
2628 + """Clear prompt caches for this provider instance (best-effort)."""
2629 + normalized = self._normalize_prompt_cache_key(key) if key is not None else None
2630 + if not self.supports_prompt_cache():
2631 + return False
2632 +
2633 + if normalized is None:
2634 + self._default_prompt_cache_key = None
2635 + self._prompt_cache_store.clear()
2636 + return True
2637 +
2638 + cleared = self._prompt_cache_store.delete(normalized)
2639 + if self._default_prompt_cache_key == normalized:
2640 + self._default_prompt_cache_key = None
2641 + return cleared
2642 +
2643 + # Memory management methods
2644 + @abstractmethod
2645 + def unload_model(self, model_name: str) -> None:
2646 + """
2647 + Unload/cleanup resources for a specific model.
2648 +
2649 + This is the single canonical unload entrypoint across providers.
2650 + Providers must implement this as a best-effort cleanup hook:
2651 +
2652 + - In-process providers (e.g. MLX, HuggingFace): free local model resources.
2653 + - Some self-hosted servers (e.g. Ollama): may request server-side eviction/unload.
2654 + - OpenAI-compatible servers (e.g. LMStudio, vLLM, openai-compatible): typically only close client
2655 + connections; server-side model unloading may not be available and is controlled by the server (TTL/eviction).
2656 + - Cloud APIs (e.g. OpenAI, Anthropic): usually a no-op (safe to call).
1194 2657   """
1195 - # Default implementation does nothing (suitable for API providers)
1196 - pass
1197 2658
1198 2659   # Token configuration helpers - expose interface methods for user convenience
1199 2660   def get_token_configuration_summary(self) -> str:
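The prompt-cache surface added above is easiest to see end to end. The sketch below strings the new methods together; it assumes a provider instance whose backend overrides the `_prompt_cache_backend_*` hooks (e.g. an MLX or llama.cpp backed instance), and the module contents and key names are placeholders.

```python
# Hedged usage sketch of the new prompt-cache API on a provider instance `llm`.
# On providers without an in-process cache backend these calls return False / {"supported": False}.
if llm.supports_prompt_cache():
    # Build hierarchical prefix caches: a persona module first, then tool schemas.
    prepared = llm.prompt_cache_prepare_modules(
        namespace="assistant-v1",
        modules=[
            {"module_id": "persona", "system_prompt": "You are a concise assistant."},
            {"module_id": "tools", "prompt": "Available tools: ..."},  # placeholder content
        ],
    )
    if prepared.get("supported"):
        # Fork the shared prefix into a per-session cache and make it the default key,
        # so later generate() calls pick it up via _apply_default_prompt_cache_key().
        llm.prompt_cache_fork(prepared["final_cache_key"], "session-123", make_default=True)
        llm.prompt_cache_update("session-123", prompt="User said hello earlier.")
        print(llm.get_prompt_cache_stats())
```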
@@ -1202,7 +2663,19 @@ class BaseProvider(AbstractCoreInterface, ABC):
1202 2663
1203 2664   def validate_token_constraints(self) -> List[str]:
1204 2665   """Validate token configuration and return warnings/suggestions"""
1205 -
2666 + warnings_list = super().validate_token_constraints()
2667 +
2668 + # Embedding models are not text-generative: output token limits are irrelevant and can
2669 + # legitimately be 0 (e.g. Nomic Embed). Suppress misleading output-token warnings.
2670 + try:
2671 + caps = getattr(self, "model_capabilities", None)
2672 + model_type = caps.get("model_type") if isinstance(caps, dict) else None
2673 + if isinstance(model_type, str) and model_type.strip().lower() == "embedding":
2674 + warnings_list = [w for w in warnings_list if "max_output_tokens" not in str(w)]
2675 + except Exception:
2676 + pass
2677 +
2678 + return warnings_list
1206 2679
1207 2680   def calculate_token_budget(self, input_text: str, desired_output_tokens: int,
1208 2681   safety_margin: float = 0.1) -> tuple[int, List[str]]:
@@ -1239,7 +2712,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
1239 2712   except ImportError as e:
1240 2713   raise ImportError(
1241 2714   f"Media processing requires additional dependencies. "
1242 - f"Install with: pip install abstractcore[media]. Error: {e}"
2715 + f"Install with: pip install \"abstractcore[media]\". Error: {e}"
1243 2716   )
1244 2717
1245 2718   processed_media = []
@@ -1506,45 +2979,6 @@ class BaseProvider(AbstractCoreInterface, ABC):
1506 2979   # Return original response if rewriting fails
1507 2980   return response
1508 2981
1509 - def _strip_output_wrappers(self, content: str) -> str:
1510 - """Strip known model-specific wrapper tokens around assistant output.
1511 -
1512 - Some model/server combinations emit wrapper tokens like:
1513 - <|begin_of_box|> ... <|end_of_box|>
1514 - We remove these only when they appear as leading/trailing wrappers (not when
1515 - embedded mid-text).
1516 - """
1517 - if not isinstance(content, str) or not content:
1518 - return content
1519 -
1520 - wrappers: Dict[str, str] = {}
1521 - for src in (self.architecture_config, self.model_capabilities):
1522 - if not isinstance(src, dict):
1523 - continue
1524 - w = src.get("output_wrappers")
1525 - if not isinstance(w, dict):
1526 - continue
1527 - start = w.get("start")
1528 - end = w.get("end")
1529 - if isinstance(start, str) and start.strip():
1530 - wrappers.setdefault("start", start.strip())
1531 - if isinstance(end, str) and end.strip():
1532 - wrappers.setdefault("end", end.strip())
1533 -
1534 - if not wrappers:
1535 - return content
1536 -
1537 - out = content
1538 - start_token = wrappers.get("start")
1539 - end_token = wrappers.get("end")
1540 -
1541 - if isinstance(start_token, str) and start_token:
1542 - out = re.sub(r"^\s*" + re.escape(start_token) + r"\s*", "", out, count=1)
1543 - if isinstance(end_token, str) and end_token:
1544 - out = re.sub(r"\s*" + re.escape(end_token) + r"\s*$", "", out, count=1)
1545 -
1546 - return out
1547 -
1548 2982   def _normalize_tool_calls_passthrough(
1549 2983   self,
1550 2984   *,
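The removal above pairs with the streaming change earlier in this diff, where chunk content now goes through a module-level `strip_output_wrappers(...)` helper instead of this method. The shared helper's code is not part of this hunk, so the following is only a sketch of the leading/trailing stripping behaviour the deleted method implemented.

```python
import re

# Sketch based on the deleted _strip_output_wrappers: remove configured wrapper tokens
# (e.g. "<|begin_of_box|>" / "<|end_of_box|>") only when they lead or trail the text.
def strip_wrappers(content: str, start_token: str, end_token: str) -> str:
    out = content
    if start_token:
        out = re.sub(r"^\s*" + re.escape(start_token) + r"\s*", "", out, count=1)
    if end_token:
        out = re.sub(r"\s*" + re.escape(end_token) + r"\s*$", "", out, count=1)
    return out

print(strip_wrappers("<|begin_of_box|>42<|end_of_box|>", "<|begin_of_box|>", "<|end_of_box|>"))  # -> "42"
```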
@@ -2014,6 +3448,7 @@ Please provide a structured response."""
2014 3448   Returns:
2015 3449   GenerateResponse, AsyncIterator[GenerateResponse] for streaming, or BaseModel for structured output
2016 3450   """
3451 + self._apply_default_prompt_cache_key(kwargs)
2017 3452   response = await self._agenerate_internal(
2018 3453   prompt, messages, system_prompt, tools, media, stream, **kwargs
2019 3454   )