abstractcore 2.6.9__py3-none-any.whl → 2.9.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- abstractcore/apps/summarizer.py +69 -27
- abstractcore/architectures/detection.py +190 -25
- abstractcore/assets/architecture_formats.json +129 -6
- abstractcore/assets/model_capabilities.json +803 -141
- abstractcore/config/main.py +2 -2
- abstractcore/config/manager.py +3 -1
- abstractcore/events/__init__.py +7 -1
- abstractcore/mcp/__init__.py +30 -0
- abstractcore/mcp/client.py +213 -0
- abstractcore/mcp/factory.py +64 -0
- abstractcore/mcp/naming.py +28 -0
- abstractcore/mcp/stdio_client.py +336 -0
- abstractcore/mcp/tool_source.py +164 -0
- abstractcore/processing/__init__.py +2 -2
- abstractcore/processing/basic_deepsearch.py +1 -1
- abstractcore/processing/basic_summarizer.py +379 -93
- abstractcore/providers/anthropic_provider.py +91 -10
- abstractcore/providers/base.py +540 -16
- abstractcore/providers/huggingface_provider.py +17 -8
- abstractcore/providers/lmstudio_provider.py +170 -25
- abstractcore/providers/mlx_provider.py +13 -10
- abstractcore/providers/ollama_provider.py +42 -26
- abstractcore/providers/openai_compatible_provider.py +87 -22
- abstractcore/providers/openai_provider.py +12 -9
- abstractcore/providers/streaming.py +201 -39
- abstractcore/providers/vllm_provider.py +78 -21
- abstractcore/server/app.py +116 -30
- abstractcore/structured/retry.py +20 -7
- abstractcore/tools/__init__.py +46 -24
- abstractcore/tools/abstractignore.py +166 -0
- abstractcore/tools/arg_canonicalizer.py +61 -0
- abstractcore/tools/common_tools.py +2443 -742
- abstractcore/tools/core.py +109 -13
- abstractcore/tools/handler.py +17 -3
- abstractcore/tools/parser.py +894 -159
- abstractcore/tools/registry.py +122 -18
- abstractcore/tools/syntax_rewriter.py +68 -6
- abstractcore/tools/tag_rewriter.py +186 -1
- abstractcore/utils/jsonish.py +111 -0
- abstractcore/utils/version.py +1 -1
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/METADATA +56 -2
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/RECORD +46 -37
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/WHEEL +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/entry_points.txt +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/licenses/LICENSE +0 -0
- {abstractcore-2.6.9.dist-info → abstractcore-2.9.1.dist-info}/top_level.txt +0 -0
abstractcore/providers/base.py
CHANGED
|
@@ -5,8 +5,12 @@ Base provider with integrated telemetry, events, and exception handling.
|
|
|
5
5
|
import time
|
|
6
6
|
import uuid
|
|
7
7
|
import asyncio
|
|
8
|
+
import warnings
|
|
9
|
+
import json
|
|
10
|
+
import re
|
|
11
|
+
import socket
|
|
8
12
|
from collections import deque
|
|
9
|
-
from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
|
|
13
|
+
from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type, TYPE_CHECKING
|
|
10
14
|
from abc import ABC, abstractmethod
|
|
11
15
|
|
|
12
16
|
try:
|
|
@@ -21,6 +25,7 @@ from ..core.types import GenerateResponse
|
|
|
21
25
|
from ..events import EventType, Event
|
|
22
26
|
from datetime import datetime
|
|
23
27
|
from ..utils.structured_logging import get_logger
|
|
28
|
+
from ..utils.jsonish import loads_dict_like
|
|
24
29
|
from ..exceptions import (
|
|
25
30
|
ProviderAPIError,
|
|
26
31
|
AuthenticationError,
|
|
@@ -32,6 +37,10 @@ from ..architectures import detect_architecture, get_architecture_format, get_mo
|
|
|
32
37
|
from ..tools import execute_tools
|
|
33
38
|
from ..core.retry import RetryManager, RetryConfig
|
|
34
39
|
|
|
40
|
+
if TYPE_CHECKING: # pragma: no cover
|
|
41
|
+
# Imported for type checking only to avoid hard dependencies in minimal installs.
|
|
42
|
+
from ..media.types import MediaContent
|
|
43
|
+
|
|
35
44
|
|
|
36
45
|
class BaseProvider(AbstractCoreInterface, ABC):
|
|
37
46
|
"""
|
|
@@ -51,15 +60,64 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
51
60
|
self.architecture_config = get_architecture_format(self.architecture)
|
|
52
61
|
self.model_capabilities = get_model_capabilities(model)
|
|
53
62
|
|
|
54
|
-
# Setup timeout configuration
|
|
55
|
-
#
|
|
56
|
-
|
|
57
|
-
|
|
63
|
+
# Setup timeout configuration (centralized defaults).
|
|
64
|
+
#
|
|
65
|
+
# Semantics:
|
|
66
|
+
# - If the caller passes `timeout=...`, we respect it (including `None` for unlimited).
|
|
67
|
+
# - If the caller omits `timeout`, we use AbstractCore's global config default.
|
|
68
|
+
# - Same logic for `tool_timeout`.
|
|
69
|
+
timeout_provided = "timeout" in kwargs
|
|
70
|
+
tool_timeout_provided = "tool_timeout" in kwargs
|
|
71
|
+
|
|
72
|
+
timeout_value = kwargs.get("timeout", None) if timeout_provided else None
|
|
73
|
+
tool_timeout_value = kwargs.get("tool_timeout", None) if tool_timeout_provided else None
|
|
74
|
+
|
|
75
|
+
if not timeout_provided or not tool_timeout_provided:
|
|
76
|
+
try:
|
|
77
|
+
from ..config.manager import get_config_manager
|
|
78
|
+
|
|
79
|
+
cfg = get_config_manager()
|
|
80
|
+
except Exception:
|
|
81
|
+
cfg = None
|
|
82
|
+
|
|
83
|
+
if not timeout_provided:
|
|
84
|
+
try:
|
|
85
|
+
timeout_value = float(cfg.get_default_timeout()) if cfg is not None else None
|
|
86
|
+
except Exception:
|
|
87
|
+
timeout_value = None
|
|
88
|
+
|
|
89
|
+
if not tool_timeout_provided:
|
|
90
|
+
try:
|
|
91
|
+
tool_timeout_value = float(cfg.get_tool_timeout()) if cfg is not None else None
|
|
92
|
+
except Exception:
|
|
93
|
+
tool_timeout_value = None
|
|
94
|
+
|
|
95
|
+
# Validate timeouts: non-positive numbers become "unlimited" (None).
|
|
96
|
+
try:
|
|
97
|
+
if isinstance(timeout_value, (int, float)) and float(timeout_value) <= 0:
|
|
98
|
+
timeout_value = None
|
|
99
|
+
except Exception:
|
|
100
|
+
pass
|
|
101
|
+
try:
|
|
102
|
+
if isinstance(tool_timeout_value, (int, float)) and float(tool_timeout_value) <= 0:
|
|
103
|
+
tool_timeout_value = None
|
|
104
|
+
except Exception:
|
|
105
|
+
pass
|
|
106
|
+
|
|
107
|
+
self._timeout = timeout_value # None = unlimited HTTP requests
|
|
108
|
+
self._tool_timeout = tool_timeout_value # None = unlimited tool execution
|
|
58
109
|
|
|
59
110
|
# Setup tool execution mode
|
|
60
111
|
# execute_tools: True = AbstractCore executes tools (legacy mode)
|
|
61
112
|
# False = Pass-through mode (default - for API server / agentic CLI)
|
|
62
113
|
self.execute_tools = kwargs.get('execute_tools', False)
|
|
114
|
+
if self.execute_tools:
|
|
115
|
+
warnings.warn(
|
|
116
|
+
"execute_tools=True is deprecated. Prefer passing tools explicitly to generate() "
|
|
117
|
+
"and executing tool calls in the host/runtime via a ToolExecutor.",
|
|
118
|
+
DeprecationWarning,
|
|
119
|
+
stacklevel=2,
|
|
120
|
+
)
|
|
63
121
|
|
|
64
122
|
# Setup retry manager with optional configuration
|
|
65
123
|
retry_config = kwargs.get('retry_config', None)
|
|
@@ -202,6 +260,12 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
202
260
|
"""
|
|
203
261
|
trace_id = str(uuid.uuid4())
|
|
204
262
|
|
|
263
|
+
# If trace retention is disabled, still return a trace_id for correlation
|
|
264
|
+
# without constructing/storing a full trace payload.
|
|
265
|
+
maxlen = getattr(getattr(self, "_traces", None), "maxlen", None)
|
|
266
|
+
if maxlen == 0:
|
|
267
|
+
return trace_id
|
|
268
|
+
|
|
205
269
|
# Extract generation parameters
|
|
206
270
|
temperature = kwargs.get('temperature', self.temperature)
|
|
207
271
|
max_tokens = kwargs.get('max_tokens', self.max_tokens)
|
|
@@ -285,11 +349,64 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
285
349
|
Returns:
|
|
286
350
|
Custom exception
|
|
287
351
|
"""
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
352
|
+
def _provider_label() -> str:
    """Return the provider name to show in normalized error messages.

    Uses the enclosing provider's ``provider`` attribute when it is a
    non-blank string; otherwise falls back to the class name with a trailing
    ``"Provider"`` removed.
    """
    configured = getattr(self, "provider", None)
    if isinstance(configured, str):
        trimmed = configured.strip()
        if trimmed:
            return trimmed

    class_name = self.__class__.__name__
    if class_name.endswith("Provider"):
        # Drop the 8-character "Provider" suffix.
        return class_name[:-8]
    return class_name
|
|
358
|
+
|
|
359
|
+
def _configured_timeout_s() -> Optional[float]:
    """Return the provider's configured timeout in seconds, if usable.

    Reads ``_timeout`` from the enclosing provider. Yields ``None`` when the
    attribute is missing or ``None``, cannot be converted to ``float``, or is
    not strictly positive.
    """
    raw = getattr(self, "_timeout", None)
    if raw is None:
        return None

    try:
        seconds = float(raw)
    except Exception:
        # Unconvertible values are treated the same as "no timeout configured".
        return None

    if seconds > 0:
        return seconds
    return None
|
|
368
|
+
|
|
369
|
+
def _looks_like_timeout(exc: Exception) -> bool:
|
|
370
|
+
# Type-based (preferred)
|
|
371
|
+
if isinstance(exc, (TimeoutError, asyncio.TimeoutError, socket.timeout)):
|
|
372
|
+
return True
|
|
373
|
+
cls = exc.__class__
|
|
374
|
+
name = (getattr(cls, "__name__", "") or "").lower()
|
|
375
|
+
mod = (getattr(cls, "__module__", "") or "").lower()
|
|
376
|
+
if "timeout" in name:
|
|
377
|
+
return True
|
|
378
|
+
if mod.startswith(("httpx", "requests", "aiohttp")) and ("timeout" in name):
|
|
379
|
+
return True
|
|
380
|
+
|
|
381
|
+
# String-based fallback (covers wrapped SDK exceptions)
|
|
382
|
+
msg = str(exc or "").lower()
|
|
383
|
+
return ("timed out" in msg) or ("timeout" in msg) or ("time out" in msg)
|
|
384
|
+
|
|
385
|
+
def _has_explicit_duration(msg: str) -> bool:
|
|
386
|
+
# e.g. "... after 300s" or "... after 300.0s"
|
|
387
|
+
return bool(re.search(r"\bafter\s+\d+(?:\.\d+)?\s*s\b", msg))
|
|
388
|
+
|
|
389
|
+
# Preserve typed custom exceptions, but allow ProviderAPIError timeout messages
|
|
390
|
+
# to be normalized centrally (avoid per-provider inconsistencies).
|
|
391
|
+
if isinstance(error, ProviderAPIError):
|
|
392
|
+
msg = str(error)
|
|
393
|
+
if _looks_like_timeout(error) and not _has_explicit_duration(msg):
|
|
394
|
+
t = _configured_timeout_s()
|
|
395
|
+
if t is not None:
|
|
396
|
+
return ProviderAPIError(f"{_provider_label()} API error: timed out after {t}s")
|
|
397
|
+
return ProviderAPIError(f"{_provider_label()} API error: timed out")
|
|
291
398
|
return error
|
|
292
399
|
|
|
400
|
+
if isinstance(error, (ModelNotFoundError, AuthenticationError, RateLimitError, InvalidRequestError)):
|
|
401
|
+
return error
|
|
402
|
+
|
|
403
|
+
# Central timeout normalization for all providers (httpx/requests/SDKs).
|
|
404
|
+
if _looks_like_timeout(error):
|
|
405
|
+
t = _configured_timeout_s()
|
|
406
|
+
if t is not None:
|
|
407
|
+
return ProviderAPIError(f"{_provider_label()} API error: timed out after {t}s")
|
|
408
|
+
return ProviderAPIError(f"{_provider_label()} API error: timed out")
|
|
409
|
+
|
|
293
410
|
error_str = str(error).lower()
|
|
294
411
|
|
|
295
412
|
if "rate" in error_str and "limit" in error_str:
|
|
@@ -331,6 +448,16 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
331
448
|
execute_tools: Whether to execute tools automatically (True) or let agent handle execution (False)
|
|
332
449
|
glyph_compression: Glyph compression preference ("auto", "always", "never")
|
|
333
450
|
"""
|
|
451
|
+
# Normalize token limit naming at the provider boundary.
|
|
452
|
+
#
|
|
453
|
+
# - OpenAI-style APIs use `max_tokens` for the output-token cap.
|
|
454
|
+
# - AbstractCore's unified internal name is `max_output_tokens`.
|
|
455
|
+
#
|
|
456
|
+
# AbstractRuntime (and some hosts) may still emit `max_tokens` in effect payloads.
|
|
457
|
+
# That translation is a provider integration concern, so keep it in AbstractCore.
|
|
458
|
+
if "max_output_tokens" not in kwargs and "max_tokens" in kwargs and kwargs.get("max_tokens") is not None:
|
|
459
|
+
kwargs["max_output_tokens"] = kwargs.pop("max_tokens")
|
|
460
|
+
|
|
334
461
|
# Handle structured output request
|
|
335
462
|
if response_model is not None:
|
|
336
463
|
if not PYDANTIC_AVAILABLE:
|
|
@@ -408,6 +535,13 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
408
535
|
|
|
409
536
|
# Handle tool execution control
|
|
410
537
|
should_execute_tools = execute_tools if execute_tools is not None else self.execute_tools
|
|
538
|
+
if should_execute_tools and converted_tools:
|
|
539
|
+
warnings.warn(
|
|
540
|
+
"execute_tools=True is deprecated. Prefer passing tools explicitly to generate() "
|
|
541
|
+
"and executing tool calls in the host/runtime via a ToolExecutor.",
|
|
542
|
+
DeprecationWarning,
|
|
543
|
+
stacklevel=2,
|
|
544
|
+
)
|
|
411
545
|
if not should_execute_tools and converted_tools:
|
|
412
546
|
# If tools are provided but execution is disabled,
|
|
413
547
|
# we still pass them to the provider for generation but won't execute them
|
|
@@ -416,6 +550,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
416
550
|
# Define generation function for retry wrapper
|
|
417
551
|
def _execute_generation():
|
|
418
552
|
start_time = time.time()
|
|
553
|
+
start_perf = time.perf_counter()
|
|
419
554
|
|
|
420
555
|
# Emit generation started event (covers request received)
|
|
421
556
|
event_data = {
|
|
@@ -443,7 +578,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
443
578
|
**kwargs
|
|
444
579
|
)
|
|
445
580
|
|
|
446
|
-
return response, start_time
|
|
581
|
+
return response, start_time, start_perf
|
|
447
582
|
|
|
448
583
|
except Exception as e:
|
|
449
584
|
# Convert to custom exception and re-raise for retry handling
|
|
@@ -452,7 +587,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
452
587
|
|
|
453
588
|
# Execute with retry
|
|
454
589
|
try:
|
|
455
|
-
response, start_time = self.retry_manager.execute_with_retry(
|
|
590
|
+
response, start_time, start_perf = self.retry_manager.execute_with_retry(
|
|
456
591
|
_execute_generation,
|
|
457
592
|
provider_key=self.provider_key
|
|
458
593
|
)
|
|
@@ -475,7 +610,22 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
475
610
|
)
|
|
476
611
|
|
|
477
612
|
# Process stream with incremental tool detection and execution
|
|
613
|
+
ttft_ms: Optional[float] = None
|
|
478
614
|
for processed_chunk in processor.process_stream(response, converted_tools):
|
|
615
|
+
if isinstance(processed_chunk.content, str) and processed_chunk.content:
|
|
616
|
+
processed_chunk.content = self._strip_output_wrappers(processed_chunk.content)
|
|
617
|
+
if ttft_ms is None:
|
|
618
|
+
has_content = isinstance(processed_chunk.content, str) and bool(processed_chunk.content)
|
|
619
|
+
has_tools = isinstance(processed_chunk.tool_calls, list) and bool(processed_chunk.tool_calls)
|
|
620
|
+
if has_content or has_tools:
|
|
621
|
+
ttft_ms = round((time.perf_counter() - start_perf) * 1000, 1)
|
|
622
|
+
meta = processed_chunk.metadata if isinstance(processed_chunk.metadata, dict) else {}
|
|
623
|
+
timing = meta.get("_timing") if isinstance(meta.get("_timing"), dict) else {}
|
|
624
|
+
merged = dict(timing)
|
|
625
|
+
merged.setdefault("source", "client_wall")
|
|
626
|
+
merged["ttft_ms"] = ttft_ms
|
|
627
|
+
meta["_timing"] = merged
|
|
628
|
+
processed_chunk.metadata = meta
|
|
479
629
|
yield processed_chunk
|
|
480
630
|
|
|
481
631
|
# Track generation after streaming completes
|
|
@@ -488,10 +638,22 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
488
638
|
|
|
489
639
|
return unified_stream()
|
|
490
640
|
else:
|
|
491
|
-
# Non-streaming:
|
|
492
|
-
if response and
|
|
493
|
-
|
|
494
|
-
|
|
641
|
+
# Non-streaming: normalize tool calls into structured form.
|
|
642
|
+
if response and converted_tools:
|
|
643
|
+
response = self._normalize_tool_calls_passthrough(
|
|
644
|
+
response=response,
|
|
645
|
+
tools=converted_tools,
|
|
646
|
+
tool_call_tags=tool_call_tags,
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
# Optional: rewrite tool-call tags in content for downstream clients that parse tags.
|
|
650
|
+
# Note: when tool_call_tags is None (default), we return cleaned content.
|
|
651
|
+
if tool_call_tags and response.content and not self._should_clean_tool_call_markup(tool_call_tags):
|
|
652
|
+
response = self._apply_non_streaming_tag_rewriting(response, tool_call_tags)
|
|
653
|
+
|
|
654
|
+
# Strip model-specific output wrappers (e.g. GLM <|begin_of_box|>…<|end_of_box|>).
|
|
655
|
+
if response and isinstance(response.content, str) and response.content:
|
|
656
|
+
response.content = self._strip_output_wrappers(response.content)
|
|
495
657
|
|
|
496
658
|
# Add visual token calculation if media metadata is available
|
|
497
659
|
if media_metadata and response:
|
|
@@ -796,10 +958,26 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
796
958
|
|
|
797
959
|
# Override max_output_tokens if provided in kwargs
|
|
798
960
|
effective_max_output = kwargs.get("max_output_tokens", max_output_tokens)
|
|
961
|
+
# Safety clamp: never exceed the provider/model's configured max_output_tokens.
|
|
962
|
+
#
|
|
963
|
+
# Upstream callers (runtimes/agents) may request large output budgets based on
|
|
964
|
+
# stale capabilities or user configuration. Providers should not forward values
|
|
965
|
+
# that violate the model's hard limits (Anthropic returns 400 for this).
|
|
966
|
+
try:
|
|
967
|
+
if effective_max_output is None:
|
|
968
|
+
effective_max_output_i = int(max_output_tokens)
|
|
969
|
+
else:
|
|
970
|
+
effective_max_output_i = int(effective_max_output)
|
|
971
|
+
except Exception:
|
|
972
|
+
effective_max_output_i = int(max_output_tokens)
|
|
973
|
+
if effective_max_output_i <= 0:
|
|
974
|
+
effective_max_output_i = int(max_output_tokens)
|
|
975
|
+
if effective_max_output_i > int(max_output_tokens):
|
|
976
|
+
effective_max_output_i = int(max_output_tokens)
|
|
799
977
|
|
|
800
978
|
# Return base kwargs with unified parameter
|
|
801
979
|
result_kwargs = kwargs.copy()
|
|
802
|
-
result_kwargs["max_output_tokens"] =
|
|
980
|
+
result_kwargs["max_output_tokens"] = effective_max_output_i
|
|
803
981
|
|
|
804
982
|
# Add unified generation parameters with fallback hierarchy: kwargs → instance → defaults
|
|
805
983
|
result_kwargs["temperature"] = result_kwargs.get("temperature", self.temperature)
|
|
@@ -1328,6 +1506,352 @@ class BaseProvider(AbstractCoreInterface, ABC):
|
|
|
1328
1506
|
# Return original response if rewriting fails
|
|
1329
1507
|
return response
|
|
1330
1508
|
|
|
1509
|
+
def _strip_output_wrappers(self, content: str) -> str:
|
|
1510
|
+
"""Strip known model-specific wrapper tokens around assistant output.
|
|
1511
|
+
|
|
1512
|
+
Some model/server combinations emit wrapper tokens like:
|
|
1513
|
+
<|begin_of_box|> ... <|end_of_box|>
|
|
1514
|
+
We remove these only when they appear as leading/trailing wrappers (not when
|
|
1515
|
+
embedded mid-text).
|
|
1516
|
+
"""
|
|
1517
|
+
if not isinstance(content, str) or not content:
|
|
1518
|
+
return content
|
|
1519
|
+
|
|
1520
|
+
wrappers: Dict[str, str] = {}
|
|
1521
|
+
for src in (self.architecture_config, self.model_capabilities):
|
|
1522
|
+
if not isinstance(src, dict):
|
|
1523
|
+
continue
|
|
1524
|
+
w = src.get("output_wrappers")
|
|
1525
|
+
if not isinstance(w, dict):
|
|
1526
|
+
continue
|
|
1527
|
+
start = w.get("start")
|
|
1528
|
+
end = w.get("end")
|
|
1529
|
+
if isinstance(start, str) and start.strip():
|
|
1530
|
+
wrappers.setdefault("start", start.strip())
|
|
1531
|
+
if isinstance(end, str) and end.strip():
|
|
1532
|
+
wrappers.setdefault("end", end.strip())
|
|
1533
|
+
|
|
1534
|
+
if not wrappers:
|
|
1535
|
+
return content
|
|
1536
|
+
|
|
1537
|
+
out = content
|
|
1538
|
+
start_token = wrappers.get("start")
|
|
1539
|
+
end_token = wrappers.get("end")
|
|
1540
|
+
|
|
1541
|
+
if isinstance(start_token, str) and start_token:
|
|
1542
|
+
out = re.sub(r"^\s*" + re.escape(start_token) + r"\s*", "", out, count=1)
|
|
1543
|
+
if isinstance(end_token, str) and end_token:
|
|
1544
|
+
out = re.sub(r"\s*" + re.escape(end_token) + r"\s*$", "", out, count=1)
|
|
1545
|
+
|
|
1546
|
+
return out
|
|
1547
|
+
|
|
1548
|
+
def _normalize_tool_calls_passthrough(
    self,
    *,
    response: GenerateResponse,
    tools: List[Dict[str, Any]],
    tool_call_tags: Optional[str] = None,
) -> GenerateResponse:
    """Fill in structured ``response.tool_calls`` when tools are not executed here.

    Behaviour:
    - With no ``tools``, the response is returned untouched.
    - If the response already carries tool calls that normalize to a
      non-empty list, that list replaces ``response.tool_calls``; the content
      is additionally cleaned of tool markup when
      ``_should_clean_tool_call_markup(tool_call_tags)`` says so.
    - Otherwise the text content is parsed with ``self.tool_handler`` in
      "prompted" mode; any resulting calls are normalized and stored, and the
      parser's cleaned content replaces ``response.content`` unless tag
      preservation was requested.

    Args:
        response: Response object to update in place.
        tools: Tool definitions offered to the model.
        tool_call_tags: Caller's tag-format preference; forwarded to
            ``_should_clean_tool_call_markup``.

    Returns:
        The same ``response`` object.
    """
    # Nothing to normalize unless tools were actually offered.
    if not tools:
        return response

    permitted = self._get_allowed_tool_names(tools)

    # Path 1: the provider returned tool calls itself (native tool calling).
    native_calls = self._normalize_tool_calls_payload(
        response.tool_calls,
        allowed_tool_names=permitted,
    )
    if native_calls:
        response.tool_calls = native_calls
        if (
            self._should_clean_tool_call_markup(tool_call_tags)
            and isinstance(response.content, str)
            and response.content.strip()
        ):
            # Remove any tool syntax the model echoed into the text.
            response.content = self._clean_content_using_tool_calls(response.content, native_calls)
        return response

    # Path 2: prompted tool calling - the calls are embedded in the text.
    text = response.content
    if not (isinstance(text, str) and text.strip()):
        return response

    handler = getattr(self, "tool_handler", None)
    if handler is None:
        return response

    try:
        outcome = handler.parse_response(text, mode="prompted")
    except Exception:
        # Parsing is best-effort; on failure hand back the response as-is.
        return response

    extracted = getattr(outcome, "tool_calls", None)
    if not (isinstance(extracted, list) and extracted):
        return response

    prompted_calls = self._normalize_tool_calls_payload(
        extracted,
        allowed_tool_names=permitted,
    )
    if prompted_calls:
        response.tool_calls = prompted_calls

    # Adopt the parser's cleaned text unless the caller wants tags preserved.
    if self._should_clean_tool_call_markup(tool_call_tags):
        stripped_text = getattr(outcome, "content", None)
        if isinstance(stripped_text, str):
            response.content = stripped_text

    return response
|
|
1618
|
+
|
|
1619
|
+
def _should_clean_tool_call_markup(self, tool_call_tags: Optional[str]) -> bool:
|
|
1620
|
+
"""Return True when we should strip tool-call markup from assistant content."""
|
|
1621
|
+
if tool_call_tags is None:
|
|
1622
|
+
return True
|
|
1623
|
+
# OpenAI/Codex formats carry tool calls in structured fields, not in content.
|
|
1624
|
+
value = str(tool_call_tags).strip().lower()
|
|
1625
|
+
return value in {"openai", "codex"}
|
|
1626
|
+
|
|
1627
|
+
def _get_allowed_tool_names(self, tools: List[Dict[str, Any]]) -> set[str]:
|
|
1628
|
+
"""Extract allowed tool names from provider-normalized tool definitions."""
|
|
1629
|
+
names: set[str] = set()
|
|
1630
|
+
for tool in tools or []:
|
|
1631
|
+
if not isinstance(tool, dict):
|
|
1632
|
+
continue
|
|
1633
|
+
name = tool.get("name")
|
|
1634
|
+
if isinstance(name, str) and name.strip():
|
|
1635
|
+
names.add(name.strip())
|
|
1636
|
+
continue
|
|
1637
|
+
func = tool.get("function") if isinstance(tool.get("function"), dict) else None
|
|
1638
|
+
fname = func.get("name") if isinstance(func, dict) else None
|
|
1639
|
+
if isinstance(fname, str) and fname.strip():
|
|
1640
|
+
names.add(fname.strip())
|
|
1641
|
+
return names
|
|
1642
|
+
|
|
1643
|
+
def _normalize_tool_calls_payload(
|
|
1644
|
+
self,
|
|
1645
|
+
tool_calls: Any,
|
|
1646
|
+
*,
|
|
1647
|
+
allowed_tool_names: Optional[set[str]] = None,
|
|
1648
|
+
) -> Optional[List[Dict[str, Any]]]:
|
|
1649
|
+
"""Normalize tool call shapes into a canonical dict form.
|
|
1650
|
+
|
|
1651
|
+
Canonical shape:
|
|
1652
|
+
{"name": str, "arguments": dict, "call_id": Optional[str]}
|
|
1653
|
+
"""
|
|
1654
|
+
if tool_calls is None or not isinstance(tool_calls, list):
|
|
1655
|
+
return None
|
|
1656
|
+
|
|
1657
|
+
def _unwrap_arguments(arguments: Any, *, expected_tool_name: Optional[str]) -> Any:
|
|
1658
|
+
"""Unwrap common wrapper payloads to get tool kwargs.
|
|
1659
|
+
|
|
1660
|
+
Some providers/models emit nested wrappers like:
|
|
1661
|
+
{"name":"tool","arguments":{...},"call_id": "..."}
|
|
1662
|
+
inside the tool call `arguments` field (or even multiple times).
|
|
1663
|
+
|
|
1664
|
+
We unwrap when the object looks like a wrapper (only wrapper keys) OR when
|
|
1665
|
+
it includes wrapper metadata fields (e.g. "name"/"call_id") and an inner
|
|
1666
|
+
"arguments" dict. When wrapper fields and tool kwargs are partially mixed,
|
|
1667
|
+
we merge the outer kwargs into the inner dict (inner takes precedence).
|
|
1668
|
+
"""
|
|
1669
|
+
if not isinstance(arguments, dict):
|
|
1670
|
+
return arguments
|
|
1671
|
+
|
|
1672
|
+
wrapper_keys = {"name", "arguments", "call_id", "id"}
|
|
1673
|
+
current = arguments
|
|
1674
|
+
for _ in range(4):
|
|
1675
|
+
if not isinstance(current, dict):
|
|
1676
|
+
break
|
|
1677
|
+
keys = set(current.keys())
|
|
1678
|
+
if "arguments" not in current:
|
|
1679
|
+
break
|
|
1680
|
+
inner = current.get("arguments")
|
|
1681
|
+
if isinstance(inner, dict) or isinstance(inner, str):
|
|
1682
|
+
inner_dict: Any = inner
|
|
1683
|
+
if isinstance(inner, str):
|
|
1684
|
+
parsed = loads_dict_like(inner)
|
|
1685
|
+
inner_dict = parsed if isinstance(parsed, dict) else None
|
|
1686
|
+
if not isinstance(inner_dict, dict):
|
|
1687
|
+
break
|
|
1688
|
+
|
|
1689
|
+
name_matches = False
|
|
1690
|
+
raw_name = current.get("name")
|
|
1691
|
+
if isinstance(raw_name, str) and expected_tool_name and raw_name.strip() == expected_tool_name:
|
|
1692
|
+
name_matches = True
|
|
1693
|
+
|
|
1694
|
+
wrapperish = keys.issubset(wrapper_keys) or name_matches or bool(keys & {"call_id", "id"})
|
|
1695
|
+
if not wrapperish:
|
|
1696
|
+
break
|
|
1697
|
+
|
|
1698
|
+
# Merge any outer kwargs that were accidentally placed alongside wrapper fields.
|
|
1699
|
+
extras = {k: v for k, v in current.items() if k not in wrapper_keys}
|
|
1700
|
+
if extras:
|
|
1701
|
+
merged = dict(inner_dict)
|
|
1702
|
+
for k, v in extras.items():
|
|
1703
|
+
merged.setdefault(k, v)
|
|
1704
|
+
current = merged
|
|
1705
|
+
else:
|
|
1706
|
+
current = inner_dict
|
|
1707
|
+
continue
|
|
1708
|
+
break
|
|
1709
|
+
|
|
1710
|
+
return current
|
|
1711
|
+
|
|
1712
|
+
def _map_wrapped_name_to_allowed(raw: str, allowed: set[str]) -> Optional[str]:
|
|
1713
|
+
"""Best-effort mapping when a provider returns a wrapped tool name.
|
|
1714
|
+
|
|
1715
|
+
Some OpenAI-compatible servers/models occasionally return tool names wrapped in
|
|
1716
|
+
extra tokens/text (e.g. "{function-name: write_file}"). If we can confidently
|
|
1717
|
+
detect an allowed tool name as a standalone token within the raw string, map it
|
|
1718
|
+
back to the exact allowed name so tool execution can proceed.
|
|
1719
|
+
"""
|
|
1720
|
+
s = str(raw or "").strip()
|
|
1721
|
+
if not s:
|
|
1722
|
+
return None
|
|
1723
|
+
if s in allowed:
|
|
1724
|
+
return s
|
|
1725
|
+
|
|
1726
|
+
try:
|
|
1727
|
+
import re
|
|
1728
|
+
|
|
1729
|
+
# Prefer exact token-boundary matches (tool names are usually snake_case).
|
|
1730
|
+
candidates: List[str] = []
|
|
1731
|
+
for name in allowed:
|
|
1732
|
+
if not isinstance(name, str) or not name:
|
|
1733
|
+
continue
|
|
1734
|
+
pat = r"(^|[^\w])" + re.escape(name) + r"([^\w]|$)"
|
|
1735
|
+
if re.search(pat, s):
|
|
1736
|
+
candidates.append(name)
|
|
1737
|
+
if candidates:
|
|
1738
|
+
# Prefer the most specific (longest) match deterministically.
|
|
1739
|
+
return max(candidates, key=lambda n: (len(n), n))
|
|
1740
|
+
except Exception:
|
|
1741
|
+
return None
|
|
1742
|
+
|
|
1743
|
+
return None
|
|
1744
|
+
|
|
1745
|
+
normalized: List[Dict[str, Any]] = []
|
|
1746
|
+
|
|
1747
|
+
for tc in tool_calls:
|
|
1748
|
+
name: Optional[str] = None
|
|
1749
|
+
arguments: Any = None
|
|
1750
|
+
call_id: Any = None
|
|
1751
|
+
|
|
1752
|
+
if isinstance(tc, dict):
|
|
1753
|
+
call_id = tc.get("call_id", None)
|
|
1754
|
+
if call_id is None:
|
|
1755
|
+
call_id = tc.get("id", None)
|
|
1756
|
+
|
|
1757
|
+
raw_name = tc.get("name")
|
|
1758
|
+
raw_args = tc.get("arguments")
|
|
1759
|
+
|
|
1760
|
+
func = tc.get("function") if isinstance(tc.get("function"), dict) else None
|
|
1761
|
+
if func and (not isinstance(raw_name, str) or not raw_name.strip()):
|
|
1762
|
+
raw_name = func.get("name")
|
|
1763
|
+
if func and raw_args is None:
|
|
1764
|
+
raw_args = func.get("arguments")
|
|
1765
|
+
|
|
1766
|
+
if isinstance(raw_name, str) and raw_name.strip():
|
|
1767
|
+
name = raw_name.strip()
|
|
1768
|
+
arguments = raw_args if raw_args is not None else {}
|
|
1769
|
+
else:
|
|
1770
|
+
raw_name = getattr(tc, "name", None)
|
|
1771
|
+
raw_args = getattr(tc, "arguments", None)
|
|
1772
|
+
call_id = getattr(tc, "call_id", None)
|
|
1773
|
+
if isinstance(raw_name, str) and raw_name.strip():
|
|
1774
|
+
name = raw_name.strip()
|
|
1775
|
+
arguments = raw_args if raw_args is not None else {}
|
|
1776
|
+
|
|
1777
|
+
if not isinstance(name, str) or not name:
|
|
1778
|
+
continue
|
|
1779
|
+
if isinstance(allowed_tool_names, set) and allowed_tool_names and name not in allowed_tool_names:
|
|
1780
|
+
mapped = _map_wrapped_name_to_allowed(name, allowed_tool_names)
|
|
1781
|
+
if not isinstance(mapped, str) or not mapped:
|
|
1782
|
+
continue
|
|
1783
|
+
name = mapped
|
|
1784
|
+
|
|
1785
|
+
if isinstance(arguments, str):
|
|
1786
|
+
parsed = loads_dict_like(arguments)
|
|
1787
|
+
arguments = parsed if isinstance(parsed, dict) else {}
|
|
1788
|
+
|
|
1789
|
+
# Recover tool kwargs from nested wrapper payloads when present.
|
|
1790
|
+
if isinstance(arguments, dict) and call_id is None:
|
|
1791
|
+
wrapper_id = arguments.get("call_id") or arguments.get("id")
|
|
1792
|
+
if isinstance(wrapper_id, str) and wrapper_id.strip():
|
|
1793
|
+
call_id = wrapper_id.strip()
|
|
1794
|
+
arguments = _unwrap_arguments(arguments, expected_tool_name=name)
|
|
1795
|
+
if not isinstance(arguments, dict):
|
|
1796
|
+
arguments = {}
|
|
1797
|
+
|
|
1798
|
+
try:
|
|
1799
|
+
from ..tools.arg_canonicalizer import canonicalize_tool_arguments
|
|
1800
|
+
|
|
1801
|
+
arguments = canonicalize_tool_arguments(name, arguments)
|
|
1802
|
+
except Exception:
|
|
1803
|
+
pass
|
|
1804
|
+
|
|
1805
|
+
normalized.append(
|
|
1806
|
+
{
|
|
1807
|
+
"name": name,
|
|
1808
|
+
"arguments": arguments,
|
|
1809
|
+
"call_id": str(call_id) if call_id is not None else None,
|
|
1810
|
+
}
|
|
1811
|
+
)
|
|
1812
|
+
|
|
1813
|
+
if not normalized:
|
|
1814
|
+
return None
|
|
1815
|
+
|
|
1816
|
+
# Defense-in-depth: remove accidental duplicates introduced by overlapping parsing paths.
|
|
1817
|
+
unique: List[Dict[str, Any]] = []
|
|
1818
|
+
seen: set[tuple[str, str]] = set()
|
|
1819
|
+
for tc in normalized:
|
|
1820
|
+
try:
|
|
1821
|
+
args_key = json.dumps(tc.get("arguments", {}), sort_keys=True, ensure_ascii=False)
|
|
1822
|
+
except Exception:
|
|
1823
|
+
args_key = str(tc.get("arguments", {}))
|
|
1824
|
+
key = (str(tc.get("name") or ""), args_key)
|
|
1825
|
+
if key in seen:
|
|
1826
|
+
continue
|
|
1827
|
+
seen.add(key)
|
|
1828
|
+
unique.append(tc)
|
|
1829
|
+
|
|
1830
|
+
return unique or None
|
|
1831
|
+
|
|
1832
|
+
def _clean_content_using_tool_calls(self, content: str, tool_calls: List[Dict[str, Any]]) -> str:
|
|
1833
|
+
"""Strip tool-call markup from assistant content using known tool calls."""
|
|
1834
|
+
try:
|
|
1835
|
+
from ..tools.core import ToolCall as CoreToolCall
|
|
1836
|
+
from ..tools.parser import clean_tool_syntax
|
|
1837
|
+
|
|
1838
|
+
core_calls: List[CoreToolCall] = []
|
|
1839
|
+
for tc in tool_calls or []:
|
|
1840
|
+
if not isinstance(tc, dict):
|
|
1841
|
+
continue
|
|
1842
|
+
name = tc.get("name")
|
|
1843
|
+
if not isinstance(name, str) or not name.strip():
|
|
1844
|
+
continue
|
|
1845
|
+
args = tc.get("arguments")
|
|
1846
|
+
args_dict = dict(args) if isinstance(args, dict) else {}
|
|
1847
|
+
core_calls.append(CoreToolCall(name=name.strip(), arguments=args_dict, call_id=tc.get("call_id")))
|
|
1848
|
+
|
|
1849
|
+
if not core_calls:
|
|
1850
|
+
return content
|
|
1851
|
+
return clean_tool_syntax(content, core_calls)
|
|
1852
|
+
except Exception:
|
|
1853
|
+
return content
|
|
1854
|
+
|
|
1331
1855
|
def _handle_tools_with_structured_output(self,
|
|
1332
1856
|
prompt: str,
|
|
1333
1857
|
messages: Optional[List[Dict[str, str]]] = None,
|
|
@@ -1574,4 +2098,4 @@ Please provide a structured response."""
|
|
|
1574
2098
|
# Yield chunks asynchronously
|
|
1575
2099
|
for chunk in sync_gen:
|
|
1576
2100
|
yield chunk
|
|
1577
|
-
await asyncio.sleep(0) # Yield control to event loop
|
|
2101
|
+
await asyncio.sleep(0) # Yield control to event loop
|