abstractcore-2.6.9-py3-none-any.whl → abstractcore-2.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. abstractcore/apps/summarizer.py +69 -27
  2. abstractcore/architectures/detection.py +190 -25
  3. abstractcore/assets/architecture_formats.json +129 -6
  4. abstractcore/assets/model_capabilities.json +803 -141
  5. abstractcore/config/main.py +2 -2
  6. abstractcore/config/manager.py +3 -1
  7. abstractcore/events/__init__.py +7 -1
  8. abstractcore/mcp/__init__.py +30 -0
  9. abstractcore/mcp/client.py +213 -0
  10. abstractcore/mcp/factory.py +64 -0
  11. abstractcore/mcp/naming.py +28 -0
  12. abstractcore/mcp/stdio_client.py +336 -0
  13. abstractcore/mcp/tool_source.py +164 -0
  14. abstractcore/processing/__init__.py +2 -2
  15. abstractcore/processing/basic_deepsearch.py +1 -1
  16. abstractcore/processing/basic_summarizer.py +379 -93
  17. abstractcore/providers/anthropic_provider.py +91 -10
  18. abstractcore/providers/base.py +540 -16
  19. abstractcore/providers/huggingface_provider.py +17 -8
  20. abstractcore/providers/lmstudio_provider.py +170 -25
  21. abstractcore/providers/mlx_provider.py +13 -10
  22. abstractcore/providers/ollama_provider.py +42 -26
  23. abstractcore/providers/openai_compatible_provider.py +87 -22
  24. abstractcore/providers/openai_provider.py +12 -9
  25. abstractcore/providers/streaming.py +201 -39
  26. abstractcore/providers/vllm_provider.py +78 -21
  27. abstractcore/server/app.py +116 -30
  28. abstractcore/structured/retry.py +20 -7
  29. abstractcore/tools/__init__.py +46 -24
  30. abstractcore/tools/abstractignore.py +166 -0
  31. abstractcore/tools/arg_canonicalizer.py +61 -0
  32. abstractcore/tools/common_tools.py +2443 -742
  33. abstractcore/tools/core.py +109 -13
  34. abstractcore/tools/handler.py +17 -3
  35. abstractcore/tools/parser.py +894 -159
  36. abstractcore/tools/registry.py +122 -18
  37. abstractcore/tools/syntax_rewriter.py +68 -6
  38. abstractcore/tools/tag_rewriter.py +186 -1
  39. abstractcore/utils/jsonish.py +111 -0
  40. abstractcore/utils/version.py +1 -1
  41. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/METADATA +55 -2
  42. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/RECORD +46 -37
  43. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/WHEEL +0 -0
  44. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/entry_points.txt +0 -0
  45. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/licenses/LICENSE +0 -0
  46. {abstractcore-2.6.9.dist-info → abstractcore-2.9.0.dist-info}/top_level.txt +0 -0
abstractcore/providers/base.py (+540 -16)
@@ -5,8 +5,12 @@ Base provider with integrated telemetry, events, and exception handling.
  import time
  import uuid
  import asyncio
+ import warnings
+ import json
+ import re
+ import socket
  from collections import deque
- from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
+ from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type, TYPE_CHECKING
  from abc import ABC, abstractmethod

  try:
@@ -21,6 +25,7 @@ from ..core.types import GenerateResponse
  from ..events import EventType, Event
  from datetime import datetime
  from ..utils.structured_logging import get_logger
+ from ..utils.jsonish import loads_dict_like
  from ..exceptions import (
      ProviderAPIError,
      AuthenticationError,
@@ -32,6 +37,10 @@ from ..architectures import detect_architecture, get_architecture_format, get_mo
  from ..tools import execute_tools
  from ..core.retry import RetryManager, RetryConfig

+ if TYPE_CHECKING:  # pragma: no cover
+     # Imported for type checking only to avoid hard dependencies in minimal installs.
+     from ..media.types import MediaContent
+

  class BaseProvider(AbstractCoreInterface, ABC):
      """
@@ -51,15 +60,64 @@ class BaseProvider(AbstractCoreInterface, ABC):
          self.architecture_config = get_architecture_format(self.architecture)
          self.model_capabilities = get_model_capabilities(model)

-         # Setup timeout configuration
-         # Default to None for unlimited timeout
-         self._timeout = kwargs.get('timeout', None)  # Default None for unlimited HTTP requests
-         self._tool_timeout = kwargs.get('tool_timeout', None)  # Default None for unlimited tool execution
+         # Setup timeout configuration (centralized defaults).
+         #
+         # Semantics:
+         # - If the caller passes `timeout=...`, we respect it (including `None` for unlimited).
+         # - If the caller omits `timeout`, we use AbstractCore's global config default.
+         # - Same logic for `tool_timeout`.
+         timeout_provided = "timeout" in kwargs
+         tool_timeout_provided = "tool_timeout" in kwargs
+
+         timeout_value = kwargs.get("timeout", None) if timeout_provided else None
+         tool_timeout_value = kwargs.get("tool_timeout", None) if tool_timeout_provided else None
+
+         if not timeout_provided or not tool_timeout_provided:
+             try:
+                 from ..config.manager import get_config_manager
+
+                 cfg = get_config_manager()
+             except Exception:
+                 cfg = None
+
+             if not timeout_provided:
+                 try:
+                     timeout_value = float(cfg.get_default_timeout()) if cfg is not None else None
+                 except Exception:
+                     timeout_value = None
+
+             if not tool_timeout_provided:
+                 try:
+                     tool_timeout_value = float(cfg.get_tool_timeout()) if cfg is not None else None
+                 except Exception:
+                     tool_timeout_value = None
+
+         # Validate timeouts: non-positive numbers become "unlimited" (None).
+         try:
+             if isinstance(timeout_value, (int, float)) and float(timeout_value) <= 0:
+                 timeout_value = None
+         except Exception:
+             pass
+         try:
+             if isinstance(tool_timeout_value, (int, float)) and float(tool_timeout_value) <= 0:
+                 tool_timeout_value = None
+         except Exception:
+             pass
+
+         self._timeout = timeout_value  # None = unlimited HTTP requests
+         self._tool_timeout = tool_timeout_value  # None = unlimited tool execution

          # Setup tool execution mode
          # execute_tools: True = AbstractCore executes tools (legacy mode)
          #                False = Pass-through mode (default - for API server / agentic CLI)
          self.execute_tools = kwargs.get('execute_tools', False)
+         if self.execute_tools:
+             warnings.warn(
+                 "execute_tools=True is deprecated. Prefer passing tools explicitly to generate() "
+                 "and executing tool calls in the host/runtime via a ToolExecutor.",
+                 DeprecationWarning,
+                 stacklevel=2,
+             )

          # Setup retry manager with optional configuration
          retry_config = kwargs.get('retry_config', None)
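
A note on the timeout defaults introduced above. The sketch below is not AbstractCore API; `resolve_timeout` and its arguments are invented here to restate the precedence in isolation: an explicitly passed value always wins (an explicit `None` means unlimited), an omitted value falls back to the global config default, and non-positive numbers collapse to unlimited.

    from typing import Any, Dict, Optional

    def resolve_timeout(kwargs: Dict[str, Any], key: str, config_default: Optional[float]) -> Optional[float]:
        """Illustrative restatement of the precedence in BaseProvider.__init__ above:
        caller-supplied value (even None) > config default; non-positive means unlimited."""
        value = kwargs[key] if key in kwargs else config_default
        if value is None:
            return None
        if isinstance(value, (int, float)) and float(value) <= 0:
            return None  # non-positive -> unlimited
        return float(value)

    assert resolve_timeout({"timeout": None}, "timeout", 300.0) is None   # explicit "unlimited" respected
    assert resolve_timeout({"timeout": 30}, "timeout", 300.0) == 30.0     # explicit value wins over config
    assert resolve_timeout({}, "timeout", 300.0) == 300.0                 # omitted -> config default
    assert resolve_timeout({"timeout": 0}, "timeout", 300.0) is None      # non-positive -> unlimited
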
@@ -202,6 +260,12 @@ class BaseProvider(AbstractCoreInterface, ABC):
          """
          trace_id = str(uuid.uuid4())

+         # If trace retention is disabled, still return a trace_id for correlation
+         # without constructing/storing a full trace payload.
+         maxlen = getattr(getattr(self, "_traces", None), "maxlen", None)
+         if maxlen == 0:
+             return trace_id
+
          # Extract generation parameters
          temperature = kwargs.get('temperature', self.temperature)
          max_tokens = kwargs.get('max_tokens', self.max_tokens)
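
For context on the `maxlen == 0` check: `_traces` appears to be a `collections.deque` (the module imports `deque` at the top), and a deque created with `maxlen=0` silently discards every append, so building the trace payload would be wasted work. A quick illustration, with the attribute name treated as an assumption:

    from collections import deque

    traces = deque(maxlen=0)            # retention disabled
    traces.append({"trace_id": "abc"})  # silently dropped
    assert len(traces) == 0             # nothing retained, so skip building the payload entirely
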
@@ -285,11 +349,64 @@ class BaseProvider(AbstractCoreInterface, ABC):
          Returns:
              Custom exception
          """
-         # Don't re-wrap our custom exceptions
-         if isinstance(error, (ModelNotFoundError, AuthenticationError, RateLimitError,
-                               InvalidRequestError, ProviderAPIError)):
+         def _provider_label() -> str:
+             raw = getattr(self, "provider", None)
+             if isinstance(raw, str) and raw.strip():
+                 return raw.strip()
+             name = self.__class__.__name__
+             return name[:-8] if name.endswith("Provider") else name
+
+         def _configured_timeout_s() -> Optional[float]:
+             v = getattr(self, "_timeout", None)
+             if v is None:
+                 return None
+             try:
+                 f = float(v)
+             except Exception:
+                 return None
+             return f if f > 0 else None
+
+         def _looks_like_timeout(exc: Exception) -> bool:
+             # Type-based (preferred)
+             if isinstance(exc, (TimeoutError, asyncio.TimeoutError, socket.timeout)):
+                 return True
+             cls = exc.__class__
+             name = (getattr(cls, "__name__", "") or "").lower()
+             mod = (getattr(cls, "__module__", "") or "").lower()
+             if "timeout" in name:
+                 return True
+             if mod.startswith(("httpx", "requests", "aiohttp")) and ("timeout" in name):
+                 return True
+
+             # String-based fallback (covers wrapped SDK exceptions)
+             msg = str(exc or "").lower()
+             return ("timed out" in msg) or ("timeout" in msg) or ("time out" in msg)
+
+         def _has_explicit_duration(msg: str) -> bool:
+             # e.g. "... after 300s" or "... after 300.0s"
+             return bool(re.search(r"\bafter\s+\d+(?:\.\d+)?\s*s\b", msg))
+
+         # Preserve typed custom exceptions, but allow ProviderAPIError timeout messages
+         # to be normalized centrally (avoid per-provider inconsistencies).
+         if isinstance(error, ProviderAPIError):
+             msg = str(error)
+             if _looks_like_timeout(error) and not _has_explicit_duration(msg):
+                 t = _configured_timeout_s()
+                 if t is not None:
+                     return ProviderAPIError(f"{_provider_label()} API error: timed out after {t}s")
+                 return ProviderAPIError(f"{_provider_label()} API error: timed out")
              return error

+         if isinstance(error, (ModelNotFoundError, AuthenticationError, RateLimitError, InvalidRequestError)):
+             return error
+
+         # Central timeout normalization for all providers (httpx/requests/SDKs).
+         if _looks_like_timeout(error):
+             t = _configured_timeout_s()
+             if t is not None:
+                 return ProviderAPIError(f"{_provider_label()} API error: timed out after {t}s")
+             return ProviderAPIError(f"{_provider_label()} API error: timed out")
+
          error_str = str(error).lower()
          if "rate" in error_str and "limit" in error_str:
@@ -331,6 +448,16 @@ class BaseProvider(AbstractCoreInterface, ABC):
              execute_tools: Whether to execute tools automatically (True) or let agent handle execution (False)
              glyph_compression: Glyph compression preference ("auto", "always", "never")
          """
+         # Normalize token limit naming at the provider boundary.
+         #
+         # - OpenAI-style APIs use `max_tokens` for the output-token cap.
+         # - AbstractCore's unified internal name is `max_output_tokens`.
+         #
+         # AbstractRuntime (and some hosts) may still emit `max_tokens` in effect payloads.
+         # That translation is a provider integration concern, so keep it in AbstractCore.
+         if "max_output_tokens" not in kwargs and "max_tokens" in kwargs and kwargs.get("max_tokens") is not None:
+             kwargs["max_output_tokens"] = kwargs.pop("max_tokens")
+
          # Handle structured output request
          if response_model is not None:
              if not PYDANTIC_AVAILABLE:
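
A stand-alone restatement of the aliasing rule (illustrative only; the helper name is invented): an OpenAI-style `max_tokens` is moved to the unified `max_output_tokens` key only when the caller did not already supply `max_output_tokens`.

    def alias_max_tokens(kwargs: dict) -> dict:
        """Move OpenAI-style max_tokens to the unified max_output_tokens, never overwriting it."""
        out = dict(kwargs)
        if "max_output_tokens" not in out and out.get("max_tokens") is not None:
            out["max_output_tokens"] = out.pop("max_tokens")
        return out

    assert alias_max_tokens({"max_tokens": 512}) == {"max_output_tokens": 512}
    assert alias_max_tokens({"max_tokens": 512, "max_output_tokens": 256}) == {
        "max_tokens": 512,          # left in place: the unified key already wins
        "max_output_tokens": 256,
    }
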
@@ -408,6 +535,13 @@ class BaseProvider(AbstractCoreInterface, ABC):

          # Handle tool execution control
          should_execute_tools = execute_tools if execute_tools is not None else self.execute_tools
+         if should_execute_tools and converted_tools:
+             warnings.warn(
+                 "execute_tools=True is deprecated. Prefer passing tools explicitly to generate() "
+                 "and executing tool calls in the host/runtime via a ToolExecutor.",
+                 DeprecationWarning,
+                 stacklevel=2,
+             )
          if not should_execute_tools and converted_tools:
              # If tools are provided but execution is disabled,
              # we still pass them to the provider for generation but won't execute them
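
The deprecation above points callers at host-side execution. A possible migration is sketched below under assumptions: `provider.generate(prompt, tools=...)` stands for whatever generation call the host already makes, and `tool_impls` is a plain name-to-callable mapping; only the `{"name", "arguments", "call_id"}` shape of `response.tool_calls` is taken from this diff (see the normalization methods added further down).

    def run_tool_turn(provider, prompt, tools, tool_impls):
        """Hypothetical host-side loop: generate in passthrough mode, then execute tool calls here."""
        response = provider.generate(prompt, tools=tools)   # no execute_tools=True anywhere
        results = []
        for call in response.tool_calls or []:
            fn = tool_impls.get(call["name"])
            if fn is None:
                results.append({"call_id": call["call_id"], "error": f"unknown tool {call['name']}"})
                continue
            results.append({"call_id": call["call_id"], "output": fn(**call["arguments"])})
        return response, results
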
@@ -416,6 +550,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
          # Define generation function for retry wrapper
          def _execute_generation():
              start_time = time.time()
+             start_perf = time.perf_counter()

              # Emit generation started event (covers request received)
              event_data = {
@@ -443,7 +578,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
                      **kwargs
                  )

-                 return response, start_time
+                 return response, start_time, start_perf

              except Exception as e:
                  # Convert to custom exception and re-raise for retry handling
@@ -452,7 +587,7 @@ class BaseProvider(AbstractCoreInterface, ABC):

          # Execute with retry
          try:
-             response, start_time = self.retry_manager.execute_with_retry(
+             response, start_time, start_perf = self.retry_manager.execute_with_retry(
                  _execute_generation,
                  provider_key=self.provider_key
              )
@@ -475,7 +610,22 @@ class BaseProvider(AbstractCoreInterface, ABC):
                  )

                  # Process stream with incremental tool detection and execution
+                 ttft_ms: Optional[float] = None
                  for processed_chunk in processor.process_stream(response, converted_tools):
+                     if isinstance(processed_chunk.content, str) and processed_chunk.content:
+                         processed_chunk.content = self._strip_output_wrappers(processed_chunk.content)
+                     if ttft_ms is None:
+                         has_content = isinstance(processed_chunk.content, str) and bool(processed_chunk.content)
+                         has_tools = isinstance(processed_chunk.tool_calls, list) and bool(processed_chunk.tool_calls)
+                         if has_content or has_tools:
+                             ttft_ms = round((time.perf_counter() - start_perf) * 1000, 1)
+                             meta = processed_chunk.metadata if isinstance(processed_chunk.metadata, dict) else {}
+                             timing = meta.get("_timing") if isinstance(meta.get("_timing"), dict) else {}
+                             merged = dict(timing)
+                             merged.setdefault("source", "client_wall")
+                             merged["ttft_ms"] = ttft_ms
+                             meta["_timing"] = merged
+                             processed_chunk.metadata = meta
                      yield processed_chunk

                  # Track generation after streaming completes
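
Downstream code can read the time-to-first-token that this loop attaches to the first contentful chunk. The helper below is illustrative (the chunk class is a minimal stand-in); the metadata keys `_timing`, `ttft_ms`, and `source: "client_wall"` come from the code above.

    from typing import Optional

    def first_token_latency_ms(chunks) -> Optional[float]:
        """Return the client-side TTFT recorded on the first contentful chunk, if any."""
        for chunk in chunks:
            meta = getattr(chunk, "metadata", None) or {}
            timing = meta.get("_timing") or {}
            if "ttft_ms" in timing:
                return timing["ttft_ms"]
        return None

    class _Chunk:  # minimal stand-in for a streamed chunk
        def __init__(self, metadata=None):
            self.metadata = metadata

    chunks = [_Chunk(), _Chunk({"_timing": {"source": "client_wall", "ttft_ms": 182.4}})]
    assert first_token_latency_ms(chunks) == 182.4
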
@@ -488,10 +638,22 @@ class BaseProvider(AbstractCoreInterface, ABC):

              return unified_stream()
          else:
-             # Non-streaming: apply tag rewriting if needed
-             if response and response.content and converted_tools:
-                 # Apply default qwen3 rewriting for non-streaming responses
-                 response = self._apply_non_streaming_tag_rewriting(response, tool_call_tags)
+             # Non-streaming: normalize tool calls into structured form.
+             if response and converted_tools:
+                 response = self._normalize_tool_calls_passthrough(
+                     response=response,
+                     tools=converted_tools,
+                     tool_call_tags=tool_call_tags,
+                 )
+
+             # Optional: rewrite tool-call tags in content for downstream clients that parse tags.
+             # Note: when tool_call_tags is None (default), we return cleaned content.
+             if tool_call_tags and response.content and not self._should_clean_tool_call_markup(tool_call_tags):
+                 response = self._apply_non_streaming_tag_rewriting(response, tool_call_tags)
+
+             # Strip model-specific output wrappers (e.g. GLM <|begin_of_box|>…<|end_of_box|>).
+             if response and isinstance(response.content, str) and response.content:
+                 response.content = self._strip_output_wrappers(response.content)

              # Add visual token calculation if media metadata is available
              if media_metadata and response:
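
The ordering above hinges on one decision: should tool-call markup stay in `content`? A stand-alone restatement of that rule (mirroring `_should_clean_tool_call_markup`, defined later in this diff); the `"qwen3"` value is only an example of a tag style that keeps markup in content, taken from the old comment removed above.

    def should_clean_markup(tool_call_tags) -> bool:
        """Strip tool syntax from content unless the caller asked for a tag format that keeps it there."""
        if tool_call_tags is None:
            return True                      # default: clean content, rely on structured tool_calls
        return str(tool_call_tags).strip().lower() in {"openai", "codex"}

    assert should_clean_markup(None) is True       # default passthrough behaviour
    assert should_clean_markup("openai") is True   # structured formats never carry tags in content
    assert should_clean_markup("qwen3") is False   # tag passthrough: keep content, still set tool_calls
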
@@ -796,10 +958,26 @@ class BaseProvider(AbstractCoreInterface, ABC):

          # Override max_output_tokens if provided in kwargs
          effective_max_output = kwargs.get("max_output_tokens", max_output_tokens)
+         # Safety clamp: never exceed the provider/model's configured max_output_tokens.
+         #
+         # Upstream callers (runtimes/agents) may request large output budgets based on
+         # stale capabilities or user configuration. Providers should not forward values
+         # that violate the model's hard limits (Anthropic returns 400 for this).
+         try:
+             if effective_max_output is None:
+                 effective_max_output_i = int(max_output_tokens)
+             else:
+                 effective_max_output_i = int(effective_max_output)
+         except Exception:
+             effective_max_output_i = int(max_output_tokens)
+         if effective_max_output_i <= 0:
+             effective_max_output_i = int(max_output_tokens)
+         if effective_max_output_i > int(max_output_tokens):
+             effective_max_output_i = int(max_output_tokens)

          # Return base kwargs with unified parameter
          result_kwargs = kwargs.copy()
-         result_kwargs["max_output_tokens"] = effective_max_output
+         result_kwargs["max_output_tokens"] = effective_max_output_i

          # Add unified generation parameters with fallback hierarchy: kwargs → instance → defaults
          result_kwargs["temperature"] = result_kwargs.get("temperature", self.temperature)
@@ -1328,6 +1506,352 @@ class BaseProvider(AbstractCoreInterface, ABC):
              # Return original response if rewriting fails
              return response

+     def _strip_output_wrappers(self, content: str) -> str:
+         """Strip known model-specific wrapper tokens around assistant output.
+
+         Some model/server combinations emit wrapper tokens like:
+             <|begin_of_box|> ... <|end_of_box|>
+         We remove these only when they appear as leading/trailing wrappers (not when
+         embedded mid-text).
+         """
+         if not isinstance(content, str) or not content:
+             return content
+
+         wrappers: Dict[str, str] = {}
+         for src in (self.architecture_config, self.model_capabilities):
+             if not isinstance(src, dict):
+                 continue
+             w = src.get("output_wrappers")
+             if not isinstance(w, dict):
+                 continue
+             start = w.get("start")
+             end = w.get("end")
+             if isinstance(start, str) and start.strip():
+                 wrappers.setdefault("start", start.strip())
+             if isinstance(end, str) and end.strip():
+                 wrappers.setdefault("end", end.strip())
+
+         if not wrappers:
+             return content
+
+         out = content
+         start_token = wrappers.get("start")
+         end_token = wrappers.get("end")
+
+         if isinstance(start_token, str) and start_token:
+             out = re.sub(r"^\s*" + re.escape(start_token) + r"\s*", "", out, count=1)
+         if isinstance(end_token, str) and end_token:
+             out = re.sub(r"\s*" + re.escape(end_token) + r"\s*$", "", out, count=1)
+
+         return out
+
+     def _normalize_tool_calls_passthrough(
+         self,
+         *,
+         response: GenerateResponse,
+         tools: List[Dict[str, Any]],
+         tool_call_tags: Optional[str] = None,
+     ) -> GenerateResponse:
+         """Populate `response.tool_calls` (and usually clean `response.content`) in passthrough mode.
+
+         Contract:
+         - AbstractCore always returns structured `tool_calls` when tools are provided and the model emits tool syntax,
+           even for prompted tool calling (tool calls embedded in `content`).
+         - By default (`tool_call_tags is None`), tool-call markup is stripped from `content` for clean UX/history.
+         - When `tool_call_tags` is set, we preserve `content` (for clients that parse tags) but still populate
+           structured `tool_calls`.
+         """
+
+         # Only normalize when tools were actually provided.
+         if not tools:
+             return response
+
+         allowed_names = self._get_allowed_tool_names(tools)
+
+         # 1) If provider already returned tool_calls (native tools), normalize shape + args.
+         normalized_existing = self._normalize_tool_calls_payload(
+             response.tool_calls,
+             allowed_tool_names=allowed_names,
+         )
+         if normalized_existing:
+             response.tool_calls = normalized_existing
+
+             # Clean any echoed tool syntax from content unless the caller explicitly requested tag passthrough.
+             if self._should_clean_tool_call_markup(tool_call_tags) and isinstance(response.content, str) and response.content.strip():
+                 cleaned = self._clean_content_using_tool_calls(response.content, normalized_existing)
+                 response.content = cleaned
+
+             return response
+
+         # 2) Prompted tools: parse tool calls embedded in content.
+         content = response.content
+         if not isinstance(content, str) or not content.strip():
+             return response
+
+         tool_handler = getattr(self, "tool_handler", None)
+         if tool_handler is None:
+             return response
+
+         try:
+             parsed = tool_handler.parse_response(content, mode="prompted")
+         except Exception:
+             return response
+
+         parsed_calls = getattr(parsed, "tool_calls", None)
+         if not isinstance(parsed_calls, list) or not parsed_calls:
+             return response
+
+         normalized_parsed = self._normalize_tool_calls_payload(
+             parsed_calls,
+             allowed_tool_names=allowed_names,
+         )
+         if normalized_parsed:
+             response.tool_calls = normalized_parsed
+
+         # Always use the cleaned content from AbstractCore parsing when we are not explicitly preserving tags.
+         if self._should_clean_tool_call_markup(tool_call_tags):
+             cleaned_content = getattr(parsed, "content", None)
+             if isinstance(cleaned_content, str):
+                 response.content = cleaned_content
+
+         return response
+
+     def _should_clean_tool_call_markup(self, tool_call_tags: Optional[str]) -> bool:
+         """Return True when we should strip tool-call markup from assistant content."""
+         if tool_call_tags is None:
+             return True
+         # OpenAI/Codex formats carry tool calls in structured fields, not in content.
+         value = str(tool_call_tags).strip().lower()
+         return value in {"openai", "codex"}
+
+     def _get_allowed_tool_names(self, tools: List[Dict[str, Any]]) -> set[str]:
+         """Extract allowed tool names from provider-normalized tool definitions."""
+         names: set[str] = set()
+         for tool in tools or []:
+             if not isinstance(tool, dict):
+                 continue
+             name = tool.get("name")
+             if isinstance(name, str) and name.strip():
+                 names.add(name.strip())
+                 continue
+             func = tool.get("function") if isinstance(tool.get("function"), dict) else None
+             fname = func.get("name") if isinstance(func, dict) else None
+             if isinstance(fname, str) and fname.strip():
+                 names.add(fname.strip())
+         return names
+
+     def _normalize_tool_calls_payload(
+         self,
+         tool_calls: Any,
+         *,
+         allowed_tool_names: Optional[set[str]] = None,
+     ) -> Optional[List[Dict[str, Any]]]:
+         """Normalize tool call shapes into a canonical dict form.
+
+         Canonical shape:
+             {"name": str, "arguments": dict, "call_id": Optional[str]}
+         """
+         if tool_calls is None or not isinstance(tool_calls, list):
+             return None
+
+         def _unwrap_arguments(arguments: Any, *, expected_tool_name: Optional[str]) -> Any:
+             """Unwrap common wrapper payloads to get tool kwargs.
+
+             Some providers/models emit nested wrappers like:
+                 {"name":"tool","arguments":{...},"call_id": "..."}
+             inside the tool call `arguments` field (or even multiple times).
+
+             We unwrap when the object looks like a wrapper (only wrapper keys) OR when
+             it includes wrapper metadata fields (e.g. "name"/"call_id") and an inner
+             "arguments" dict. When wrapper fields and tool kwargs are partially mixed,
+             we merge the outer kwargs into the inner dict (inner takes precedence).
+             """
+             if not isinstance(arguments, dict):
+                 return arguments
+
+             wrapper_keys = {"name", "arguments", "call_id", "id"}
+             current = arguments
+             for _ in range(4):
+                 if not isinstance(current, dict):
+                     break
+                 keys = set(current.keys())
+                 if "arguments" not in current:
+                     break
+                 inner = current.get("arguments")
+                 if isinstance(inner, dict) or isinstance(inner, str):
+                     inner_dict: Any = inner
+                     if isinstance(inner, str):
+                         parsed = loads_dict_like(inner)
+                         inner_dict = parsed if isinstance(parsed, dict) else None
+                     if not isinstance(inner_dict, dict):
+                         break
+
+                     name_matches = False
+                     raw_name = current.get("name")
+                     if isinstance(raw_name, str) and expected_tool_name and raw_name.strip() == expected_tool_name:
+                         name_matches = True
+
+                     wrapperish = keys.issubset(wrapper_keys) or name_matches or bool(keys & {"call_id", "id"})
+                     if not wrapperish:
+                         break
+
+                     # Merge any outer kwargs that were accidentally placed alongside wrapper fields.
+                     extras = {k: v for k, v in current.items() if k not in wrapper_keys}
+                     if extras:
+                         merged = dict(inner_dict)
+                         for k, v in extras.items():
+                             merged.setdefault(k, v)
+                         current = merged
+                     else:
+                         current = inner_dict
+                     continue
+                 break
+
+             return current
+
+         def _map_wrapped_name_to_allowed(raw: str, allowed: set[str]) -> Optional[str]:
+             """Best-effort mapping when a provider returns a wrapped tool name.
+
+             Some OpenAI-compatible servers/models occasionally return tool names wrapped in
+             extra tokens/text (e.g. "{function-name: write_file}"). If we can confidently
+             detect an allowed tool name as a standalone token within the raw string, map it
+             back to the exact allowed name so tool execution can proceed.
+             """
+             s = str(raw or "").strip()
+             if not s:
+                 return None
+             if s in allowed:
+                 return s
+
+             try:
+                 import re
+
+                 # Prefer exact token-boundary matches (tool names are usually snake_case).
+                 candidates: List[str] = []
+                 for name in allowed:
+                     if not isinstance(name, str) or not name:
+                         continue
+                     pat = r"(^|[^\w])" + re.escape(name) + r"([^\w]|$)"
+                     if re.search(pat, s):
+                         candidates.append(name)
+                 if candidates:
+                     # Prefer the most specific (longest) match deterministically.
+                     return max(candidates, key=lambda n: (len(n), n))
+             except Exception:
+                 return None
+
+             return None
+
+         normalized: List[Dict[str, Any]] = []
+
+         for tc in tool_calls:
+             name: Optional[str] = None
+             arguments: Any = None
+             call_id: Any = None
+
+             if isinstance(tc, dict):
+                 call_id = tc.get("call_id", None)
+                 if call_id is None:
+                     call_id = tc.get("id", None)
+
+                 raw_name = tc.get("name")
+                 raw_args = tc.get("arguments")
+
+                 func = tc.get("function") if isinstance(tc.get("function"), dict) else None
+                 if func and (not isinstance(raw_name, str) or not raw_name.strip()):
+                     raw_name = func.get("name")
+                 if func and raw_args is None:
+                     raw_args = func.get("arguments")
+
+                 if isinstance(raw_name, str) and raw_name.strip():
+                     name = raw_name.strip()
+                     arguments = raw_args if raw_args is not None else {}
+             else:
+                 raw_name = getattr(tc, "name", None)
+                 raw_args = getattr(tc, "arguments", None)
+                 call_id = getattr(tc, "call_id", None)
+                 if isinstance(raw_name, str) and raw_name.strip():
+                     name = raw_name.strip()
+                     arguments = raw_args if raw_args is not None else {}
+
+             if not isinstance(name, str) or not name:
+                 continue
+             if isinstance(allowed_tool_names, set) and allowed_tool_names and name not in allowed_tool_names:
+                 mapped = _map_wrapped_name_to_allowed(name, allowed_tool_names)
+                 if not isinstance(mapped, str) or not mapped:
+                     continue
+                 name = mapped
+
+             if isinstance(arguments, str):
+                 parsed = loads_dict_like(arguments)
+                 arguments = parsed if isinstance(parsed, dict) else {}
+
+             # Recover tool kwargs from nested wrapper payloads when present.
+             if isinstance(arguments, dict) and call_id is None:
+                 wrapper_id = arguments.get("call_id") or arguments.get("id")
+                 if isinstance(wrapper_id, str) and wrapper_id.strip():
+                     call_id = wrapper_id.strip()
+             arguments = _unwrap_arguments(arguments, expected_tool_name=name)
+             if not isinstance(arguments, dict):
+                 arguments = {}
+
+             try:
+                 from ..tools.arg_canonicalizer import canonicalize_tool_arguments
+
+                 arguments = canonicalize_tool_arguments(name, arguments)
+             except Exception:
+                 pass
+
+             normalized.append(
+                 {
+                     "name": name,
+                     "arguments": arguments,
+                     "call_id": str(call_id) if call_id is not None else None,
+                 }
+             )
+
+         if not normalized:
+             return None
+
+         # Defense-in-depth: remove accidental duplicates introduced by overlapping parsing paths.
+         unique: List[Dict[str, Any]] = []
+         seen: set[tuple[str, str]] = set()
+         for tc in normalized:
+             try:
+                 args_key = json.dumps(tc.get("arguments", {}), sort_keys=True, ensure_ascii=False)
+             except Exception:
+                 args_key = str(tc.get("arguments", {}))
+             key = (str(tc.get("name") or ""), args_key)
+             if key in seen:
+                 continue
+             seen.add(key)
+             unique.append(tc)
+
+         return unique or None
+
+     def _clean_content_using_tool_calls(self, content: str, tool_calls: List[Dict[str, Any]]) -> str:
+         """Strip tool-call markup from assistant content using known tool calls."""
+         try:
+             from ..tools.core import ToolCall as CoreToolCall
+             from ..tools.parser import clean_tool_syntax
+
+             core_calls: List[CoreToolCall] = []
+             for tc in tool_calls or []:
+                 if not isinstance(tc, dict):
+                     continue
+                 name = tc.get("name")
+                 if not isinstance(name, str) or not name.strip():
+                     continue
+                 args = tc.get("arguments")
+                 args_dict = dict(args) if isinstance(args, dict) else {}
+                 core_calls.append(CoreToolCall(name=name.strip(), arguments=args_dict, call_id=tc.get("call_id")))
+
+             if not core_calls:
+                 return content
+             return clean_tool_syntax(content, core_calls)
+         except Exception:
+             return content
+
      def _handle_tools_with_structured_output(self,
                                               prompt: str,
                                               messages: Optional[List[Dict[str, str]]] = None,
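
The trickiest part of the new methods is `_unwrap_arguments`. The sketch below re-derives just its peeling rule so it can be tested in isolation; it is simplified (it uses `json.loads` in place of `loads_dict_like` and omits the extra-key merge the real code performs).

    import json
    from typing import Any, Dict, Optional

    def unwrap_arguments(arguments: Any, expected_tool_name: Optional[str] = None) -> Dict[str, Any]:
        """Peel {"name", "arguments", "call_id", "id"} wrappers (nested or JSON-encoded) down to tool kwargs."""
        wrapper_keys = {"name", "arguments", "call_id", "id"}
        current = arguments
        for _ in range(4):                                  # bounded, as in the original
            if not isinstance(current, dict) or "arguments" not in current:
                break
            inner = current["arguments"]
            if isinstance(inner, str):
                try:
                    inner = json.loads(inner)
                except ValueError:
                    break
            if not isinstance(inner, dict):
                break
            wrapperish = (
                set(current) <= wrapper_keys
                or current.get("name") == expected_tool_name
                or bool(set(current) & {"call_id", "id"})
            )
            if not wrapperish:
                break
            current = inner
        return current if isinstance(current, dict) else {}

    wrapped = {"name": "write_file", "call_id": "abc",
               "arguments": {"arguments": '{"path": "notes.txt", "content": "hi"}', "call_id": "abc"}}
    assert unwrap_arguments(wrapped, "write_file") == {"path": "notes.txt", "content": "hi"}
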
@@ -1574,4 +2098,4 @@ Please provide a structured response."""
          # Yield chunks asynchronously
          for chunk in sync_gen:
              yield chunk
-             await asyncio.sleep(0)  # Yield control to event loop
+             await asyncio.sleep(0)  # Yield control to event loop