abstractcore-2.6.8-py3-none-any.whl → abstractcore-2.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. abstractcore/apps/summarizer.py +69 -27
  2. abstractcore/architectures/detection.py +190 -25
  3. abstractcore/assets/architecture_formats.json +129 -6
  4. abstractcore/assets/model_capabilities.json +789 -136
  5. abstractcore/config/main.py +2 -2
  6. abstractcore/config/manager.py +3 -1
  7. abstractcore/events/__init__.py +7 -1
  8. abstractcore/mcp/__init__.py +30 -0
  9. abstractcore/mcp/client.py +213 -0
  10. abstractcore/mcp/factory.py +64 -0
  11. abstractcore/mcp/naming.py +28 -0
  12. abstractcore/mcp/stdio_client.py +336 -0
  13. abstractcore/mcp/tool_source.py +164 -0
  14. abstractcore/processing/basic_deepsearch.py +1 -1
  15. abstractcore/processing/basic_summarizer.py +300 -83
  16. abstractcore/providers/anthropic_provider.py +91 -10
  17. abstractcore/providers/base.py +537 -16
  18. abstractcore/providers/huggingface_provider.py +17 -8
  19. abstractcore/providers/lmstudio_provider.py +170 -25
  20. abstractcore/providers/mlx_provider.py +13 -10
  21. abstractcore/providers/ollama_provider.py +42 -26
  22. abstractcore/providers/openai_compatible_provider.py +87 -22
  23. abstractcore/providers/openai_provider.py +12 -9
  24. abstractcore/providers/streaming.py +201 -39
  25. abstractcore/providers/vllm_provider.py +78 -21
  26. abstractcore/server/app.py +65 -28
  27. abstractcore/structured/retry.py +20 -7
  28. abstractcore/tools/__init__.py +5 -4
  29. abstractcore/tools/abstractignore.py +166 -0
  30. abstractcore/tools/arg_canonicalizer.py +61 -0
  31. abstractcore/tools/common_tools.py +2311 -772
  32. abstractcore/tools/core.py +109 -13
  33. abstractcore/tools/handler.py +17 -3
  34. abstractcore/tools/parser.py +798 -155
  35. abstractcore/tools/registry.py +107 -2
  36. abstractcore/tools/syntax_rewriter.py +68 -6
  37. abstractcore/tools/tag_rewriter.py +186 -1
  38. abstractcore/utils/jsonish.py +111 -0
  39. abstractcore/utils/version.py +1 -1
  40. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/METADATA +11 -2
  41. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/RECORD +45 -36
  42. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/WHEEL +0 -0
  43. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/entry_points.txt +0 -0
  44. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/licenses/LICENSE +0 -0
  45. {abstractcore-2.6.8.dist-info → abstractcore-2.9.0.dist-info}/top_level.txt +0 -0
abstractcore/providers/base.py

@@ -6,8 +6,11 @@ import time
 import uuid
 import asyncio
 import warnings
+import json
+import re
+import socket
 from collections import deque
-from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type
+from typing import List, Dict, Any, Optional, Union, Iterator, AsyncIterator, Type, TYPE_CHECKING
 from abc import ABC, abstractmethod
 
 try:
@@ -22,6 +25,7 @@ from ..core.types import GenerateResponse
 from ..events import EventType, Event
 from datetime import datetime
 from ..utils.structured_logging import get_logger
+from ..utils.jsonish import loads_dict_like
 from ..exceptions import (
     ProviderAPIError,
     AuthenticationError,
@@ -33,6 +37,10 @@ from ..architectures import detect_architecture, get_architecture_format, get_mo
 from ..tools import execute_tools
 from ..core.retry import RetryManager, RetryConfig
 
+if TYPE_CHECKING:  # pragma: no cover
+    # Imported for type checking only to avoid hard dependencies in minimal installs.
+    from ..media.types import MediaContent
+
 
 class BaseProvider(AbstractCoreInterface, ABC):
     """
@@ -52,10 +60,52 @@ class BaseProvider(AbstractCoreInterface, ABC):
         self.architecture_config = get_architecture_format(self.architecture)
         self.model_capabilities = get_model_capabilities(model)
 
-        # Setup timeout configuration
-        # Default to None for unlimited timeout
-        self._timeout = kwargs.get('timeout', None)  # Default None for unlimited HTTP requests
-        self._tool_timeout = kwargs.get('tool_timeout', None)  # Default None for unlimited tool execution
+        # Setup timeout configuration (centralized defaults).
+        #
+        # Semantics:
+        # - If the caller passes `timeout=...`, we respect it (including `None` for unlimited).
+        # - If the caller omits `timeout`, we use AbstractCore's global config default.
+        # - Same logic for `tool_timeout`.
+        timeout_provided = "timeout" in kwargs
+        tool_timeout_provided = "tool_timeout" in kwargs
+
+        timeout_value = kwargs.get("timeout", None) if timeout_provided else None
+        tool_timeout_value = kwargs.get("tool_timeout", None) if tool_timeout_provided else None
+
+        if not timeout_provided or not tool_timeout_provided:
+            try:
+                from ..config.manager import get_config_manager
+
+                cfg = get_config_manager()
+            except Exception:
+                cfg = None
+
+            if not timeout_provided:
+                try:
+                    timeout_value = float(cfg.get_default_timeout()) if cfg is not None else None
+                except Exception:
+                    timeout_value = None
+
+            if not tool_timeout_provided:
+                try:
+                    tool_timeout_value = float(cfg.get_tool_timeout()) if cfg is not None else None
+                except Exception:
+                    tool_timeout_value = None
+
+        # Validate timeouts: non-positive numbers become "unlimited" (None).
+        try:
+            if isinstance(timeout_value, (int, float)) and float(timeout_value) <= 0:
+                timeout_value = None
+        except Exception:
+            pass
+        try:
+            if isinstance(tool_timeout_value, (int, float)) and float(tool_timeout_value) <= 0:
+                tool_timeout_value = None
+        except Exception:
+            pass
+
+        self._timeout = timeout_value  # None = unlimited HTTP requests
+        self._tool_timeout = tool_timeout_value  # None = unlimited tool execution
 
         # Setup tool execution mode
         # execute_tools: True = AbstractCore executes tools (legacy mode)
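
For orientation, a sketch of the resulting precedence from the caller's side. The `create_llm` factory, provider name, and the 300-second config default are assumptions used only for illustration; the precedence rules themselves are the ones documented in the hunk above.

    # Illustration only: assumes a config default of 300s behind get_default_timeout().
    from abstractcore import create_llm  # assumed import path

    llm_default   = create_llm("ollama", model="qwen3:8b")                  # kwarg omitted  -> config default (e.g. 300.0)
    llm_unlimited = create_llm("ollama", model="qwen3:8b", timeout=None)    # explicit None  -> unlimited
    llm_custom    = create_llm("ollama", model="qwen3:8b", timeout=60)      # explicit value -> 60s
    llm_lenient   = create_llm("ollama", model="qwen3:8b", timeout=-1)      # non-positive   -> treated as unlimited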
@@ -299,11 +349,64 @@ class BaseProvider(AbstractCoreInterface, ABC):
         Returns:
             Custom exception
         """
-        # Don't re-wrap our custom exceptions
-        if isinstance(error, (ModelNotFoundError, AuthenticationError, RateLimitError,
-                              InvalidRequestError, ProviderAPIError)):
+        def _provider_label() -> str:
+            raw = getattr(self, "provider", None)
+            if isinstance(raw, str) and raw.strip():
+                return raw.strip()
+            name = self.__class__.__name__
+            return name[:-8] if name.endswith("Provider") else name
+
+        def _configured_timeout_s() -> Optional[float]:
+            v = getattr(self, "_timeout", None)
+            if v is None:
+                return None
+            try:
+                f = float(v)
+            except Exception:
+                return None
+            return f if f > 0 else None
+
+        def _looks_like_timeout(exc: Exception) -> bool:
+            # Type-based (preferred)
+            if isinstance(exc, (TimeoutError, asyncio.TimeoutError, socket.timeout)):
+                return True
+            cls = exc.__class__
+            name = (getattr(cls, "__name__", "") or "").lower()
+            mod = (getattr(cls, "__module__", "") or "").lower()
+            if "timeout" in name:
+                return True
+            if mod.startswith(("httpx", "requests", "aiohttp")) and ("timeout" in name):
+                return True
+
+            # String-based fallback (covers wrapped SDK exceptions)
+            msg = str(exc or "").lower()
+            return ("timed out" in msg) or ("timeout" in msg) or ("time out" in msg)
+
+        def _has_explicit_duration(msg: str) -> bool:
+            # e.g. "... after 300s" or "... after 300.0s"
+            return bool(re.search(r"\bafter\s+\d+(?:\.\d+)?\s*s\b", msg))
+
+        # Preserve typed custom exceptions, but allow ProviderAPIError timeout messages
+        # to be normalized centrally (avoid per-provider inconsistencies).
+        if isinstance(error, ProviderAPIError):
+            msg = str(error)
+            if _looks_like_timeout(error) and not _has_explicit_duration(msg):
+                t = _configured_timeout_s()
+                if t is not None:
+                    return ProviderAPIError(f"{_provider_label()} API error: timed out after {t}s")
+                return ProviderAPIError(f"{_provider_label()} API error: timed out")
+            return error
+
+        if isinstance(error, (ModelNotFoundError, AuthenticationError, RateLimitError, InvalidRequestError)):
             return error
 
+        # Central timeout normalization for all providers (httpx/requests/SDKs).
+        if _looks_like_timeout(error):
+            t = _configured_timeout_s()
+            if t is not None:
+                return ProviderAPIError(f"{_provider_label()} API error: timed out after {t}s")
+            return ProviderAPIError(f"{_provider_label()} API error: timed out")
+
         error_str = str(error).lower()
 
         if "rate" in error_str and "limit" in error_str:
@@ -345,6 +448,16 @@ class BaseProvider(AbstractCoreInterface, ABC):
             execute_tools: Whether to execute tools automatically (True) or let agent handle execution (False)
             glyph_compression: Glyph compression preference ("auto", "always", "never")
         """
+        # Normalize token limit naming at the provider boundary.
+        #
+        # - OpenAI-style APIs use `max_tokens` for the output-token cap.
+        # - AbstractCore's unified internal name is `max_output_tokens`.
+        #
+        # AbstractRuntime (and some hosts) may still emit `max_tokens` in effect payloads.
+        # That translation is a provider integration concern, so keep it in AbstractCore.
+        if "max_output_tokens" not in kwargs and "max_tokens" in kwargs and kwargs.get("max_tokens") is not None:
+            kwargs["max_output_tokens"] = kwargs.pop("max_tokens")
+
         # Handle structured output request
         if response_model is not None:
             if not PYDANTIC_AVAILABLE:
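
From the caller's perspective the alias means OpenAI-style and AbstractCore-style budget kwargs now behave identically. A sketch, assuming the package's `create_llm` factory and `generate()` call shape (only the kwarg translation itself comes from this hunk):

    llm = create_llm("openai", model="gpt-4o-mini")  # assumed factory and model name
    r1 = llm.generate("Summarize this diff.", max_output_tokens=512)  # unified name
    r2 = llm.generate("Summarize this diff.", max_tokens=512)         # OpenAI-style name, now mapped to max_output_tokens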
@@ -437,6 +550,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
         # Define generation function for retry wrapper
         def _execute_generation():
             start_time = time.time()
+            start_perf = time.perf_counter()
 
             # Emit generation started event (covers request received)
             event_data = {
@@ -464,7 +578,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
                     **kwargs
                 )
 
-                return response, start_time
+                return response, start_time, start_perf
 
             except Exception as e:
                 # Convert to custom exception and re-raise for retry handling
@@ -473,7 +587,7 @@ class BaseProvider(AbstractCoreInterface, ABC):
 
         # Execute with retry
         try:
-            response, start_time = self.retry_manager.execute_with_retry(
+            response, start_time, start_perf = self.retry_manager.execute_with_retry(
                 _execute_generation,
                 provider_key=self.provider_key
             )
@@ -496,7 +610,22 @@ class BaseProvider(AbstractCoreInterface, ABC):
                 )
 
                 # Process stream with incremental tool detection and execution
+                ttft_ms: Optional[float] = None
                 for processed_chunk in processor.process_stream(response, converted_tools):
+                    if isinstance(processed_chunk.content, str) and processed_chunk.content:
+                        processed_chunk.content = self._strip_output_wrappers(processed_chunk.content)
+                    if ttft_ms is None:
+                        has_content = isinstance(processed_chunk.content, str) and bool(processed_chunk.content)
+                        has_tools = isinstance(processed_chunk.tool_calls, list) and bool(processed_chunk.tool_calls)
+                        if has_content or has_tools:
+                            ttft_ms = round((time.perf_counter() - start_perf) * 1000, 1)
+                            meta = processed_chunk.metadata if isinstance(processed_chunk.metadata, dict) else {}
+                            timing = meta.get("_timing") if isinstance(meta.get("_timing"), dict) else {}
+                            merged = dict(timing)
+                            merged.setdefault("source", "client_wall")
+                            merged["ttft_ms"] = ttft_ms
+                            meta["_timing"] = merged
+                            processed_chunk.metadata = meta
                     yield processed_chunk
 
                 # Track generation after streaming completes
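
A sketch of how a consumer could read the new client-side time-to-first-token figure. The streaming call shape and the `llm` object are assumptions; the `_timing` metadata keys (`ttft_ms`, `source`) come from the hunk above:

    for chunk in llm.generate("Tell me a short story.", stream=True):
        timing = chunk.metadata.get("_timing") if isinstance(chunk.metadata, dict) else None
        if isinstance(timing, dict) and "ttft_ms" in timing:
            print(f"first token after {timing['ttft_ms']} ms (source={timing.get('source')})")
            break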
@@ -509,10 +638,22 @@ class BaseProvider(AbstractCoreInterface, ABC):
 
             return unified_stream()
         else:
-            # Non-streaming: apply tag rewriting if needed
-            if response and response.content and converted_tools:
-                # Apply default qwen3 rewriting for non-streaming responses
-                response = self._apply_non_streaming_tag_rewriting(response, tool_call_tags)
+            # Non-streaming: normalize tool calls into structured form.
+            if response and converted_tools:
+                response = self._normalize_tool_calls_passthrough(
+                    response=response,
+                    tools=converted_tools,
+                    tool_call_tags=tool_call_tags,
+                )
+
+            # Optional: rewrite tool-call tags in content for downstream clients that parse tags.
+            # Note: when tool_call_tags is None (default), we return cleaned content.
+            if tool_call_tags and response.content and not self._should_clean_tool_call_markup(tool_call_tags):
+                response = self._apply_non_streaming_tag_rewriting(response, tool_call_tags)
+
+            # Strip model-specific output wrappers (e.g. GLM <|begin_of_box|>…<|end_of_box|>).
+            if response and isinstance(response.content, str) and response.content:
+                response.content = self._strip_output_wrappers(response.content)
 
             # Add visual token calculation if media metadata is available
             if media_metadata and response:
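
For orientation, a sketch of the two non-streaming behaviors wired up here. The toy tool, the factory call, and the "qwen3" tag value are illustrative assumptions; the default clean-content contract is the one documented in the method docstrings added later in this diff:

    def list_files(path: str = ".") -> str:
        """Toy tool used only for illustration."""
        return "README.md"

    # Default (tool_call_tags omitted): markup is stripped and structured tool_calls are populated.
    resp = llm.generate("List the files here", tools=[list_files])
    # resp.tool_calls -> [{"name": "list_files", "arguments": {...}, "call_id": ...}]
    # resp.content    -> cleaned text, without <tool_call>...</tool_call> style markup

    # Tag passthrough (e.g. tool_call_tags="qwen3"): content keeps the tags for clients that
    # parse them, and tool_calls is still populated.
    resp = llm.generate("List the files here", tools=[list_files], tool_call_tags="qwen3")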
@@ -817,10 +958,26 @@ class BaseProvider(AbstractCoreInterface, ABC):
 
         # Override max_output_tokens if provided in kwargs
         effective_max_output = kwargs.get("max_output_tokens", max_output_tokens)
+        # Safety clamp: never exceed the provider/model's configured max_output_tokens.
+        #
+        # Upstream callers (runtimes/agents) may request large output budgets based on
+        # stale capabilities or user configuration. Providers should not forward values
+        # that violate the model's hard limits (Anthropic returns 400 for this).
+        try:
+            if effective_max_output is None:
+                effective_max_output_i = int(max_output_tokens)
+            else:
+                effective_max_output_i = int(effective_max_output)
+        except Exception:
+            effective_max_output_i = int(max_output_tokens)
+        if effective_max_output_i <= 0:
+            effective_max_output_i = int(max_output_tokens)
+        if effective_max_output_i > int(max_output_tokens):
+            effective_max_output_i = int(max_output_tokens)
 
         # Return base kwargs with unified parameter
         result_kwargs = kwargs.copy()
-        result_kwargs["max_output_tokens"] = effective_max_output
+        result_kwargs["max_output_tokens"] = effective_max_output_i
 
         # Add unified generation parameters with fallback hierarchy: kwargs → instance → defaults
         result_kwargs["temperature"] = result_kwargs.get("temperature", self.temperature)
@@ -1349,6 +1506,352 @@ class BaseProvider(AbstractCoreInterface, ABC):
             # Return original response if rewriting fails
             return response
 
+    def _strip_output_wrappers(self, content: str) -> str:
+        """Strip known model-specific wrapper tokens around assistant output.
+
+        Some model/server combinations emit wrapper tokens like:
+            <|begin_of_box|> ... <|end_of_box|>
+        We remove these only when they appear as leading/trailing wrappers (not when
+        embedded mid-text).
+        """
+        if not isinstance(content, str) or not content:
+            return content
+
+        wrappers: Dict[str, str] = {}
+        for src in (self.architecture_config, self.model_capabilities):
+            if not isinstance(src, dict):
+                continue
+            w = src.get("output_wrappers")
+            if not isinstance(w, dict):
+                continue
+            start = w.get("start")
+            end = w.get("end")
+            if isinstance(start, str) and start.strip():
+                wrappers.setdefault("start", start.strip())
+            if isinstance(end, str) and end.strip():
+                wrappers.setdefault("end", end.strip())
+
+        if not wrappers:
+            return content
+
+        out = content
+        start_token = wrappers.get("start")
+        end_token = wrappers.get("end")
+
+        if isinstance(start_token, str) and start_token:
+            out = re.sub(r"^\s*" + re.escape(start_token) + r"\s*", "", out, count=1)
+        if isinstance(end_token, str) and end_token:
+            out = re.sub(r"\s*" + re.escape(end_token) + r"\s*$", "", out, count=1)
+
+        return out
+
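
A standalone mirror of the leading/trailing stripping, for illustration. The `output_wrappers` key names and the GLM-style tokens appear in this diff; the exact JSON layout they live in (architecture_formats.json or model_capabilities.json) is an assumption:

    import re

    def strip_wrappers(text: str, start: str, end: str) -> str:
        # Same behavior as _strip_output_wrappers: only leading/trailing wrappers are removed.
        text = re.sub(r"^\s*" + re.escape(start) + r"\s*", "", text, count=1)
        return re.sub(r"\s*" + re.escape(end) + r"\s*$", "", text, count=1)

    START, END = "<|begin_of_box|>", "<|end_of_box|>"   # e.g. {"output_wrappers": {"start": ..., "end": ...}}
    assert strip_wrappers("<|begin_of_box|>42<|end_of_box|>", START, END) == "42"
    assert strip_wrappers("keep <|begin_of_box|>inner<|end_of_box|> text", START, END) == \
        "keep <|begin_of_box|>inner<|end_of_box|> text"   # embedded wrappers are left alone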
+    def _normalize_tool_calls_passthrough(
+        self,
+        *,
+        response: GenerateResponse,
+        tools: List[Dict[str, Any]],
+        tool_call_tags: Optional[str] = None,
+    ) -> GenerateResponse:
+        """Populate `response.tool_calls` (and usually clean `response.content`) in passthrough mode.
+
+        Contract:
+        - AbstractCore always returns structured `tool_calls` when tools are provided and the model emits tool syntax,
+          even for prompted tool calling (tool calls embedded in `content`).
+        - By default (`tool_call_tags is None`), tool-call markup is stripped from `content` for clean UX/history.
+        - When `tool_call_tags` is set, we preserve `content` (for clients that parse tags) but still populate
+          structured `tool_calls`.
+        """
+
+        # Only normalize when tools were actually provided.
+        if not tools:
+            return response
+
+        allowed_names = self._get_allowed_tool_names(tools)
+
+        # 1) If provider already returned tool_calls (native tools), normalize shape + args.
+        normalized_existing = self._normalize_tool_calls_payload(
+            response.tool_calls,
+            allowed_tool_names=allowed_names,
+        )
+        if normalized_existing:
+            response.tool_calls = normalized_existing
+
+            # Clean any echoed tool syntax from content unless the caller explicitly requested tag passthrough.
+            if self._should_clean_tool_call_markup(tool_call_tags) and isinstance(response.content, str) and response.content.strip():
+                cleaned = self._clean_content_using_tool_calls(response.content, normalized_existing)
+                response.content = cleaned
+
+            return response
+
+        # 2) Prompted tools: parse tool calls embedded in content.
+        content = response.content
+        if not isinstance(content, str) or not content.strip():
+            return response
+
+        tool_handler = getattr(self, "tool_handler", None)
+        if tool_handler is None:
+            return response
+
+        try:
+            parsed = tool_handler.parse_response(content, mode="prompted")
+        except Exception:
+            return response
+
+        parsed_calls = getattr(parsed, "tool_calls", None)
+        if not isinstance(parsed_calls, list) or not parsed_calls:
+            return response
+
+        normalized_parsed = self._normalize_tool_calls_payload(
+            parsed_calls,
+            allowed_tool_names=allowed_names,
+        )
+        if normalized_parsed:
+            response.tool_calls = normalized_parsed
+
+        # Always use the cleaned content from AbstractCore parsing when we are not explicitly preserving tags.
+        if self._should_clean_tool_call_markup(tool_call_tags):
+            cleaned_content = getattr(parsed, "content", None)
+            if isinstance(cleaned_content, str):
+                response.content = cleaned_content
+
+        return response
+
+    def _should_clean_tool_call_markup(self, tool_call_tags: Optional[str]) -> bool:
+        """Return True when we should strip tool-call markup from assistant content."""
+        if tool_call_tags is None:
+            return True
+        # OpenAI/Codex formats carry tool calls in structured fields, not in content.
+        value = str(tool_call_tags).strip().lower()
+        return value in {"openai", "codex"}
+
+    def _get_allowed_tool_names(self, tools: List[Dict[str, Any]]) -> set[str]:
+        """Extract allowed tool names from provider-normalized tool definitions."""
+        names: set[str] = set()
+        for tool in tools or []:
+            if not isinstance(tool, dict):
+                continue
+            name = tool.get("name")
+            if isinstance(name, str) and name.strip():
+                names.add(name.strip())
+                continue
+            func = tool.get("function") if isinstance(tool.get("function"), dict) else None
+            fname = func.get("name") if isinstance(func, dict) else None
+            if isinstance(fname, str) and fname.strip():
+                names.add(fname.strip())
+        return names
+
+    def _normalize_tool_calls_payload(
+        self,
+        tool_calls: Any,
+        *,
+        allowed_tool_names: Optional[set[str]] = None,
+    ) -> Optional[List[Dict[str, Any]]]:
+        """Normalize tool call shapes into a canonical dict form.
+
+        Canonical shape:
+            {"name": str, "arguments": dict, "call_id": Optional[str]}
+        """
+        if tool_calls is None or not isinstance(tool_calls, list):
+            return None
+
+        def _unwrap_arguments(arguments: Any, *, expected_tool_name: Optional[str]) -> Any:
+            """Unwrap common wrapper payloads to get tool kwargs.
+
+            Some providers/models emit nested wrappers like:
+                {"name":"tool","arguments":{...},"call_id": "..."}
+            inside the tool call `arguments` field (or even multiple times).
+
+            We unwrap when the object looks like a wrapper (only wrapper keys) OR when
+            it includes wrapper metadata fields (e.g. "name"/"call_id") and an inner
+            "arguments" dict. When wrapper fields and tool kwargs are partially mixed,
+            we merge the outer kwargs into the inner dict (inner takes precedence).
+            """
+            if not isinstance(arguments, dict):
+                return arguments
+
+            wrapper_keys = {"name", "arguments", "call_id", "id"}
+            current = arguments
+            for _ in range(4):
+                if not isinstance(current, dict):
+                    break
+                keys = set(current.keys())
+                if "arguments" not in current:
+                    break
+                inner = current.get("arguments")
+                if isinstance(inner, dict) or isinstance(inner, str):
+                    inner_dict: Any = inner
+                    if isinstance(inner, str):
+                        parsed = loads_dict_like(inner)
+                        inner_dict = parsed if isinstance(parsed, dict) else None
+                    if not isinstance(inner_dict, dict):
+                        break
+
+                    name_matches = False
+                    raw_name = current.get("name")
+                    if isinstance(raw_name, str) and expected_tool_name and raw_name.strip() == expected_tool_name:
+                        name_matches = True
+
+                    wrapperish = keys.issubset(wrapper_keys) or name_matches or bool(keys & {"call_id", "id"})
+                    if not wrapperish:
+                        break
+
+                    # Merge any outer kwargs that were accidentally placed alongside wrapper fields.
+                    extras = {k: v for k, v in current.items() if k not in wrapper_keys}
+                    if extras:
+                        merged = dict(inner_dict)
+                        for k, v in extras.items():
+                            merged.setdefault(k, v)
+                        current = merged
+                    else:
+                        current = inner_dict
+                    continue
+                break
+
+            return current
+
+        def _map_wrapped_name_to_allowed(raw: str, allowed: set[str]) -> Optional[str]:
+            """Best-effort mapping when a provider returns a wrapped tool name.
+
+            Some OpenAI-compatible servers/models occasionally return tool names wrapped in
+            extra tokens/text (e.g. "{function-name: write_file}"). If we can confidently
+            detect an allowed tool name as a standalone token within the raw string, map it
+            back to the exact allowed name so tool execution can proceed.
+            """
+            s = str(raw or "").strip()
+            if not s:
+                return None
+            if s in allowed:
+                return s
+
+            try:
+                import re
+
+                # Prefer exact token-boundary matches (tool names are usually snake_case).
+                candidates: List[str] = []
+                for name in allowed:
+                    if not isinstance(name, str) or not name:
+                        continue
+                    pat = r"(^|[^\w])" + re.escape(name) + r"([^\w]|$)"
+                    if re.search(pat, s):
+                        candidates.append(name)
+                if candidates:
+                    # Prefer the most specific (longest) match deterministically.
+                    return max(candidates, key=lambda n: (len(n), n))
+            except Exception:
+                return None
+
+            return None
+
+        normalized: List[Dict[str, Any]] = []
+
+        for tc in tool_calls:
+            name: Optional[str] = None
+            arguments: Any = None
+            call_id: Any = None
+
+            if isinstance(tc, dict):
+                call_id = tc.get("call_id", None)
+                if call_id is None:
+                    call_id = tc.get("id", None)
+
+                raw_name = tc.get("name")
+                raw_args = tc.get("arguments")
+
+                func = tc.get("function") if isinstance(tc.get("function"), dict) else None
+                if func and (not isinstance(raw_name, str) or not raw_name.strip()):
+                    raw_name = func.get("name")
+                if func and raw_args is None:
+                    raw_args = func.get("arguments")
+
+                if isinstance(raw_name, str) and raw_name.strip():
+                    name = raw_name.strip()
+                    arguments = raw_args if raw_args is not None else {}
+            else:
+                raw_name = getattr(tc, "name", None)
+                raw_args = getattr(tc, "arguments", None)
+                call_id = getattr(tc, "call_id", None)
+                if isinstance(raw_name, str) and raw_name.strip():
+                    name = raw_name.strip()
+                    arguments = raw_args if raw_args is not None else {}
+
+            if not isinstance(name, str) or not name:
+                continue
+            if isinstance(allowed_tool_names, set) and allowed_tool_names and name not in allowed_tool_names:
+                mapped = _map_wrapped_name_to_allowed(name, allowed_tool_names)
+                if not isinstance(mapped, str) or not mapped:
+                    continue
+                name = mapped
+
+            if isinstance(arguments, str):
+                parsed = loads_dict_like(arguments)
+                arguments = parsed if isinstance(parsed, dict) else {}
+
+            # Recover tool kwargs from nested wrapper payloads when present.
+            if isinstance(arguments, dict) and call_id is None:
+                wrapper_id = arguments.get("call_id") or arguments.get("id")
+                if isinstance(wrapper_id, str) and wrapper_id.strip():
+                    call_id = wrapper_id.strip()
+            arguments = _unwrap_arguments(arguments, expected_tool_name=name)
+            if not isinstance(arguments, dict):
+                arguments = {}
+
+            try:
+                from ..tools.arg_canonicalizer import canonicalize_tool_arguments
+
+                arguments = canonicalize_tool_arguments(name, arguments)
+            except Exception:
+                pass
+
+            normalized.append(
+                {
+                    "name": name,
+                    "arguments": arguments,
+                    "call_id": str(call_id) if call_id is not None else None,
+                }
+            )
+
+        if not normalized:
+            return None
+
+        # Defense-in-depth: remove accidental duplicates introduced by overlapping parsing paths.
+        unique: List[Dict[str, Any]] = []
+        seen: set[tuple[str, str]] = set()
+        for tc in normalized:
+            try:
+                args_key = json.dumps(tc.get("arguments", {}), sort_keys=True, ensure_ascii=False)
+            except Exception:
+                args_key = str(tc.get("arguments", {}))
+            key = (str(tc.get("name") or ""), args_key)
+            if key in seen:
+                continue
+            seen.add(key)
+            unique.append(tc)
+
+        return unique or None
+
+    def _clean_content_using_tool_calls(self, content: str, tool_calls: List[Dict[str, Any]]) -> str:
+        """Strip tool-call markup from assistant content using known tool calls."""
+        try:
+            from ..tools.core import ToolCall as CoreToolCall
+            from ..tools.parser import clean_tool_syntax
+
+            core_calls: List[CoreToolCall] = []
+            for tc in tool_calls or []:
+                if not isinstance(tc, dict):
+                    continue
+                name = tc.get("name")
+                if not isinstance(name, str) or not name.strip():
+                    continue
+                args = tc.get("arguments")
+                args_dict = dict(args) if isinstance(args, dict) else {}
+                core_calls.append(CoreToolCall(name=name.strip(), arguments=args_dict, call_id=tc.get("call_id")))
+
+            if not core_calls:
+                return content
+            return clean_tool_syntax(content, core_calls)
+        except Exception:
+            return content
+
     def _handle_tools_with_structured_output(self,
                                              prompt: str,
                                              messages: Optional[List[Dict[str, str]]] = None,
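
To make the canonical shape concrete, here are invented payloads of the kind `_normalize_tool_calls_payload` is written to accept, with the expected result in comments. The inputs are assumptions; the output shape is the docstring's canonical form:

    # OpenAI-style entry with JSON-string arguments:
    openai_style = {
        "id": "call_123",
        "function": {"name": "write_file", "arguments": '{"path": "notes.txt", "content": "hi"}'},
    }

    # Doubly wrapped arguments of the kind _unwrap_arguments targets:
    wrapped = {
        "name": "write_file",
        "arguments": {"name": "write_file", "call_id": "abc",
                      "arguments": {"path": "notes.txt", "content": "hi"}},
    }

    # Assuming "write_file" is among the allowed tool names, both should normalize to roughly:
    #   {"name": "write_file", "arguments": {"path": "notes.txt", "content": "hi"},
    #    "call_id": "call_123"}   # or "abc" for the wrapped variant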
@@ -1511,10 +2014,28 @@ Please provide a structured response."""
         Returns:
             GenerateResponse, AsyncIterator[GenerateResponse] for streaming, or BaseModel for structured output
         """
-        return await self._agenerate_internal(
+        response = await self._agenerate_internal(
             prompt, messages, system_prompt, tools, media, stream, **kwargs
         )
 
+        # Capture interaction trace if enabled (match sync generate_with_telemetry behavior)
+        # Only for non-streaming responses that are GenerateResponse objects
+        if not stream and self.enable_tracing and response and isinstance(response, GenerateResponse):
+            trace_id = self._capture_trace(
+                prompt=prompt,
+                messages=messages,
+                system_prompt=system_prompt,
+                tools=tools,
+                response=response,
+                kwargs=kwargs
+            )
+            # Attach trace_id to response metadata
+            if not response.metadata:
+                response.metadata = {}
+            response.metadata['trace_id'] = trace_id
+
+        return response
+
     async def _agenerate_internal(self,
                                   prompt: str,
                                   messages: Optional[List[Dict]],
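
Finally, a sketch of what the async parity gives callers, assuming tracing was enabled at construction and that the public async entry point is `agenerate` (both are assumptions; the hunk only shows the internal call and the metadata attachment):

    import asyncio

    async def main() -> None:
        llm = create_llm("anthropic", model="claude-3-5-haiku-latest", enable_tracing=True)  # names assumed
        response = await llm.agenerate("One sentence on wheels versus sdists.")
        # Non-streaming async calls now carry the same trace handle as sync calls:
        print((response.metadata or {}).get("trace_id"))

    asyncio.run(main())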