camel-ai 0.2.75a5__py3-none-any.whl → 0.2.76__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of camel-ai might be problematic.

Files changed (103)
  1. camel/__init__.py +1 -1
  2. camel/agents/chat_agent.py +1148 -298
  3. camel/agents/mcp_agent.py +30 -27
  4. camel/configs/__init__.py +9 -0
  5. camel/configs/amd_config.py +70 -0
  6. camel/configs/cometapi_config.py +104 -0
  7. camel/configs/nebius_config.py +103 -0
  8. camel/data_collectors/alpaca_collector.py +15 -6
  9. camel/environments/tic_tac_toe.py +1 -1
  10. camel/interpreters/__init__.py +2 -0
  11. camel/interpreters/docker/Dockerfile +3 -12
  12. camel/interpreters/microsandbox_interpreter.py +395 -0
  13. camel/loaders/__init__.py +11 -2
  14. camel/loaders/chunkr_reader.py +9 -0
  15. camel/memories/__init__.py +2 -1
  16. camel/memories/agent_memories.py +3 -1
  17. camel/memories/blocks/chat_history_block.py +21 -3
  18. camel/memories/records.py +88 -8
  19. camel/messages/base.py +127 -34
  20. camel/models/__init__.py +6 -0
  21. camel/models/amd_model.py +101 -0
  22. camel/models/azure_openai_model.py +0 -6
  23. camel/models/base_model.py +30 -0
  24. camel/models/cometapi_model.py +83 -0
  25. camel/models/model_factory.py +6 -0
  26. camel/models/nebius_model.py +83 -0
  27. camel/models/ollama_model.py +3 -3
  28. camel/models/openai_compatible_model.py +0 -6
  29. camel/models/openai_model.py +0 -6
  30. camel/models/zhipuai_model.py +61 -2
  31. camel/parsers/__init__.py +18 -0
  32. camel/parsers/mcp_tool_call_parser.py +176 -0
  33. camel/retrievers/auto_retriever.py +1 -0
  34. camel/runtimes/daytona_runtime.py +11 -12
  35. camel/societies/workforce/prompts.py +131 -50
  36. camel/societies/workforce/single_agent_worker.py +434 -49
  37. camel/societies/workforce/structured_output_handler.py +30 -18
  38. camel/societies/workforce/task_channel.py +163 -27
  39. camel/societies/workforce/utils.py +105 -12
  40. camel/societies/workforce/workforce.py +1357 -314
  41. camel/societies/workforce/workforce_logger.py +24 -5
  42. camel/storages/key_value_storages/json.py +15 -2
  43. camel/storages/object_storages/google_cloud.py +1 -1
  44. camel/storages/vectordb_storages/oceanbase.py +10 -11
  45. camel/storages/vectordb_storages/tidb.py +8 -6
  46. camel/tasks/task.py +4 -3
  47. camel/toolkits/__init__.py +18 -5
  48. camel/toolkits/aci_toolkit.py +45 -0
  49. camel/toolkits/code_execution.py +28 -1
  50. camel/toolkits/context_summarizer_toolkit.py +684 -0
  51. camel/toolkits/dingtalk.py +1135 -0
  52. camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
  53. camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +194 -34
  54. camel/toolkits/function_tool.py +6 -1
  55. camel/toolkits/github_toolkit.py +104 -17
  56. camel/toolkits/google_drive_mcp_toolkit.py +12 -31
  57. camel/toolkits/hybrid_browser_toolkit/config_loader.py +12 -0
  58. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +79 -2
  59. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +95 -59
  60. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  61. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
  62. camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
  63. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +619 -95
  64. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +7 -2
  65. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +115 -219
  66. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  67. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  68. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  69. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +1 -0
  70. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +39 -6
  71. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +412 -133
  72. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +9 -5
  73. camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +98 -31
  74. camel/toolkits/markitdown_toolkit.py +27 -1
  75. camel/toolkits/math_toolkit.py +64 -10
  76. camel/toolkits/mcp_toolkit.py +348 -348
  77. camel/toolkits/message_integration.py +3 -0
  78. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  79. camel/toolkits/note_taking_toolkit.py +18 -8
  80. camel/toolkits/notion_mcp_toolkit.py +16 -26
  81. camel/toolkits/origene_mcp_toolkit.py +8 -49
  82. camel/toolkits/playwright_mcp_toolkit.py +12 -31
  83. camel/toolkits/resend_toolkit.py +168 -0
  84. camel/toolkits/search_toolkit.py +13 -2
  85. camel/toolkits/slack_toolkit.py +50 -1
  86. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  87. camel/toolkits/terminal_toolkit/terminal_toolkit.py +924 -0
  88. camel/toolkits/terminal_toolkit/utils.py +532 -0
  89. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  90. camel/toolkits/video_analysis_toolkit.py +17 -11
  91. camel/toolkits/wechat_official_toolkit.py +483 -0
  92. camel/types/enums.py +155 -1
  93. camel/types/unified_model_type.py +10 -0
  94. camel/utils/commons.py +17 -0
  95. camel/utils/context_utils.py +804 -0
  96. camel/utils/mcp.py +136 -2
  97. camel/utils/token_counting.py +25 -17
  98. {camel_ai-0.2.75a5.dist-info → camel_ai-0.2.76.dist-info}/METADATA +158 -67
  99. {camel_ai-0.2.75a5.dist-info → camel_ai-0.2.76.dist-info}/RECORD +101 -80
  100. camel/loaders/pandas_reader.py +0 -368
  101. camel/toolkits/terminal_toolkit.py +0 -1788
  102. {camel_ai-0.2.75a5.dist-info → camel_ai-0.2.76.dist-info}/WHEEL +0 -0
  103. {camel_ai-0.2.75a5.dist-info → camel_ai-0.2.76.dist-info}/licenses/LICENSE +0 -0
@@ -14,13 +14,24 @@
14
14
  from __future__ import annotations
15
15
 
16
16
  import asyncio
17
+ import atexit
18
+ import base64
19
+ import concurrent.futures
20
+ import hashlib
21
+ import inspect
17
22
  import json
18
- import logging
19
- import queue
23
+ import math
24
+ import os
25
+ import random
26
+ import re
27
+ import tempfile
20
28
  import textwrap
21
29
  import threading
22
30
  import time
23
31
  import uuid
32
+ import warnings
33
+ from dataclasses import dataclass
34
+ from datetime import datetime
24
35
  from pathlib import Path
25
36
  from typing import (
26
37
  TYPE_CHECKING,
@@ -40,6 +51,7 @@ from typing import (
40
51
 
41
52
  from openai import (
42
53
  AsyncStream,
54
+ RateLimitError,
43
55
  Stream,
44
56
  )
45
57
  from pydantic import BaseModel, ValidationError
@@ -60,6 +72,7 @@ from camel.memories import (
60
72
  MemoryRecord,
61
73
  ScoreBasedContextCreator,
62
74
  )
75
+ from camel.memories.blocks.chat_history_block import EmptyMemoryWarning
63
76
  from camel.messages import (
64
77
  BaseMessage,
65
78
  FunctionCallingMessage,
@@ -89,16 +102,31 @@ from camel.utils import (
89
102
  model_from_json_schema,
90
103
  )
91
104
  from camel.utils.commons import dependencies_required
105
+ from camel.utils.context_utils import ContextUtility
92
106
 
93
107
  if TYPE_CHECKING:
94
108
  from camel.terminators import ResponseTerminator
95
109
 
96
110
  logger = get_logger(__name__)
97
111
 
112
+ # Cleanup temp files on exit
113
+ _temp_files: Set[str] = set()
114
+ _temp_files_lock = threading.Lock()
115
+
116
+
117
+ def _cleanup_temp_files():
118
+ with _temp_files_lock:
119
+ for path in _temp_files:
120
+ try:
121
+ os.unlink(path)
122
+ except Exception:
123
+ pass
124
+
125
+
126
+ atexit.register(_cleanup_temp_files)
127
+
98
128
  # AgentOps decorator setting
99
129
  try:
100
- import os
101
-
102
130
  if os.getenv("AGENTOPS_API_KEY") is not None:
103
131
  from agentops import track_agent
104
132
  else:
@@ -132,13 +160,60 @@ SIMPLE_FORMAT_PROMPT = TextPrompt(
132
160
  )
133
161
 
134
162
 
163
+ @dataclass
164
+ class _ToolOutputHistoryEntry:
165
+ tool_name: str
166
+ tool_call_id: str
167
+ result_text: str
168
+ record_uuids: List[str]
169
+ record_timestamps: List[float]
170
+ preview_text: str
171
+ cached: bool = False
172
+ cache_id: Optional[str] = None
173
+
174
+
175
+ class _ToolOutputCacheManager:
176
+ r"""Minimal persistent store for caching verbose tool outputs."""
177
+
178
+ def __init__(self, base_dir: Union[str, Path]) -> None:
179
+ self.base_dir = Path(base_dir).expanduser().resolve()
180
+ self.base_dir.mkdir(parents=True, exist_ok=True)
181
+
182
+ def save(
183
+ self,
184
+ tool_name: str,
185
+ tool_call_id: str,
186
+ content: str,
187
+ ) -> Tuple[str, Path]:
188
+ cache_id = uuid.uuid4().hex
189
+ filename = f"{cache_id}.txt"
190
+ path = self.base_dir / filename
191
+ header = (
192
+ f"# Cached tool output\n"
193
+ f"tool_name: {tool_name}\n"
194
+ f"tool_call_id: {tool_call_id}\n"
195
+ f"cache_id: {cache_id}\n"
196
+ f"---\n"
197
+ )
198
+ path.write_text(f"{header}{content}", encoding="utf-8")
199
+ return cache_id, path
200
+
201
+ def load(self, cache_id: str) -> str:
202
+ path = self.base_dir / f"{cache_id}.txt"
203
+ if not path.exists():
204
+ raise FileNotFoundError(
205
+ f"Cached tool output {cache_id} not found at {path}"
206
+ )
207
+ return path.read_text(encoding="utf-8")
208
+
209
+
135
210
  class StreamContentAccumulator:
136
211
  r"""Manages content accumulation across streaming responses to ensure
137
212
  all responses contain complete cumulative content."""
138
213
 
139
214
  def __init__(self):
140
215
  self.base_content = "" # Content before tool calls
141
- self.current_content = "" # Current streaming content
216
+ self.current_content = [] # Accumulated streaming fragments
142
217
  self.tool_status_messages = [] # Accumulated tool status messages
143
218
 
144
219
  def set_base_content(self, content: str):
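
For reference, the `_ToolOutputCacheManager` introduced above stores each verbose tool result as a plain-text file named after a generated cache id, with a small metadata header prepended. A minimal sketch of the save/load round trip implied by those methods (the class is module-private, and the tool name, call id, and directory here are only illustrative):

    from pathlib import Path

    manager = _ToolOutputCacheManager(base_dir="tool_cache")

    cache_id, path = manager.save(
        tool_name="search_toolkit.search",   # placeholder tool name
        tool_call_id="call_123",             # placeholder call id
        content="...very long tool output...",
    )
    assert path == Path("tool_cache").resolve() / f"{cache_id}.txt"

    # load() returns the stored file verbatim: the header lines
    # (tool_name / tool_call_id / cache_id) followed by the original content.
    restored = manager.load(cache_id)
    assert restored.endswith("...very long tool output...")
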
@@ -147,7 +222,7 @@ class StreamContentAccumulator:
147
222
 
148
223
  def add_streaming_content(self, new_content: str):
149
224
  r"""Add new streaming content."""
150
- self.current_content += new_content
225
+ self.current_content.append(new_content)
151
226
 
152
227
  def add_tool_status(self, status_message: str):
153
228
  r"""Add a tool status message."""
@@ -156,16 +231,18 @@ class StreamContentAccumulator:
156
231
  def get_full_content(self) -> str:
157
232
  r"""Get the complete accumulated content."""
158
233
  tool_messages = "".join(self.tool_status_messages)
159
- return self.base_content + tool_messages + self.current_content
234
+ current = "".join(self.current_content)
235
+ return self.base_content + tool_messages + current
160
236
 
161
237
  def get_content_with_new_status(self, status_message: str) -> str:
162
238
  r"""Get content with a new status message appended."""
163
239
  tool_messages = "".join([*self.tool_status_messages, status_message])
164
- return self.base_content + tool_messages + self.current_content
240
+ current = "".join(self.current_content)
241
+ return self.base_content + tool_messages + current
165
242
 
166
243
  def reset_streaming_content(self):
167
244
  r"""Reset only the streaming content, keep base and tool status."""
168
- self.current_content = ""
245
+ self.current_content = []
169
246
 
170
247
 
171
248
  class StreamingChatAgentResponse:
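
The accumulator now collects streamed fragments in a list and joins them on demand instead of growing a string with `+=`; joining once avoids re-copying the accumulated text on every delta, which is the usual rationale for this idiom (the performance note is an inference, not something measured in this release). A small sketch of the pattern:

    fragments = []

    for delta in ("Hel", "lo, ", "wor", "ld"):  # stand-in for streamed deltas
        fragments.append(delta)                 # cheap append per delta

    full_text = "".join(fragments)              # single pass when the text is read
    assert full_text == "Hello, world"
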
@@ -186,13 +263,10 @@ class StreamingChatAgentResponse:
186
263
  def _ensure_latest_response(self):
187
264
  r"""Ensure we have the latest response by consuming the generator."""
188
265
  if not self._consumed:
189
- try:
190
- for response in self._generator:
191
- self._responses.append(response)
192
- self._current_response = response
193
- self._consumed = True
194
- except StopIteration:
195
- self._consumed = True
266
+ for response in self._generator:
267
+ self._responses.append(response)
268
+ self._current_response = response
269
+ self._consumed = True
196
270
 
197
271
  @property
198
272
  def msgs(self) -> List[BaseMessage]:
@@ -230,17 +304,14 @@ class StreamingChatAgentResponse:
230
304
  r"""Make this object iterable."""
231
305
  if self._consumed:
232
306
  # If already consumed, iterate over stored responses
233
- return iter(self._responses)
307
+ yield from self._responses
234
308
  else:
235
309
  # If not consumed, consume and yield
236
- try:
237
- for response in self._generator:
238
- self._responses.append(response)
239
- self._current_response = response
240
- yield response
241
- self._consumed = True
242
- except StopIteration:
243
- self._consumed = True
310
+ for response in self._generator:
311
+ self._responses.append(response)
312
+ self._current_response = response
313
+ yield response
314
+ self._consumed = True
244
315
 
245
316
  def __getattr__(self, name):
246
317
  r"""Forward any other attribute access to the latest response."""
@@ -271,13 +342,10 @@ class AsyncStreamingChatAgentResponse:
271
342
  async def _ensure_latest_response(self):
272
343
  r"""Ensure the latest response by consuming the async generator."""
273
344
  if not self._consumed:
274
- try:
275
- async for response in self._async_generator:
276
- self._responses.append(response)
277
- self._current_response = response
278
- self._consumed = True
279
- except StopAsyncIteration:
280
- self._consumed = True
345
+ async for response in self._async_generator:
346
+ self._responses.append(response)
347
+ self._current_response = response
348
+ self._consumed = True
281
349
 
282
350
  async def _get_final_response(self) -> ChatAgentResponse:
283
351
  r"""Get the final response after consuming the entire stream."""
@@ -303,14 +371,11 @@ class AsyncStreamingChatAgentResponse:
303
371
  else:
304
372
  # If not consumed, consume and yield
305
373
  async def _consume_and_yield():
306
- try:
307
- async for response in self._async_generator:
308
- self._responses.append(response)
309
- self._current_response = response
310
- yield response
311
- self._consumed = True
312
- except StopAsyncIteration:
313
- self._consumed = True
374
+ async for response in self._async_generator:
375
+ self._responses.append(response)
376
+ self._current_response = response
377
+ yield response
378
+ self._consumed = True
314
379
 
315
380
  return _consume_and_yield()
316
381
 
@@ -378,14 +443,39 @@ class ChatAgent(BaseAgent):
378
443
  for individual tool execution. If None, wait indefinitely.
379
444
  mask_tool_output (Optional[bool]): Whether to return a sanitized
380
445
  placeholder instead of the raw tool output. (default: :obj:`False`)
381
- pause_event (Optional[asyncio.Event]): Event to signal pause of the
382
- agent's operation. When clear, the agent will pause its execution.
383
- (default: :obj:`None`)
446
+ pause_event (Optional[Union[threading.Event, asyncio.Event]]): Event to
447
+ signal pause of the agent's operation. When clear, the agent will
448
+ pause its execution. Use threading.Event for sync operations or
449
+ asyncio.Event for async operations. (default: :obj:`None`)
384
450
  prune_tool_calls_from_memory (bool): Whether to clean tool
385
451
  call messages from memory after response generation to save token
386
452
  usage. When enabled, removes FUNCTION/TOOL role messages and
387
453
  ASSISTANT messages with tool_calls after each step.
388
454
  (default: :obj:`False`)
455
+ enable_tool_output_cache (bool, optional): Whether to offload verbose
456
+ historical tool outputs to a local cache and replace them with
457
+ lightweight references in memory. Only older tool results whose
458
+ payload length exceeds ``tool_output_cache_threshold`` are cached.
459
+ (default: :obj:`True`)
460
+ tool_output_cache_threshold (int, optional): Minimum character length
461
+ of a tool result before it becomes eligible for caching. Values
462
+ below or equal to zero disable caching regardless of the toggle.
463
+ (default: :obj:`2000`)
464
+ tool_output_cache_dir (Optional[Union[str, Path]], optional): Target
465
+ directory for cached tool outputs. When omitted, a ``tool_cache``
466
+ directory relative to the current working directory is used.
467
+ (default: :obj:`None`)
468
+ retry_attempts (int, optional): Maximum number of retry attempts for
469
+ rate limit errors. (default: :obj:`3`)
470
+ retry_delay (float, optional): Initial delay in seconds between
471
+ retries. Uses exponential backoff. (default: :obj:`1.0`)
472
+ step_timeout (Optional[float], optional): Timeout in seconds for the
473
+ entire step operation. If None, no timeout is applied.
474
+ (default: :obj:`None`)
475
+ stream_accumulate (bool, optional): When True, partial streaming
476
+ updates return accumulated content (current behavior). When False,
477
+ partial updates return only the incremental delta. (default:
478
+ :obj:`True`)
389
479
  """
390
480
 
391
481
  def __init__(
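
The constructor keywords documented above cover retrying, timeouts, pausing, tool-output caching, and streaming granularity in one place. A hedged usage sketch; the keyword names and defaults come from the docstring above, while the system message, model choice, and cache directory are placeholders:

    import threading

    from camel.agents import ChatAgent

    pause_event = threading.Event()
    pause_event.set()  # start unpaused; clear() to pause, set() to resume

    agent = ChatAgent(
        system_message="You are a helpful assistant.",
        # model=...,                          # any backend ChatAgent accepts
        pause_event=pause_event,              # threading.Event for sync step()
        retry_attempts=3,                     # retries on RateLimitError
        retry_delay=1.0,                      # base delay for exponential backoff
        step_timeout=120.0,                   # TimeoutError if a step runs longer
        enable_tool_output_cache=True,        # offload old verbose tool outputs
        tool_output_cache_threshold=2000,     # minimum characters before caching
        tool_output_cache_dir="tool_cache",   # defaults to ./tool_cache when unset
        stream_accumulate=True,               # partial updates carry the full text
    )
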
@@ -424,8 +514,15 @@ class ChatAgent(BaseAgent):
424
514
  stop_event: Optional[threading.Event] = None,
425
515
  tool_execution_timeout: Optional[float] = None,
426
516
  mask_tool_output: bool = False,
427
- pause_event: Optional[asyncio.Event] = None,
517
+ pause_event: Optional[Union[threading.Event, asyncio.Event]] = None,
428
518
  prune_tool_calls_from_memory: bool = False,
519
+ enable_tool_output_cache: bool = True,
520
+ tool_output_cache_threshold: int = 2000,
521
+ tool_output_cache_dir: Optional[Union[str, Path]] = None,
522
+ retry_attempts: int = 3,
523
+ retry_delay: float = 1.0,
524
+ step_timeout: Optional[float] = None,
525
+ stream_accumulate: bool = True,
429
526
  ) -> None:
430
527
  if isinstance(model, ModelManager):
431
528
  self.model_backend = model
@@ -441,6 +538,28 @@ class ChatAgent(BaseAgent):
441
538
  # Assign unique ID
442
539
  self.agent_id = agent_id if agent_id else str(uuid.uuid4())
443
540
 
541
+ self._tool_output_cache_enabled = (
542
+ enable_tool_output_cache and tool_output_cache_threshold > 0
543
+ )
544
+ self._tool_output_cache_threshold = max(0, tool_output_cache_threshold)
545
+ self._tool_output_cache_dir: Optional[Path]
546
+ self._tool_output_cache_manager: Optional[_ToolOutputCacheManager]
547
+ if self._tool_output_cache_enabled:
548
+ cache_dir = (
549
+ Path(tool_output_cache_dir).expanduser()
550
+ if tool_output_cache_dir is not None
551
+ else Path("tool_cache")
552
+ )
553
+ self._tool_output_cache_dir = cache_dir
554
+ self._tool_output_cache_manager = _ToolOutputCacheManager(
555
+ cache_dir
556
+ )
557
+ else:
558
+ self._tool_output_cache_dir = None
559
+ self._tool_output_cache_manager = None
560
+ self._tool_output_history: List[_ToolOutputHistoryEntry] = []
561
+ self._cache_lookup_tool_name = "retrieve_cached_tool_output"
562
+
444
563
  # Set up memory
445
564
  context_creator = ScoreBasedContextCreator(
446
565
  self.model_backend.token_counter,
@@ -487,6 +606,8 @@ class ChatAgent(BaseAgent):
487
606
  convert_to_function_tool(tool) for tool in (tools or [])
488
607
  ]
489
608
  }
609
+ if self._tool_output_cache_enabled:
610
+ self._ensure_tool_cache_lookup_tool()
490
611
 
491
612
  # Register agent with toolkits that have RegisteredAgentToolkit mixin
492
613
  if toolkits_to_register_agent:
@@ -509,13 +630,22 @@ class ChatAgent(BaseAgent):
509
630
  self.tool_execution_timeout = tool_execution_timeout
510
631
  self.mask_tool_output = mask_tool_output
511
632
  self._secure_result_store: Dict[str, Any] = {}
633
+ self._secure_result_store_lock = threading.Lock()
512
634
  self.pause_event = pause_event
513
635
  self.prune_tool_calls_from_memory = prune_tool_calls_from_memory
636
+ self.retry_attempts = max(1, retry_attempts)
637
+ self.retry_delay = max(0.0, retry_delay)
638
+ self.step_timeout = step_timeout
639
+ self._context_utility: Optional[ContextUtility] = None
640
+ self._context_summary_agent: Optional["ChatAgent"] = None
641
+ self.stream_accumulate = stream_accumulate
514
642
 
515
643
  def reset(self):
516
644
  r"""Resets the :obj:`ChatAgent` to its initial state."""
517
645
  self.terminated = False
518
646
  self.init_messages()
647
+ if self._tool_output_cache_enabled:
648
+ self._tool_output_history.clear()
519
649
  for terminator in self.response_terminators:
520
650
  terminator.reset()
521
651
 
@@ -699,6 +829,20 @@ class ChatAgent(BaseAgent):
699
829
  # Ensure the new memory has the system message
700
830
  self.init_messages()
701
831
 
832
+ def set_context_utility(
833
+ self, context_utility: Optional[ContextUtility]
834
+ ) -> None:
835
+ r"""Set the context utility for the agent.
836
+
837
+ This allows external components (like SingleAgentWorker) to provide
838
+ a shared context utility instance for workflow management.
839
+
840
+ Args:
841
+ context_utility (ContextUtility, optional): The context utility
842
+ to use. If None, the agent will create its own when needed.
843
+ """
844
+ self._context_utility = context_utility
845
+
702
846
  def _get_full_tool_schemas(self) -> List[Dict[str, Any]]:
703
847
  r"""Returns a list of tool schemas of all tools, including internal
704
848
  and external tools.
@@ -722,6 +866,178 @@ class ChatAgent(BaseAgent):
722
866
  for tool in tools:
723
867
  self.add_tool(tool)
724
868
 
869
+ def retrieve_cached_tool_output(self, cache_id: str) -> str:
870
+ r"""Load a cached tool output by its cache identifier.
871
+
872
+ Args:
873
+ cache_id (str): Identifier provided in cached tool messages.
874
+
875
+ Returns:
876
+ str: The cached content or an explanatory error message.
877
+ """
878
+ if not self._tool_output_cache_manager:
879
+ return "Tool output caching is disabled for this agent instance."
880
+
881
+ normalized_cache_id = cache_id.strip()
882
+ if not normalized_cache_id:
883
+ return "Please provide a non-empty cache_id."
884
+
885
+ try:
886
+ return self._tool_output_cache_manager.load(normalized_cache_id)
887
+ except FileNotFoundError:
888
+ return (
889
+ f"Cache entry '{normalized_cache_id}' was not found. "
890
+ "Verify the identifier and try again."
891
+ )
892
+
893
+ def _ensure_tool_cache_lookup_tool(self) -> None:
894
+ if not self._tool_output_cache_enabled:
895
+ return
896
+ lookup_name = self._cache_lookup_tool_name
897
+ if lookup_name in self._internal_tools:
898
+ return
899
+ lookup_tool = convert_to_function_tool(
900
+ self.retrieve_cached_tool_output
901
+ )
902
+ self._internal_tools[lookup_tool.get_function_name()] = lookup_tool
903
+
904
+ def _serialize_tool_result(self, result: Any) -> str:
905
+ if isinstance(result, str):
906
+ return result
907
+ try:
908
+ return json.dumps(result, ensure_ascii=False)
909
+ except (TypeError, ValueError):
910
+ return str(result)
911
+
912
+ def _summarize_tool_result(self, text: str, limit: int = 160) -> str:
913
+ normalized = re.sub(r"\s+", " ", text).strip()
914
+ if len(normalized) <= limit:
915
+ return normalized
916
+ return normalized[: max(0, limit - 3)].rstrip() + "..."
917
+
918
+ def _register_tool_output_for_cache(
919
+ self,
920
+ func_name: str,
921
+ tool_call_id: str,
922
+ result_text: str,
923
+ records: List[MemoryRecord],
924
+ ) -> None:
925
+ if not records:
926
+ return
927
+
928
+ entry = _ToolOutputHistoryEntry(
929
+ tool_name=func_name,
930
+ tool_call_id=tool_call_id,
931
+ result_text=result_text,
932
+ record_uuids=[str(record.uuid) for record in records],
933
+ record_timestamps=[record.timestamp for record in records],
934
+ preview_text=self._summarize_tool_result(result_text),
935
+ )
936
+ self._tool_output_history.append(entry)
937
+ self._process_tool_output_cache()
938
+
939
+ def _process_tool_output_cache(self) -> None:
940
+ if (
941
+ not self._tool_output_cache_enabled
942
+ or not self._tool_output_history
943
+ or self._tool_output_cache_manager is None
944
+ ):
945
+ return
946
+
947
+ # Only cache older results; keep the latest expanded for immediate use.
948
+ for entry in self._tool_output_history[:-1]:
949
+ if entry.cached:
950
+ continue
951
+ if len(entry.result_text) < self._tool_output_cache_threshold:
952
+ continue
953
+ self._cache_tool_output_entry(entry)
954
+
955
+ def _cache_tool_output_entry(self, entry: _ToolOutputHistoryEntry) -> None:
956
+ if self._tool_output_cache_manager is None or not entry.record_uuids:
957
+ return
958
+
959
+ try:
960
+ cache_id, cache_path = self._tool_output_cache_manager.save(
961
+ entry.tool_name,
962
+ entry.tool_call_id,
963
+ entry.result_text,
964
+ )
965
+ except Exception as exc: # pragma: no cover - defensive
966
+ logger.warning(
967
+ "Failed to persist cached tool output for %s (%s): %s",
968
+ entry.tool_name,
969
+ entry.tool_call_id,
970
+ exc,
971
+ )
972
+ return
973
+
974
+ timestamp = (
975
+ entry.record_timestamps[0]
976
+ if entry.record_timestamps
977
+ else time.time_ns() / 1_000_000_000
978
+ )
979
+ reference_message = FunctionCallingMessage(
980
+ role_name=self.role_name,
981
+ role_type=self.role_type,
982
+ meta_dict={
983
+ "cache_id": cache_id,
984
+ "cached_preview": entry.preview_text,
985
+ "cached_tool_output_path": str(cache_path),
986
+ },
987
+ content="",
988
+ func_name=entry.tool_name,
989
+ result=self._build_cache_reference_text(entry, cache_id),
990
+ tool_call_id=entry.tool_call_id,
991
+ )
992
+
993
+ chat_history_block = getattr(self.memory, "_chat_history_block", None)
994
+ storage = getattr(chat_history_block, "storage", None)
995
+ if storage is None:
996
+ return
997
+
998
+ existing_records = storage.load()
999
+ updated_records = [
1000
+ record
1001
+ for record in existing_records
1002
+ if record["uuid"] not in entry.record_uuids
1003
+ ]
1004
+ new_record = MemoryRecord(
1005
+ message=reference_message,
1006
+ role_at_backend=OpenAIBackendRole.FUNCTION,
1007
+ timestamp=timestamp,
1008
+ agent_id=self.agent_id,
1009
+ )
1010
+ updated_records.append(new_record.to_dict())
1011
+ updated_records.sort(key=lambda record: record["timestamp"])
1012
+ storage.clear()
1013
+ storage.save(updated_records)
1014
+
1015
+ logger.info(
1016
+ "Cached tool output '%s' (%s) to %s with cache_id=%s",
1017
+ entry.tool_name,
1018
+ entry.tool_call_id,
1019
+ cache_path,
1020
+ cache_id,
1021
+ )
1022
+
1023
+ entry.cached = True
1024
+ entry.cache_id = cache_id
1025
+ entry.record_uuids = [str(new_record.uuid)]
1026
+ entry.record_timestamps = [timestamp]
1027
+
1028
+ def _build_cache_reference_text(
1029
+ self, entry: _ToolOutputHistoryEntry, cache_id: str
1030
+ ) -> str:
1031
+ preview = entry.preview_text or "[no preview available]"
1032
+ return (
1033
+ "[cached tool output]\n"
1034
+ f"tool: {entry.tool_name}\n"
1035
+ f"cache_id: {cache_id}\n"
1036
+ f"preview: {preview}\n"
1037
+ f"Use `{self._cache_lookup_tool_name}` with this cache_id to "
1038
+ "retrieve the full content."
1039
+ )
1040
+
725
1041
  def add_external_tool(
726
1042
  self, tool: Union[FunctionTool, Callable, Dict[str, Any]]
727
1043
  ) -> None:
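
When caching kicks in, the original verbose tool record is replaced in chat history by a short reference block, and the agent automatically carries an internal `retrieve_cached_tool_output` tool for expanding it again. A sketch of the reference format produced by `_build_cache_reference_text` and of the lookup call, with a made-up cache id and the `agent` from the constructor sketch earlier:

    # Roughly what remains in memory once an old tool result is cached:
    reference = (
        "[cached tool output]\n"
        "tool: search_toolkit.search\n"
        "cache_id: 3f2a9c0e...\n"
        "preview: first ~160 characters of the original result ...\n"
        "Use `retrieve_cached_tool_output` with this cache_id to "
        "retrieve the full content."
    )

    # The model (or user code) can then expand the reference on demand;
    # the lookup simply reads the cached file back from disk.
    full_text = agent.retrieve_cached_tool_output(cache_id="3f2a9c0e...")
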
@@ -766,7 +1082,8 @@ class ChatAgent(BaseAgent):
766
1082
  message: BaseMessage,
767
1083
  role: OpenAIBackendRole,
768
1084
  timestamp: Optional[float] = None,
769
- ) -> None:
1085
+ return_records: bool = False,
1086
+ ) -> Optional[List[MemoryRecord]]:
770
1087
  r"""Updates the agent memory with a new message.
771
1088
 
772
1089
  If the single *message* exceeds the model's context window, it will
@@ -786,24 +1103,29 @@ class ChatAgent(BaseAgent):
786
1103
  timestamp (Optional[float], optional): Custom timestamp for the
787
1104
  memory record. If `None`, the current time will be used.
788
1105
  (default: :obj:`None`)
789
- (default: obj:`None`)
1106
+ return_records (bool, optional): When ``True`` the method returns
1107
+ the list of :class:`MemoryRecord` objects written to memory.
1108
+ (default: :obj:`False`)
1109
+
1110
+ Returns:
1111
+ Optional[List[MemoryRecord]]: The records that were written when
1112
+ ``return_records`` is ``True``; otherwise ``None``.
790
1113
  """
791
- import math
792
- import time
793
- import uuid as _uuid
1114
+
1115
+ written_records: List[MemoryRecord] = []
794
1116
 
795
1117
  # 1. Helper to write a record to memory
796
1118
  def _write_single_record(
797
1119
  message: BaseMessage, role: OpenAIBackendRole, timestamp: float
798
1120
  ):
799
- self.memory.write_record(
800
- MemoryRecord(
801
- message=message,
802
- role_at_backend=role,
803
- timestamp=timestamp,
804
- agent_id=self.agent_id,
805
- )
1121
+ record = MemoryRecord(
1122
+ message=message,
1123
+ role_at_backend=role,
1124
+ timestamp=timestamp,
1125
+ agent_id=self.agent_id,
806
1126
  )
1127
+ written_records.append(record)
1128
+ self.memory.write_record(record)
807
1129
 
808
1130
  base_ts = (
809
1131
  timestamp
@@ -818,26 +1140,30 @@ class ChatAgent(BaseAgent):
818
1140
  token_limit = context_creator.token_limit
819
1141
  except AttributeError:
820
1142
  _write_single_record(message, role, base_ts)
821
- return
1143
+ return written_records if return_records else None
822
1144
 
823
1145
  # 3. Check if slicing is necessary
824
1146
  try:
825
1147
  current_tokens = token_counter.count_tokens_from_messages(
826
1148
  [message.to_openai_message(role)]
827
1149
  )
828
- _, ctx_tokens = self.memory.get_context()
1150
+
1151
+ with warnings.catch_warnings():
1152
+ warnings.filterwarnings("ignore", category=EmptyMemoryWarning)
1153
+ _, ctx_tokens = self.memory.get_context()
1154
+
829
1155
  remaining_budget = max(0, token_limit - ctx_tokens)
830
1156
 
831
1157
  if current_tokens <= remaining_budget:
832
1158
  _write_single_record(message, role, base_ts)
833
- return
1159
+ return written_records if return_records else None
834
1160
  except Exception as e:
835
1161
  logger.warning(
836
1162
  f"Token calculation failed before chunking, "
837
1163
  f"writing message as-is. Error: {e}"
838
1164
  )
839
1165
  _write_single_record(message, role, base_ts)
840
- return
1166
+ return written_records if return_records else None
841
1167
 
842
1168
  # 4. Perform slicing
843
1169
  logger.warning(
@@ -858,18 +1184,18 @@ class ChatAgent(BaseAgent):
858
1184
 
859
1185
  if not text_to_chunk or not text_to_chunk.strip():
860
1186
  _write_single_record(message, role, base_ts)
861
- return
1187
+ return written_records if return_records else None
862
1188
  # Encode the entire text to get a list of all token IDs
863
1189
  try:
864
1190
  all_token_ids = token_counter.encode(text_to_chunk)
865
1191
  except Exception as e:
866
1192
  logger.error(f"Failed to encode text for chunking: {e}")
867
1193
  _write_single_record(message, role, base_ts) # Fallback
868
- return
1194
+ return written_records if return_records else None
869
1195
 
870
1196
  if not all_token_ids:
871
1197
  _write_single_record(message, role, base_ts) # Nothing to chunk
872
- return
1198
+ return written_records if return_records else None
873
1199
 
874
1200
  # 1. Base chunk size: one-tenth of the smaller of (a) total token
875
1201
  # limit and (b) current remaining budget. This prevents us from
@@ -894,7 +1220,7 @@ class ChatAgent(BaseAgent):
894
1220
 
895
1221
  # 4. Calculate how many chunks we will need with this body size.
896
1222
  num_chunks = math.ceil(len(all_token_ids) / chunk_body_limit)
897
- group_id = str(_uuid.uuid4())
1223
+ group_id = str(uuid.uuid4())
898
1224
 
899
1225
  for i in range(num_chunks):
900
1226
  start_idx = i * chunk_body_limit
@@ -935,6 +1261,8 @@ class ChatAgent(BaseAgent):
935
1261
  # Increment timestamp slightly to maintain order
936
1262
  _write_single_record(new_msg, role, base_ts + i * 1e-6)
937
1263
 
1264
+ return written_records if return_records else None
1265
+
938
1266
  def load_memory(self, memory: AgentMemory) -> None:
939
1267
  r"""Load the provided memory into the agent.
940
1268
 
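
`update_memory` can now report exactly which `MemoryRecord` objects it wrote, which is what the caching path uses to swap records out later; a single message normally yields one record, but several when it had to be chunked to fit the context window. A minimal sketch of the new keyword, assuming `agent` and a `BaseMessage` instance `msg` already exist:

    from camel.types import OpenAIBackendRole

    records = agent.update_memory(
        msg, OpenAIBackendRole.ASSISTANT, return_records=True
    )
    print([record.uuid for record in records])  # more than one if chunked
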
@@ -1012,6 +1340,242 @@ class ChatAgent(BaseAgent):
1012
1340
  json_store.save(to_save)
1013
1341
  logger.info(f"Memory saved to {path}")
1014
1342
 
1343
+ def summarize(
1344
+ self,
1345
+ filename: Optional[str] = None,
1346
+ summary_prompt: Optional[str] = None,
1347
+ response_format: Optional[Type[BaseModel]] = None,
1348
+ working_directory: Optional[Union[str, Path]] = None,
1349
+ ) -> Dict[str, Any]:
1350
+ r"""Summarize the agent's current conversation context and persist it
1351
+ to a markdown file.
1352
+
1353
+ Args:
1354
+ filename (Optional[str]): The base filename (without extension) to
1355
+ use for the markdown file. Defaults to a timestamped name when
1356
+ not provided.
1357
+ summary_prompt (Optional[str]): Custom prompt for the summarizer.
1358
+ When omitted, a default prompt highlighting key decisions,
1359
+ action items, and open questions is used.
1360
+ response_format (Optional[Type[BaseModel]]): A Pydantic model
1361
+ defining the expected structure of the response. If provided,
1362
+ the summary will be generated as structured output and included
1363
+ in the result.
1364
+ working_directory (Optional[str|Path]): Optional directory to save
1365
+ the markdown summary file. If provided, overrides the default
1366
+ directory used by ContextUtility.
1367
+
1368
+ Returns:
1369
+ Dict[str, Any]: A dictionary containing the summary text, file
1370
+ path, status message, and optionally structured_summary if
1371
+ response_format was provided.
1372
+ """
1373
+
1374
+ result: Dict[str, Any] = {
1375
+ "summary": "",
1376
+ "file_path": None,
1377
+ "status": "",
1378
+ }
1379
+
1380
+ try:
1381
+ # Use external context if set, otherwise create local one
1382
+ if self._context_utility is None:
1383
+ if working_directory is not None:
1384
+ self._context_utility = ContextUtility(
1385
+ working_directory=str(working_directory)
1386
+ )
1387
+ else:
1388
+ self._context_utility = ContextUtility()
1389
+ context_util = self._context_utility
1390
+
1391
+ # Get conversation directly from agent's memory
1392
+ messages, _ = self.memory.get_context()
1393
+
1394
+ if not messages:
1395
+ status_message = (
1396
+ "No conversation context available to summarize."
1397
+ )
1398
+ result["status"] = status_message
1399
+ return result
1400
+
1401
+ # Convert messages to conversation text
1402
+ conversation_lines = []
1403
+ for message in messages:
1404
+ role = message.get('role', 'unknown')
1405
+ content = message.get('content', '')
1406
+
1407
+ # Handle tool call messages (assistant calling tools)
1408
+ tool_calls = message.get('tool_calls')
1409
+ if tool_calls and isinstance(tool_calls, (list, tuple)):
1410
+ for tool_call in tool_calls:
1411
+ # Handle both dict and object formats
1412
+ if isinstance(tool_call, dict):
1413
+ func_name = tool_call.get('function', {}).get(
1414
+ 'name', 'unknown_tool'
1415
+ )
1416
+ func_args_str = tool_call.get('function', {}).get(
1417
+ 'arguments', '{}'
1418
+ )
1419
+ else:
1420
+ # Handle object format (Pydantic or similar)
1421
+ func_name = getattr(
1422
+ getattr(tool_call, 'function', None),
1423
+ 'name',
1424
+ 'unknown_tool',
1425
+ )
1426
+ func_args_str = getattr(
1427
+ getattr(tool_call, 'function', None),
1428
+ 'arguments',
1429
+ '{}',
1430
+ )
1431
+
1432
+ # Parse and format arguments for readability
1433
+ try:
1434
+ import json
1435
+
1436
+ args_dict = json.loads(func_args_str)
1437
+ args_formatted = ', '.join(
1438
+ f"{k}={v}" for k, v in args_dict.items()
1439
+ )
1440
+ except (json.JSONDecodeError, ValueError, TypeError):
1441
+ args_formatted = func_args_str
1442
+
1443
+ conversation_lines.append(
1444
+ f"[TOOL CALL] {func_name}({args_formatted})"
1445
+ )
1446
+
1447
+ # Handle tool response messages
1448
+ elif role == 'tool':
1449
+ tool_name = message.get('name', 'unknown_tool')
1450
+ if not content:
1451
+ content = str(message.get('content', ''))
1452
+ conversation_lines.append(
1453
+ f"[TOOL RESULT] {tool_name} → {content}"
1454
+ )
1455
+
1456
+ # Handle regular content messages (user/assistant/system)
1457
+ elif content:
1458
+ conversation_lines.append(f"{role}: {content}")
1459
+
1460
+ conversation_text = "\n".join(conversation_lines).strip()
1461
+
1462
+ if not conversation_text:
1463
+ status_message = (
1464
+ "Conversation context is empty; skipping summary."
1465
+ )
1466
+ result["status"] = status_message
1467
+ return result
1468
+
1469
+ if self._context_summary_agent is None:
1470
+ self._context_summary_agent = ChatAgent(
1471
+ system_message=(
1472
+ "You are a helpful assistant that summarizes "
1473
+ "conversations"
1474
+ ),
1475
+ model=self.model_backend,
1476
+ agent_id=f"{self.agent_id}_context_summarizer",
1477
+ )
1478
+ else:
1479
+ self._context_summary_agent.reset()
1480
+
1481
+ if summary_prompt:
1482
+ prompt_text = (
1483
+ f"{summary_prompt.rstrip()}\n\n"
1484
+ f"AGENT CONVERSATION TO BE SUMMARIZED:\n"
1485
+ f"{conversation_text}"
1486
+ )
1487
+ else:
1488
+ prompt_text = (
1489
+ "Summarize the context information in concise markdown "
1490
+ "bullet points highlighting key decisions, action items.\n"
1491
+ f"Context information:\n{conversation_text}"
1492
+ )
1493
+
1494
+ try:
1495
+ # Use structured output if response_format is provided
1496
+ if response_format:
1497
+ response = self._context_summary_agent.step(
1498
+ prompt_text, response_format=response_format
1499
+ )
1500
+ else:
1501
+ response = self._context_summary_agent.step(prompt_text)
1502
+ except Exception as step_exc:
1503
+ error_message = (
1504
+ f"Failed to generate summary using model: {step_exc}"
1505
+ )
1506
+ logger.error(error_message)
1507
+ result["status"] = error_message
1508
+ return result
1509
+
1510
+ if not response.msgs:
1511
+ status_message = (
1512
+ "Failed to generate summary from model response."
1513
+ )
1514
+ result["status"] = status_message
1515
+ return result
1516
+
1517
+ summary_content = response.msgs[-1].content.strip()
1518
+ if not summary_content:
1519
+ status_message = "Generated summary is empty."
1520
+ result["status"] = status_message
1521
+ return result
1522
+
1523
+ base_filename = (
1524
+ filename
1525
+ if filename
1526
+ else f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
1527
+ )
1528
+ base_filename = Path(base_filename).with_suffix("").name
1529
+
1530
+ metadata = context_util.get_session_metadata()
1531
+ metadata.update(
1532
+ {
1533
+ "agent_id": self.agent_id,
1534
+ "message_count": len(messages),
1535
+ }
1536
+ )
1537
+
1538
+ # Handle structured output if response_format was provided
1539
+ structured_output = None
1540
+ if response_format and response.msgs[-1].parsed:
1541
+ structured_output = response.msgs[-1].parsed
1542
+ # Convert structured output to custom markdown
1543
+ summary_content = context_util.structured_output_to_markdown(
1544
+ structured_data=structured_output, metadata=metadata
1545
+ )
1546
+
1547
+ # Save the markdown (either custom structured or default)
1548
+ save_status = context_util.save_markdown_file(
1549
+ base_filename,
1550
+ summary_content,
1551
+ title="Conversation Summary"
1552
+ if not structured_output
1553
+ else None,
1554
+ metadata=metadata if not structured_output else None,
1555
+ )
1556
+
1557
+ file_path = (
1558
+ context_util.get_working_directory() / f"{base_filename}.md"
1559
+ )
1560
+
1561
+ # Prepare result dictionary
1562
+ result_dict = {
1563
+ "summary": summary_content,
1564
+ "file_path": str(file_path),
1565
+ "status": save_status,
1566
+ "structured_summary": structured_output,
1567
+ }
1568
+
1569
+ result.update(result_dict)
1570
+ logger.info("Conversation summary saved to %s", file_path)
1571
+ return result
1572
+
1573
+ except Exception as exc:
1574
+ error_message = f"Failed to summarize conversation context: {exc}"
1575
+ logger.error(error_message)
1576
+ result["status"] = error_message
1577
+ return result
1578
+
1015
1579
  def clear_memory(self) -> None:
1016
1580
  r"""Clear the agent's memory and reset to initial state.
1017
1581
 
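
The new `summarize()` method condenses the agent's current memory into markdown, writes it to a file, and can optionally shape the summary with a Pydantic model. A hedged usage sketch; the schema fields, filename, and directory are placeholders, while the returned keys match the implementation above:

    from typing import List

    from pydantic import BaseModel


    class MeetingSummary(BaseModel):       # placeholder schema
        decisions: List[str]
        action_items: List[str]


    result = agent.summarize(              # `agent` is an existing ChatAgent
        filename="sprint_planning",        # saved as sprint_planning.md
        response_format=MeetingSummary,    # optional structured output
        working_directory="summaries",     # optional override of the default dir
    )

    print(result["status"])                # save status message
    print(result["file_path"])             # path to the markdown file
    print(result["structured_summary"])    # parsed MeetingSummary, or None
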
@@ -1019,6 +1583,9 @@ class ChatAgent(BaseAgent):
1019
1583
  None
1020
1584
  """
1021
1585
  self.memory.clear()
1586
+ if self._tool_output_cache_enabled:
1587
+ self._tool_output_history.clear()
1588
+
1022
1589
  if self.system_message is not None:
1023
1590
  self.update_memory(self.system_message, OpenAIBackendRole.SYSTEM)
1024
1591
 
@@ -1054,8 +1621,6 @@ class ChatAgent(BaseAgent):
1054
1621
  r"""Initializes the stored messages list with the current system
1055
1622
  message.
1056
1623
  """
1057
- import time
1058
-
1059
1624
  self.memory.clear()
1060
1625
  # avoid UserWarning: The `ChatHistoryMemory` is empty.
1061
1626
  if self.system_message is not None:
@@ -1068,6 +1633,17 @@ class ChatAgent(BaseAgent):
1068
1633
  )
1069
1634
  )
1070
1635
 
1636
+ def reset_to_original_system_message(self) -> None:
1637
+ r"""Reset system message to original, removing any appended context.
1638
+
1639
+ This method reverts the agent's system message back to its original
1640
+ state, removing any workflow context or other modifications that may
1641
+ have been appended. Useful for resetting agent state in multi-turn
1642
+ scenarios.
1643
+ """
1644
+ self._system_message = self._original_system_message
1645
+ self.init_messages()
1646
+
1071
1647
  def record_message(self, message: BaseMessage) -> None:
1072
1648
  r"""Records the externally provided message into the agent memory as if
1073
1649
  it were an answer of the :obj:`ChatAgent` from the backend. Currently,
@@ -1129,7 +1705,7 @@ class ChatAgent(BaseAgent):
1129
1705
 
1130
1706
  # Create a prompt based on the schema
1131
1707
  format_instruction = (
1132
- "\n\nPlease respond in the following JSON format:\n" "{\n"
1708
+ "\n\nPlease respond in the following JSON format:\n{\n"
1133
1709
  )
1134
1710
 
1135
1711
  properties = schema.get("properties", {})
@@ -1216,6 +1792,33 @@ class ChatAgent(BaseAgent):
1216
1792
  # and True to indicate we used prompt formatting
1217
1793
  return modified_message, None, True
1218
1794
 
1795
+ def _is_called_from_registered_toolkit(self) -> bool:
1796
+ r"""Check if current step/astep call originates from a
1797
+ RegisteredAgentToolkit.
1798
+
1799
+ This method uses stack inspection to detect if the current call
1800
+ is originating from a toolkit that inherits from
1801
+ RegisteredAgentToolkit. When detected, tools should be disabled to
1802
+ prevent recursive calls.
1803
+
1804
+ Returns:
1805
+ bool: True if called from a RegisteredAgentToolkit, False otherwise
1806
+ """
1807
+ from camel.toolkits.base import RegisteredAgentToolkit
1808
+
1809
+ try:
1810
+ for frame_info in inspect.stack():
1811
+ frame_locals = frame_info.frame.f_locals
1812
+ if 'self' in frame_locals:
1813
+ caller_self = frame_locals['self']
1814
+ if isinstance(caller_self, RegisteredAgentToolkit):
1815
+ return True
1816
+
1817
+ except Exception:
1818
+ return False
1819
+
1820
+ return False
1821
+
1219
1822
  def _apply_prompt_based_parsing(
1220
1823
  self,
1221
1824
  response: ModelResponse,
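
The stack-inspection guard above targets toolkits that keep a reference back to their owning agent: if one of their tools calls `step()` again, that nested call is detected and run with an empty tool list, so the agent cannot recurse into its own tools. A schematic of the situation it protects against; the toolkit name and method are illustrative, and it assumes the mixin exposes the registered agent as `self.agent`:

    from camel.toolkits.base import RegisteredAgentToolkit


    class SummarizeToolkit(RegisteredAgentToolkit):  # hypothetical toolkit
        def summarize_notes(self, notes: str) -> str:
            # This nested step() is what _is_called_from_registered_toolkit
            # detects via inspect.stack(); the inner call then runs with
            # tool_schemas=[], so it cannot call summarize_notes again.
            response = self.agent.step(f"Summarize: {notes}")
            return response.msgs[-1].content
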
@@ -1232,7 +1835,6 @@ class ChatAgent(BaseAgent):
1232
1835
  try:
1233
1836
  # Try to extract JSON from the response content
1234
1837
  import json
1235
- import re
1236
1838
 
1237
1839
  from pydantic import ValidationError
1238
1840
 
@@ -1271,8 +1873,7 @@ class ChatAgent(BaseAgent):
1271
1873
 
1272
1874
  if not message.parsed:
1273
1875
  logger.warning(
1274
- f"Failed to parse JSON from response: "
1275
- f"{content}"
1876
+ f"Failed to parse JSON from response: {content}"
1276
1877
  )
1277
1878
 
1278
1879
  except Exception as e:
@@ -1365,6 +1966,9 @@ class ChatAgent(BaseAgent):
1365
1966
  a StreamingChatAgentResponse that behaves like
1366
1967
  ChatAgentResponse but can also be iterated for
1367
1968
  streaming updates.
1969
+
1970
+ Raises:
1971
+ TimeoutError: If the step operation exceeds the configured timeout.
1368
1972
  """
1369
1973
 
1370
1974
  stream = self.model_backend.model_config_dict.get("stream", False)
@@ -1374,6 +1978,30 @@ class ChatAgent(BaseAgent):
1374
1978
  generator = self._stream(input_message, response_format)
1375
1979
  return StreamingChatAgentResponse(generator)
1376
1980
 
1981
+ # Execute with timeout if configured
1982
+ if self.step_timeout is not None:
1983
+ with concurrent.futures.ThreadPoolExecutor(
1984
+ max_workers=1
1985
+ ) as executor:
1986
+ future = executor.submit(
1987
+ self._step_impl, input_message, response_format
1988
+ )
1989
+ try:
1990
+ return future.result(timeout=self.step_timeout)
1991
+ except concurrent.futures.TimeoutError:
1992
+ future.cancel()
1993
+ raise TimeoutError(
1994
+ f"Step timed out after {self.step_timeout}s"
1995
+ )
1996
+ else:
1997
+ return self._step_impl(input_message, response_format)
1998
+
1999
+ def _step_impl(
2000
+ self,
2001
+ input_message: Union[BaseMessage, str],
2002
+ response_format: Optional[Type[BaseModel]] = None,
2003
+ ) -> ChatAgentResponse:
2004
+ r"""Implementation of non-streaming step logic."""
1377
2005
  # Set Langfuse session_id using agent_id for trace grouping
1378
2006
  try:
1379
2007
  from camel.utils.langfuse import set_current_agent_session_id
@@ -1382,6 +2010,10 @@ class ChatAgent(BaseAgent):
1382
2010
  except ImportError:
1383
2011
  pass # Langfuse not available
1384
2012
 
2013
+ # Check if this call is from a RegisteredAgentToolkit to prevent tool
2014
+ # use
2015
+ disable_tools = self._is_called_from_registered_toolkit()
2016
+
1385
2017
  # Handle response format compatibility with non-strict tools
1386
2018
  original_response_format = response_format
1387
2019
  input_message, response_format, used_prompt_formatting = (
@@ -1413,8 +2045,13 @@ class ChatAgent(BaseAgent):
1413
2045
 
1414
2046
  while True:
1415
2047
  if self.pause_event is not None and not self.pause_event.is_set():
1416
- while not self.pause_event.is_set():
1417
- time.sleep(0.001)
2048
+ # Use efficient blocking wait for threading.Event
2049
+ if isinstance(self.pause_event, threading.Event):
2050
+ self.pause_event.wait()
2051
+ else:
2052
+ # Fallback for asyncio.Event in sync context
2053
+ while not self.pause_event.is_set():
2054
+ time.sleep(0.001)
1418
2055
 
1419
2056
  try:
1420
2057
  openai_messages, num_tokens = self.memory.get_context()
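
Because the synchronous loop now blocks on `threading.Event.wait()` instead of polling every millisecond, a controller thread can pause and resume a running agent without spinning. A small sketch, reusing a `pause_event` passed to the constructor as shown earlier (the worker function and the `agent` instance are assumed):

    import threading
    import time

    pause_event = threading.Event()
    pause_event.set()                  # agent starts unpaused
    # agent = ChatAgent(..., pause_event=pause_event)

    worker = threading.Thread(
        target=lambda: agent.step("Work through the task list.")
    )
    worker.start()

    pause_event.clear()                # pauses before the next model or tool call
    time.sleep(5)                      # ...do other work while it is held...
    pause_event.set()                  # agent resumes
    worker.join()
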
@@ -1429,7 +2066,9 @@ class ChatAgent(BaseAgent):
1429
2066
  num_tokens=num_tokens,
1430
2067
  current_iteration=iteration_count,
1431
2068
  response_format=response_format,
1432
- tool_schemas=self._get_full_tool_schemas(),
2069
+ tool_schemas=[]
2070
+ if disable_tools
2071
+ else self._get_full_tool_schemas(),
1433
2072
  prev_num_openai_messages=prev_num_openai_messages,
1434
2073
  )
1435
2074
  prev_num_openai_messages = len(openai_messages)
@@ -1444,7 +2083,7 @@ class ChatAgent(BaseAgent):
1444
2083
  if self.stop_event and self.stop_event.is_set():
1445
2084
  # Use the _step_terminate to terminate the agent with reason
1446
2085
  logger.info(
1447
- f"Termination triggered at iteration " f"{iteration_count}"
2086
+ f"Termination triggered at iteration {iteration_count}"
1448
2087
  )
1449
2088
  return self._step_terminate(
1450
2089
  accumulated_context_tokens,
@@ -1467,8 +2106,11 @@ class ChatAgent(BaseAgent):
1467
2106
  self.pause_event is not None
1468
2107
  and not self.pause_event.is_set()
1469
2108
  ):
1470
- while not self.pause_event.is_set():
1471
- time.sleep(0.001)
2109
+ if isinstance(self.pause_event, threading.Event):
2110
+ self.pause_event.wait()
2111
+ else:
2112
+ while not self.pause_event.is_set():
2113
+ time.sleep(0.001)
1472
2114
  result = self._execute_tool(tool_call_request)
1473
2115
  tool_call_records.append(result)
1474
2116
 
@@ -1544,6 +2186,10 @@ class ChatAgent(BaseAgent):
1544
2186
  True, returns an AsyncStreamingChatAgentResponse that can be
1545
2187
  awaited for the final result or async iterated for streaming
1546
2188
  updates.
2189
+
2190
+ Raises:
2191
+ asyncio.TimeoutError: If the step operation exceeds the configured
2192
+ timeout.
1547
2193
  """
1548
2194
 
1549
2195
  try:
@@ -1559,9 +2205,22 @@ class ChatAgent(BaseAgent):
1559
2205
  async_generator = self._astream(input_message, response_format)
1560
2206
  return AsyncStreamingChatAgentResponse(async_generator)
1561
2207
  else:
1562
- return await self._astep_non_streaming_task(
1563
- input_message, response_format
1564
- )
2208
+ if self.step_timeout is not None:
2209
+ try:
2210
+ return await asyncio.wait_for(
2211
+ self._astep_non_streaming_task(
2212
+ input_message, response_format
2213
+ ),
2214
+ timeout=self.step_timeout,
2215
+ )
2216
+ except asyncio.TimeoutError:
2217
+ raise asyncio.TimeoutError(
2218
+ f"Async step timed out after {self.step_timeout}s"
2219
+ )
2220
+ else:
2221
+ return await self._astep_non_streaming_task(
2222
+ input_message, response_format
2223
+ )
1565
2224
 
1566
2225
  async def _astep_non_streaming_task(
1567
2226
  self,
@@ -1577,6 +2236,10 @@ class ChatAgent(BaseAgent):
1577
2236
  except ImportError:
1578
2237
  pass # Langfuse not available
1579
2238
 
2239
+ # Check if this call is from a RegisteredAgentToolkit to prevent tool
2240
+ # use
2241
+ disable_tools = self._is_called_from_registered_toolkit()
2242
+
1580
2243
  # Handle response format compatibility with non-strict tools
1581
2244
  original_response_format = response_format
1582
2245
  input_message, response_format, used_prompt_formatting = (
@@ -1604,7 +2267,12 @@ class ChatAgent(BaseAgent):
1604
2267
  prev_num_openai_messages: int = 0
1605
2268
  while True:
1606
2269
  if self.pause_event is not None and not self.pause_event.is_set():
1607
- await self.pause_event.wait()
2270
+ if isinstance(self.pause_event, asyncio.Event):
2271
+ await self.pause_event.wait()
2272
+ elif isinstance(self.pause_event, threading.Event):
2273
+ # For threading.Event in async context, run in executor
2274
+ loop = asyncio.get_event_loop()
2275
+ await loop.run_in_executor(None, self.pause_event.wait)
1608
2276
  try:
1609
2277
  openai_messages, num_tokens = self.memory.get_context()
1610
2278
  accumulated_context_tokens += num_tokens
@@ -1612,13 +2280,14 @@ class ChatAgent(BaseAgent):
1612
2280
  return self._step_terminate(
1613
2281
  e.args[1], tool_call_records, "max_tokens_exceeded"
1614
2282
  )
1615
-
1616
2283
  response = await self._aget_model_response(
1617
2284
  openai_messages,
1618
2285
  num_tokens=num_tokens,
1619
2286
  current_iteration=iteration_count,
1620
2287
  response_format=response_format,
1621
- tool_schemas=self._get_full_tool_schemas(),
2288
+ tool_schemas=[]
2289
+ if disable_tools
2290
+ else self._get_full_tool_schemas(),
1622
2291
  prev_num_openai_messages=prev_num_openai_messages,
1623
2292
  )
1624
2293
  prev_num_openai_messages = len(openai_messages)
@@ -1633,7 +2302,7 @@ class ChatAgent(BaseAgent):
1633
2302
  if self.stop_event and self.stop_event.is_set():
1634
2303
  # Use the _step_terminate to terminate the agent with reason
1635
2304
  logger.info(
1636
- f"Termination triggered at iteration " f"{iteration_count}"
2305
+ f"Termination triggered at iteration {iteration_count}"
1637
2306
  )
1638
2307
  return self._step_terminate(
1639
2308
  accumulated_context_tokens,
@@ -1656,7 +2325,13 @@ class ChatAgent(BaseAgent):
1656
2325
  self.pause_event is not None
1657
2326
  and not self.pause_event.is_set()
1658
2327
  ):
1659
- await self.pause_event.wait()
2328
+ if isinstance(self.pause_event, asyncio.Event):
2329
+ await self.pause_event.wait()
2330
+ elif isinstance(self.pause_event, threading.Event):
2331
+ loop = asyncio.get_event_loop()
2332
+ await loop.run_in_executor(
2333
+ None, self.pause_event.wait
2334
+ )
1660
2335
  tool_call_record = await self._aexecute_tool(
1661
2336
  tool_call_request
1662
2337
  )
@@ -1776,64 +2451,61 @@ class ChatAgent(BaseAgent):
1776
2451
  tool_schemas: Optional[List[Dict[str, Any]]] = None,
1777
2452
  prev_num_openai_messages: int = 0,
1778
2453
  ) -> ModelResponse:
1779
- r"""Internal function for agent step model response.
1780
- Args:
1781
- openai_messages (List[OpenAIMessage]): The OpenAI
1782
- messages to process.
1783
- num_tokens (int): The number of tokens in the context.
1784
- current_iteration (int): The current iteration of the step.
1785
- response_format (Optional[Type[BaseModel]]): The response
1786
- format to use.
1787
- tool_schemas (Optional[List[Dict[str, Any]]]): The tool
1788
- schemas to use.
1789
- prev_num_openai_messages (int): The number of openai messages
1790
- logged in the previous iteration.
1791
-
1792
- Returns:
1793
- ModelResponse: The model response.
1794
- """
2454
+ r"""Internal function for agent step model response."""
2455
+ last_error = None
1795
2456
 
1796
- response = None
1797
- try:
1798
- response = self.model_backend.run(
1799
- openai_messages, response_format, tool_schemas or None
1800
- )
1801
- except Exception as exc:
1802
- logger.error(
1803
- f"An error occurred while running model "
1804
- f"{self.model_backend.model_type}, "
1805
- f"index: {self.model_backend.current_model_index}",
1806
- exc_info=exc,
1807
- )
1808
- error_info = str(exc)
1809
-
1810
- if not response and self.model_backend.num_models > 1:
1811
- raise ModelProcessingError(
1812
- "Unable to process messages: none of the provided models "
1813
- "run successfully."
1814
- )
1815
- elif not response:
2457
+ for attempt in range(self.retry_attempts):
2458
+ try:
2459
+ response = self.model_backend.run(
2460
+ openai_messages, response_format, tool_schemas or None
2461
+ )
2462
+ if response:
2463
+ break
2464
+ except RateLimitError as e:
2465
+ last_error = e
2466
+ if attempt < self.retry_attempts - 1:
2467
+ delay = min(self.retry_delay * (2**attempt), 60.0)
2468
+ delay = random.uniform(0, delay) # Add jitter
2469
+ logger.warning(
2470
+ f"Rate limit hit (attempt {attempt + 1}"
2471
+ f"/{self.retry_attempts}). Retrying in {delay:.1f}s"
2472
+ )
2473
+ time.sleep(delay)
2474
+ else:
2475
+ logger.error(
2476
+ f"Rate limit exhausted after "
2477
+ f"{self.retry_attempts} attempts"
2478
+ )
2479
+ except Exception:
2480
+ logger.error(
2481
+ f"Model error: {self.model_backend.model_type}",
2482
+ exc_info=True,
2483
+ )
2484
+ raise
2485
+ else:
2486
+ # Loop completed without success
1816
2487
  raise ModelProcessingError(
1817
- f"Unable to process messages: the only provided model "
1818
- f"did not run successfully. Error: {error_info}"
2488
+ f"Unable to process messages: "
2489
+ f"{str(last_error) if last_error else 'Unknown error'}"
1819
2490
  )
1820
2491
 
1821
- sanitized_messages = self._sanitize_messages_for_logging(
2492
+ # Log success
2493
+ sanitized = self._sanitize_messages_for_logging(
1822
2494
  openai_messages, prev_num_openai_messages
1823
2495
  )
1824
2496
  logger.info(
1825
- f"Model {self.model_backend.model_type}, "
1826
- f"index {self.model_backend.current_model_index}, "
1827
- f"iteration {current_iteration}, "
1828
- f"processed these messages: {sanitized_messages}"
2497
+ f"Model {self.model_backend.model_type} "
2498
+ f"[{current_iteration}]: {sanitized}"
1829
2499
  )
2500
+
1830
2501
  if not isinstance(response, ChatCompletion):
1831
2502
  raise TypeError(
1832
- f"Expected response to be a `ChatCompletion` object, but "
1833
- f"got {type(response).__name__} instead."
2503
+ f"Expected ChatCompletion, got {type(response).__name__}"
1834
2504
  )
2505
+
1835
2506
  return self._handle_batch_response(response)
1836
2507
 
2508
+ @observe()
1837
2509
  async def _aget_model_response(
1838
2510
  self,
1839
2511
  openai_messages: List[OpenAIMessage],
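
The retry loop above applies capped exponential backoff with full jitter: the nominal delay doubles per attempt up to 60 s, and the actual sleep is drawn uniformly from [0, nominal]. A worked sketch of the schedule for the defaults (`retry_attempts=3`, `retry_delay=1.0`):

    import random

    retry_delay = 1.0                  # ChatAgent default
    retry_attempts = 3                 # ChatAgent default

    for attempt in range(retry_attempts - 1):      # no sleep after the last try
        nominal = min(retry_delay * (2 ** attempt), 60.0)
        actual = random.uniform(0, nominal)        # "full jitter"
        print(f"retry {attempt + 1}: up to {nominal:.1f}s, chose {actual:.2f}s")

    # retry 1: up to 1.0s, retry 2: up to 2.0s. A RateLimitError on the final
    # attempt is not retried; the loop's else-branch raises ModelProcessingError.
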
@@ -1843,62 +2515,59 @@ class ChatAgent(BaseAgent):
1843
2515
  tool_schemas: Optional[List[Dict[str, Any]]] = None,
1844
2516
  prev_num_openai_messages: int = 0,
1845
2517
  ) -> ModelResponse:
1846
- r"""Internal function for agent async step model response.
1847
- Args:
1848
- openai_messages (List[OpenAIMessage]): The OpenAI messages
1849
- to process.
1850
- num_tokens (int): The number of tokens in the context.
1851
- current_iteration (int): The current iteration of the step.
1852
- response_format (Optional[Type[BaseModel]]): The response
1853
- format to use.
1854
- tool_schemas (Optional[List[Dict[str, Any]]]): The tool schemas
1855
- to use.
1856
- prev_num_openai_messages (int): The number of openai messages
1857
- logged in the previous iteration.
1858
-
1859
- Returns:
1860
- ModelResponse: The model response.
1861
- """
2518
+ r"""Internal function for agent async step model response."""
2519
+ last_error = None
1862
2520
 
1863
- response = None
1864
- try:
1865
- response = await self.model_backend.arun(
1866
- openai_messages, response_format, tool_schemas or None
1867
- )
1868
- except Exception as exc:
1869
- logger.error(
1870
- f"An error occurred while running model "
1871
- f"{self.model_backend.model_type}, "
1872
- f"index: {self.model_backend.current_model_index}",
1873
- exc_info=exc,
1874
- )
1875
- error_info = str(exc)
1876
-
1877
- if not response and self.model_backend.num_models > 1:
1878
- raise ModelProcessingError(
1879
- "Unable to process messages: none of the provided models "
1880
- "run successfully."
1881
- )
1882
- elif not response:
2521
+ for attempt in range(self.retry_attempts):
2522
+ try:
2523
+ response = await self.model_backend.arun(
2524
+ openai_messages, response_format, tool_schemas or None
2525
+ )
2526
+ if response:
2527
+ break
2528
+ except RateLimitError as e:
2529
+ last_error = e
2530
+ if attempt < self.retry_attempts - 1:
2531
+ delay = min(self.retry_delay * (2**attempt), 60.0)
2532
+ delay = random.uniform(0, delay) # Add jitter
2533
+ logger.warning(
2534
+ f"Rate limit hit (attempt {attempt + 1}"
2535
+ f"/{self.retry_attempts}). "
2536
+ f"Retrying in {delay:.1f}s"
2537
+ )
2538
+ await asyncio.sleep(delay)
2539
+ else:
2540
+ logger.error(
2541
+ f"Rate limit exhausted after "
2542
+ f"{self.retry_attempts} attempts"
2543
+ )
2544
+ except Exception:
2545
+ logger.error(
2546
+ f"Model error: {self.model_backend.model_type}",
2547
+ exc_info=True,
2548
+ )
2549
+ raise
2550
+ else:
2551
+ # Loop completed without success
1883
2552
  raise ModelProcessingError(
1884
- f"Unable to process messages: the only provided model "
1885
- f"did not run successfully. Error: {error_info}"
2553
+ f"Unable to process messages: "
2554
+ f"{str(last_error) if last_error else 'Unknown error'}"
1886
2555
  )
1887
2556
 
1888
- sanitized_messages = self._sanitize_messages_for_logging(
2557
+ # Log success
2558
+ sanitized = self._sanitize_messages_for_logging(
1889
2559
  openai_messages, prev_num_openai_messages
1890
2560
  )
1891
2561
  logger.info(
1892
- f"Model {self.model_backend.model_type}, "
1893
- f"index {self.model_backend.current_model_index}, "
1894
- f"iteration {current_iteration}, "
1895
- f"processed these messages: {sanitized_messages}"
2562
+ f"Model {self.model_backend.model_type} "
2563
+ f"[{current_iteration}]: {sanitized}"
1896
2564
  )
2565
+
1897
2566
  if not isinstance(response, ChatCompletion):
1898
2567
  raise TypeError(
1899
- f"Expected response to be a `ChatCompletion` object, but "
1900
- f"got {type(response).__name__} instead."
2568
+ f"Expected ChatCompletion, got {type(response).__name__}"
1901
2569
  )
2570
+
1902
2571
  return self._handle_batch_response(response)
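The async variant applies the same backoff but waits with `asyncio.sleep` so the event loop stays responsive. A sketch under the same assumptions; `acall_model` and `retry_on` (e.g. the provider's `RateLimitError` class) are illustrative parameters:

    import asyncio
    import random


    async def acall_with_backoff(acall_model, retry_on, attempts=3, base_delay=1.0):
        last_error = None
        for attempt in range(attempts):
            try:
                response = await acall_model()
                if response:
                    return response
            except retry_on as exc:
                last_error = exc
                if attempt < attempts - 1:
                    delay = min(base_delay * (2 ** attempt), 60.0)
                    # Non-blocking wait: other coroutines keep running.
                    await asyncio.sleep(random.uniform(0, delay))
        raise RuntimeError(f"Unable to process messages: {last_error}")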
1903
2572
 
1904
2573
  def _sanitize_messages_for_logging(
@@ -1915,11 +2584,6 @@ class ChatAgent(BaseAgent):
1915
2584
  Returns:
1916
2585
  List[OpenAIMessage]: The sanitized OpenAI messages.
1917
2586
  """
1918
- import hashlib
1919
- import os
1920
- import re
1921
- import tempfile
1922
-
1923
2587
  # Create a copy of messages for logging to avoid modifying the
1924
2588
  # original messages
1925
2589
  sanitized_messages = []
@@ -1960,7 +2624,14 @@ class ChatAgent(BaseAgent):
1960
2624
 
1961
2625
  # Save image to temp directory for viewing
1962
2626
  try:
1963
- import base64
2627
+ # Sanitize img_format to prevent path
2628
+ # traversal
2629
+ safe_format = re.sub(
2630
+ r'[^a-zA-Z0-9]', '', img_format
2631
+ )[:10]
2632
+ img_filename = (
2633
+ f"image_{img_hash}.{safe_format}"
2634
+ )
1964
2635
 
1965
2636
  temp_dir = tempfile.gettempdir()
1966
2637
  img_path = os.path.join(
@@ -1975,6 +2646,9 @@ class ChatAgent(BaseAgent):
1975
2646
  base64_data
1976
2647
  )
1977
2648
  )
2649
+ # Register for cleanup
2650
+ with _temp_files_lock:
2651
+ _temp_files.add(img_path)
1978
2652
 
1979
2653
  # Create a file:// URL that can be
1980
2654
  # opened
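For logging, base64 images are written to the system temp directory under a content-hash name, the format string is reduced to alphanumerics so it cannot smuggle path separators, and every written path is registered in a lock-protected set for later cleanup. A compact sketch of that pattern; `save_debug_image` and the module-level `_temp_files` registry are illustrative names, not part of the package:

    import base64
    import hashlib
    import os
    import re
    import tempfile
    import threading

    _temp_files: set = set()
    _temp_files_lock = threading.Lock()


    def save_debug_image(base64_data: str, img_format: str) -> str:
        # Keep only alphanumerics and cap the length so a hostile
        # "format" value cannot inject '../' or similar.
        safe_format = re.sub(r'[^a-zA-Z0-9]', '', img_format)[:10]
        img_hash = hashlib.md5(base64_data.encode()).hexdigest()[:8]
        img_path = os.path.join(
            tempfile.gettempdir(), f"image_{img_hash}.{safe_format}"
        )
        with open(img_path, "wb") as f:
            f.write(base64.b64decode(base64_data))
        with _temp_files_lock:  # register for cleanup at exit
            _temp_files.add(img_path)
        return img_path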
@@ -2227,7 +2901,8 @@ class ChatAgent(BaseAgent):
2227
2901
  try:
2228
2902
  raw_result = tool(**args)
2229
2903
  if self.mask_tool_output:
2230
- self._secure_result_store[tool_call_id] = raw_result
2904
+ with self._secure_result_store_lock:
2905
+ self._secure_result_store[tool_call_id] = raw_result
2231
2906
  result = (
2232
2907
  "[The tool has been executed successfully, but the output"
2233
2908
  " from the tool is masked. You can move forward]"
@@ -2285,7 +2960,7 @@ class ChatAgent(BaseAgent):
2285
2960
  # Capture the error message to prevent framework crash
2286
2961
  error_msg = f"Error executing async tool '{func_name}': {e!s}"
2287
2962
  result = f"Tool execution failed: {error_msg}"
2288
- logging.warning(error_msg)
2963
+ logger.warning(error_msg)
2289
2964
  return self._record_tool_calling(func_name, args, result, tool_call_id)
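When `mask_tool_output` is set, the raw result now goes into the secure store under a lock (so concurrent tool threads cannot race on the dict) and the model only ever sees a fixed placeholder string. A stand-alone sketch of that idea; the `SecureResultStore` class is illustrative, not part of the package:

    import threading
    from typing import Any, Dict


    class SecureResultStore:
        def __init__(self) -> None:
            self._results: Dict[str, Any] = {}
            self._lock = threading.Lock()

        def put(self, tool_call_id: str, raw_result: Any) -> str:
            with self._lock:
                self._results[tool_call_id] = raw_result
            # Only this placeholder is forwarded to the model.
            return (
                "[The tool has been executed successfully, but the output"
                " from the tool is masked. You can move forward]"
            )

        def get(self, tool_call_id: str) -> Any:
            with self._lock:
                return self._results.get(tool_call_id)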
2290
2965
 
2291
2966
  def _record_tool_calling(
@@ -2336,20 +3011,22 @@ class ChatAgent(BaseAgent):
2336
3011
  # This ensures the assistant message (tool call) always appears before
2337
3012
  # the function message (tool result) in the conversation context
2338
3013
  # Use time.time_ns() for nanosecond precision to avoid collisions
2339
- import time
2340
-
2341
3014
  current_time_ns = time.time_ns()
2342
3015
  base_timestamp = current_time_ns / 1_000_000_000 # Convert to seconds
2343
3016
 
2344
3017
  self.update_memory(
2345
- assist_msg, OpenAIBackendRole.ASSISTANT, timestamp=base_timestamp
3018
+ assist_msg,
3019
+ OpenAIBackendRole.ASSISTANT,
3020
+ timestamp=base_timestamp,
3021
+ return_records=self._tool_output_cache_enabled,
2346
3022
  )
2347
3023
 
2348
3024
  # Add minimal increment to ensure function message comes after
2349
- self.update_memory(
3025
+ func_records = self.update_memory(
2350
3026
  func_msg,
2351
3027
  OpenAIBackendRole.FUNCTION,
2352
3028
  timestamp=base_timestamp + 1e-6,
3029
+ return_records=self._tool_output_cache_enabled,
2353
3030
  )
2354
3031
 
2355
3032
  # Record information about this tool call
@@ -2360,6 +3037,20 @@ class ChatAgent(BaseAgent):
2360
3037
  tool_call_id=tool_call_id,
2361
3038
  )
2362
3039
 
3040
+ if (
3041
+ self._tool_output_cache_enabled
3042
+ and not mask_output
3043
+ and func_records
3044
+ and self._tool_output_cache_manager is not None
3045
+ ):
3046
+ serialized_result = self._serialize_tool_result(result)
3047
+ self._register_tool_output_for_cache(
3048
+ func_name,
3049
+ tool_call_id,
3050
+ serialized_result,
3051
+ cast(List[MemoryRecord], func_records),
3052
+ )
3053
+
2363
3054
  return tool_record
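A tool call is persisted as two memory records, and the nanosecond clock plus a one-microsecond offset guarantees the assistant (tool-call) record sorts strictly before the function (tool-result) record even when both are written in the same step. The ordering trick in isolation:

    import time


    def paired_timestamps():
        # time.time_ns() has nanosecond resolution, so repeated calls in
        # the same step still yield distinct bases; the +1e-6 offset keeps
        # the result record strictly after the call record.
        base = time.time_ns() / 1_000_000_000  # seconds as a float
        return base, base + 1e-6


    assistant_ts, function_ts = paired_timestamps()
    assert assistant_ts < function_ts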
2364
3055
 
2365
3056
  def _stream(
@@ -2428,7 +3119,7 @@ class ChatAgent(BaseAgent):
2428
3119
  # Check termination condition
2429
3120
  if self.stop_event and self.stop_event.is_set():
2430
3121
  logger.info(
2431
- f"Termination triggered at iteration " f"{iteration_count}"
3122
+ f"Termination triggered at iteration {iteration_count}"
2432
3123
  )
2433
3124
  yield self._step_terminate(
2434
3125
  num_tokens, tool_call_records, "termination_triggered"
@@ -2611,12 +3302,6 @@ class ChatAgent(BaseAgent):
2611
3302
  stream_completed = False
2612
3303
 
2613
3304
  for chunk in stream:
2614
- # Update token usage if available
2615
- if chunk.usage:
2616
- self._update_token_usage_tracker(
2617
- step_token_usage, safe_model_dump(chunk.usage)
2618
- )
2619
-
2620
3305
  # Process chunk delta
2621
3306
  if chunk.choices and len(chunk.choices) > 0:
2622
3307
  choice = chunk.choices[0]
@@ -2649,12 +3334,6 @@ class ChatAgent(BaseAgent):
2649
3334
  # If we have complete tool calls, execute them with
2650
3335
  # sync status updates
2651
3336
  if accumulated_tool_calls:
2652
- # Record assistant message with tool calls first
2653
- self._record_assistant_tool_calls_message(
2654
- accumulated_tool_calls,
2655
- content_accumulator.get_full_content(),
2656
- )
2657
-
2658
3337
  # Execute tools synchronously with
2659
3338
  # optimized status updates
2660
3339
  for (
@@ -2687,7 +3366,49 @@ class ChatAgent(BaseAgent):
2687
3366
  )
2688
3367
 
2689
3368
  self.record_message(final_message)
2690
- break
3369
+ elif chunk.usage and not chunk.choices:
3370
+ # Handle final chunk with usage but empty choices
3371
+ # This happens when stream_options={"include_usage": True}
3372
+ # Update the final usage from this chunk
3373
+ self._update_token_usage_tracker(
3374
+ step_token_usage, safe_model_dump(chunk.usage)
3375
+ )
3376
+
3377
+ # Create final response with final usage
3378
+ final_content = content_accumulator.get_full_content()
3379
+ if final_content.strip():
3380
+ final_message = BaseMessage(
3381
+ role_name=self.role_name,
3382
+ role_type=self.role_type,
3383
+ meta_dict={},
3384
+ content=final_content,
3385
+ )
3386
+
3387
+ if response_format:
3388
+ self._try_format_message(
3389
+ final_message, response_format
3390
+ )
3391
+
3392
+ # Create final response with final usage (not partial)
3393
+ final_response = ChatAgentResponse(
3394
+ msgs=[final_message],
3395
+ terminated=False,
3396
+ info={
3397
+ "id": getattr(chunk, 'id', ''),
3398
+ "usage": step_token_usage.copy(),
3399
+ "finish_reasons": ["stop"],
3400
+ "num_tokens": self._get_token_count(final_content),
3401
+ "tool_calls": tool_call_records or [],
3402
+ "external_tool_requests": None,
3403
+ "streaming": False,
3404
+ "partial": False,
3405
+ },
3406
+ )
3407
+ yield final_response
3408
+ break
3409
+ elif stream_completed:
3410
+ # If we've already seen finish_reason but no usage chunk, exit
3411
+ break
2691
3412
 
2692
3413
  return stream_completed, tool_calls_complete
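The new `elif chunk.usage and not chunk.choices` branch handles the usage-only trailer that providers send when a stream is opened with `stream_options={"include_usage": True}`: its `choices` list is empty and its `usage` field carries the final token counts. A hedged sketch of consuming such a stream, assuming chunks follow the OpenAI streaming shape:

    def consume_stream(stream):
        """Collect text deltas and the final usage from an OpenAI-style stream."""
        content_parts = []
        final_usage = None
        for chunk in stream:
            if chunk.choices:
                delta = chunk.choices[0].delta
                if getattr(delta, "content", None):
                    content_parts.append(delta.content)
            elif getattr(chunk, "usage", None):
                # Usage-only trailer: empty choices, populated usage.
                final_usage = chunk.usage
        return "".join(content_parts), final_usage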
2693
3414
 
@@ -2767,72 +3488,70 @@ class ChatAgent(BaseAgent):
2767
3488
  accumulated_tool_calls: Dict[str, Any],
2768
3489
  tool_call_records: List[ToolCallingRecord],
2769
3490
  ) -> Generator[ChatAgentResponse, None, None]:
2770
- r"""Execute multiple tools synchronously with
2771
- proper content accumulation, using threads+queue for
2772
- non-blocking status streaming."""
2773
-
2774
- def tool_worker(result_queue, tool_call_data):
2775
- try:
2776
- tool_call_record = self._execute_tool_from_stream_data(
2777
- tool_call_data
2778
- )
2779
- result_queue.put(tool_call_record)
2780
- except Exception as e:
2781
- logger.error(f"Error in threaded tool execution: {e}")
2782
- result_queue.put(None)
3491
+ r"""Execute multiple tools synchronously with proper content
3492
+ accumulation, using ThreadPoolExecutor for better timeout handling."""
2783
3493
 
2784
3494
  tool_calls_to_execute = []
2785
3495
  for _tool_call_index, tool_call_data in accumulated_tool_calls.items():
2786
3496
  if tool_call_data.get('complete', False):
2787
3497
  tool_calls_to_execute.append(tool_call_data)
2788
3498
 
2789
- # Phase 2: Execute tools in threads and yield status while waiting
2790
- for tool_call_data in tool_calls_to_execute:
2791
- function_name = tool_call_data['function']['name']
2792
- try:
2793
- args = json.loads(tool_call_data['function']['arguments'])
2794
- except json.JSONDecodeError:
2795
- args = tool_call_data['function']['arguments']
2796
- result_queue: queue.Queue[Optional[ToolCallingRecord]] = (
2797
- queue.Queue()
2798
- )
2799
- thread = threading.Thread(
2800
- target=tool_worker,
2801
- args=(result_queue, tool_call_data),
2802
- )
2803
- thread.start()
2804
-
2805
- # Log debug info instead of adding to content
2806
- logger.info(
2807
- f"Calling function: {function_name} with arguments: {args}"
2808
- )
2809
-
2810
- # wait for tool thread to finish with optional timeout
2811
- thread.join(self.tool_execution_timeout)
3499
+ if not tool_calls_to_execute:
3500
+ # No tools to execute, return immediately
3501
+ return
3502
+ yield # Make this a generator
3503
+
3504
+ # Execute tools using ThreadPoolExecutor for proper timeout handling
3505
+ # Use max_workers=len() for parallel execution, with min of 1
3506
+ with concurrent.futures.ThreadPoolExecutor(
3507
+ max_workers=max(1, len(tool_calls_to_execute))
3508
+ ) as executor:
3509
+ # Submit all tools first (parallel execution)
3510
+ futures_map = {}
3511
+ for tool_call_data in tool_calls_to_execute:
3512
+ function_name = tool_call_data['function']['name']
3513
+ try:
3514
+ args = json.loads(tool_call_data['function']['arguments'])
3515
+ except json.JSONDecodeError:
3516
+ args = tool_call_data['function']['arguments']
2812
3517
 
2813
- # If timeout occurred, mark as error and continue
2814
- if thread.is_alive():
2815
- # Log timeout info instead of adding to content
2816
- logger.warning(
2817
- f"Function '{function_name}' timed out after "
2818
- f"{self.tool_execution_timeout} seconds"
3518
+ # Log debug info
3519
+ logger.info(
3520
+ f"Calling function: {function_name} with arguments: {args}"
2819
3521
  )
2820
3522
 
2821
- # Detach thread (it may still finish later). Skip recording.
2822
- continue
2823
-
2824
- # Tool finished, get result
2825
- tool_call_record = result_queue.get()
2826
- if tool_call_record:
2827
- tool_call_records.append(tool_call_record)
2828
- raw_result = tool_call_record.result
2829
- result_str = str(raw_result)
3523
+ # Submit tool execution (non-blocking)
3524
+ future = executor.submit(
3525
+ self._execute_tool_from_stream_data, tool_call_data
3526
+ )
3527
+ futures_map[future] = (function_name, tool_call_data)
3528
+
3529
+ # Wait for all futures to complete (or timeout)
3530
+ for future in concurrent.futures.as_completed(
3531
+ futures_map.keys(),
3532
+ timeout=self.tool_execution_timeout
3533
+ if self.tool_execution_timeout
3534
+ else None,
3535
+ ):
3536
+ function_name, tool_call_data = futures_map[future]
2830
3537
 
2831
- # Log debug info instead of adding to content
2832
- logger.info(f"Function output: {result_str}")
2833
- else:
2834
- # Error already logged
2835
- continue
3538
+ try:
3539
+ tool_call_record = future.result()
3540
+ if tool_call_record:
3541
+ tool_call_records.append(tool_call_record)
3542
+ logger.info(
3543
+ f"Function output: {tool_call_record.result}"
3544
+ )
3545
+ except concurrent.futures.TimeoutError:
3546
+ logger.warning(
3547
+ f"Function '{function_name}' timed out after "
3548
+ f"{self.tool_execution_timeout} seconds"
3549
+ )
3550
+ future.cancel()
3551
+ except Exception as e:
3552
+ logger.error(
3553
+ f"Error executing tool '{function_name}': {e}"
3554
+ )
2836
3555
 
2837
3556
  # Ensure this function remains a generator (required by type signature)
2838
3557
  return
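The thread-plus-queue execution path is replaced by a `ThreadPoolExecutor`: every complete tool call is submitted up front so tools run in parallel, and `as_completed` applies a single timeout across the whole batch. A simplified sketch of the pattern; `execute_tool` and the 5-second timeout are placeholders:

    import concurrent.futures
    import time


    def execute_tool(name: str) -> str:
        time.sleep(0.1)  # placeholder for real tool work
        return f"{name}: done"


    def run_tools(names, timeout=5.0):
        results = []
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=max(1, len(names))
        ) as executor:
            futures = {executor.submit(execute_tool, n): n for n in names}
            try:
                # as_completed raises TimeoutError if the whole batch has
                # not finished within `timeout` seconds.
                for future in concurrent.futures.as_completed(
                    futures, timeout=timeout
                ):
                    results.append(future.result())
            except concurrent.futures.TimeoutError:
                for f in futures:
                    f.cancel()  # best effort; already-running calls continue
        return results


    print(run_tools(["search", "calculator"]))

Compared with joining one thread per tool, this keeps a slow tool from serializing the batch while still bounding the total wait.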
@@ -2852,10 +3571,19 @@ class ChatAgent(BaseAgent):
2852
3571
  tool = self._internal_tools[function_name]
2853
3572
  try:
2854
3573
  result = tool(**args)
3574
+ # First, create and record the assistant message with tool
3575
+ # call
3576
+ assist_msg = FunctionCallingMessage(
3577
+ role_name=self.role_name,
3578
+ role_type=self.role_type,
3579
+ meta_dict=None,
3580
+ content="",
3581
+ func_name=function_name,
3582
+ args=args,
3583
+ tool_call_id=tool_call_id,
3584
+ )
2855
3585
 
2856
- # Only record the tool response message, not the assistant
2857
- # message assistant message with tool_calls was already
2858
- # recorded in _record_assistant_tool_calls_message
3586
+ # Then create the tool response message
2859
3587
  func_msg = FunctionCallingMessage(
2860
3588
  role_name=self.role_name,
2861
3589
  role_type=self.role_type,
@@ -2866,7 +3594,23 @@ class ChatAgent(BaseAgent):
2866
3594
  tool_call_id=tool_call_id,
2867
3595
  )
2868
3596
 
2869
- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
3597
+ # Record both messages with precise timestamps to ensure
3598
+ # correct ordering
3599
+ current_time_ns = time.time_ns()
3600
+ base_timestamp = (
3601
+ current_time_ns / 1_000_000_000
3602
+ ) # Convert to seconds
3603
+
3604
+ self.update_memory(
3605
+ assist_msg,
3606
+ OpenAIBackendRole.ASSISTANT,
3607
+ timestamp=base_timestamp,
3608
+ )
3609
+ self.update_memory(
3610
+ func_msg,
3611
+ OpenAIBackendRole.FUNCTION,
3612
+ timestamp=base_timestamp + 1e-6,
3613
+ )
2870
3614
 
2871
3615
  return ToolCallingRecord(
2872
3616
  tool_name=function_name,
@@ -2880,7 +3624,7 @@ class ChatAgent(BaseAgent):
2880
3624
  f"Error executing tool '{function_name}': {e!s}"
2881
3625
  )
2882
3626
  result = {"error": error_msg}
2883
- logging.warning(error_msg)
3627
+ logger.warning(error_msg)
2884
3628
 
2885
3629
  # Record error response
2886
3630
  func_msg = FunctionCallingMessage(
@@ -2950,10 +3694,19 @@ class ChatAgent(BaseAgent):
2950
3694
  else:
2951
3695
  # Fallback: synchronous call
2952
3696
  result = tool(**args)
3697
+ # First, create and record the assistant message with tool
3698
+ # call
3699
+ assist_msg = FunctionCallingMessage(
3700
+ role_name=self.role_name,
3701
+ role_type=self.role_type,
3702
+ meta_dict=None,
3703
+ content="",
3704
+ func_name=function_name,
3705
+ args=args,
3706
+ tool_call_id=tool_call_id,
3707
+ )
2953
3708
 
2954
- # Only record the tool response message, not the assistant
2955
- # message assistant message with tool_calls was already
2956
- # recorded in _record_assistant_tool_calls_message
3709
+ # Then create the tool response message
2957
3710
  func_msg = FunctionCallingMessage(
2958
3711
  role_name=self.role_name,
2959
3712
  role_type=self.role_type,
@@ -2964,7 +3717,23 @@ class ChatAgent(BaseAgent):
2964
3717
  tool_call_id=tool_call_id,
2965
3718
  )
2966
3719
 
2967
- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
3720
+ # Record both messages with precise timestamps to ensure
3721
+ # correct ordering
3722
+ current_time_ns = time.time_ns()
3723
+ base_timestamp = (
3724
+ current_time_ns / 1_000_000_000
3725
+ ) # Convert to seconds
3726
+
3727
+ self.update_memory(
3728
+ assist_msg,
3729
+ OpenAIBackendRole.ASSISTANT,
3730
+ timestamp=base_timestamp,
3731
+ )
3732
+ self.update_memory(
3733
+ func_msg,
3734
+ OpenAIBackendRole.FUNCTION,
3735
+ timestamp=base_timestamp + 1e-6,
3736
+ )
2968
3737
 
2969
3738
  return ToolCallingRecord(
2970
3739
  tool_name=function_name,
@@ -2978,7 +3747,7 @@ class ChatAgent(BaseAgent):
2978
3747
  f"Error executing async tool '{function_name}': {e!s}"
2979
3748
  )
2980
3749
  result = {"error": error_msg}
2981
- logging.warning(error_msg)
3750
+ logger.warning(error_msg)
2982
3751
 
2983
3752
  # Record error response
2984
3753
  func_msg = FunctionCallingMessage(
@@ -3088,7 +3857,7 @@ class ChatAgent(BaseAgent):
3088
3857
  # Check termination condition
3089
3858
  if self.stop_event and self.stop_event.is_set():
3090
3859
  logger.info(
3091
- f"Termination triggered at iteration " f"{iteration_count}"
3860
+ f"Termination triggered at iteration {iteration_count}"
3092
3861
  )
3093
3862
  yield self._step_terminate(
3094
3863
  num_tokens, tool_call_records, "termination_triggered"
@@ -3315,18 +4084,13 @@ class ChatAgent(BaseAgent):
3315
4084
  response_format: Optional[Type[BaseModel]] = None,
3316
4085
  ) -> AsyncGenerator[Union[ChatAgentResponse, Tuple[bool, bool]], None]:
3317
4086
  r"""Async version of process streaming chunks with
3318
- content accumulator."""
4087
+ content accumulator.
4088
+ """
3319
4089
 
3320
4090
  tool_calls_complete = False
3321
4091
  stream_completed = False
3322
4092
 
3323
4093
  async for chunk in stream:
3324
- # Update token usage if available
3325
- if chunk.usage:
3326
- self._update_token_usage_tracker(
3327
- step_token_usage, safe_model_dump(chunk.usage)
3328
- )
3329
-
3330
4094
  # Process chunk delta
3331
4095
  if chunk.choices and len(chunk.choices) > 0:
3332
4096
  choice = chunk.choices[0]
@@ -3359,13 +4123,6 @@ class ChatAgent(BaseAgent):
3359
4123
  # If we have complete tool calls, execute them with
3360
4124
  # async status updates
3361
4125
  if accumulated_tool_calls:
3362
- # Record assistant message with
3363
- # tool calls first
3364
- self._record_assistant_tool_calls_message(
3365
- accumulated_tool_calls,
3366
- content_accumulator.get_full_content(),
3367
- )
3368
-
3369
4126
  # Execute tools asynchronously with real-time
3370
4127
  # status updates
3371
4128
  async for (
@@ -3400,7 +4157,49 @@ class ChatAgent(BaseAgent):
3400
4157
  )
3401
4158
 
3402
4159
  self.record_message(final_message)
3403
- break
4160
+ elif chunk.usage and not chunk.choices:
4161
+ # Handle final chunk with usage but empty choices
4162
+ # This happens when stream_options={"include_usage": True}
4163
+ # Update the final usage from this chunk
4164
+ self._update_token_usage_tracker(
4165
+ step_token_usage, safe_model_dump(chunk.usage)
4166
+ )
4167
+
4168
+ # Create final response with final usage
4169
+ final_content = content_accumulator.get_full_content()
4170
+ if final_content.strip():
4171
+ final_message = BaseMessage(
4172
+ role_name=self.role_name,
4173
+ role_type=self.role_type,
4174
+ meta_dict={},
4175
+ content=final_content,
4176
+ )
4177
+
4178
+ if response_format:
4179
+ self._try_format_message(
4180
+ final_message, response_format
4181
+ )
4182
+
4183
+ # Create final response with final usage (not partial)
4184
+ final_response = ChatAgentResponse(
4185
+ msgs=[final_message],
4186
+ terminated=False,
4187
+ info={
4188
+ "id": getattr(chunk, 'id', ''),
4189
+ "usage": step_token_usage.copy(),
4190
+ "finish_reasons": ["stop"],
4191
+ "num_tokens": self._get_token_count(final_content),
4192
+ "tool_calls": tool_call_records or [],
4193
+ "external_tool_requests": None,
4194
+ "streaming": False,
4195
+ "partial": False,
4196
+ },
4197
+ )
4198
+ yield final_response
4199
+ break
4200
+ elif stream_completed:
4201
+ # If we've already seen finish_reason but no usage chunk, exit
4202
+ break
3404
4203
 
3405
4204
  # Yield the final status as a tuple
3406
4205
  yield (stream_completed, tool_calls_complete)
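The async streaming loop mirrors the synchronous one, including the usage-only trailer handling. A minimal `async for` counterpart to the earlier sketch, again assuming OpenAI-shaped chunks:

    async def aconsume_stream(stream):
        content_parts = []
        final_usage = None
        async for chunk in stream:
            if chunk.choices:
                delta = chunk.choices[0].delta
                if getattr(delta, "content", None):
                    content_parts.append(delta.content)
            elif getattr(chunk, "usage", None):
                final_usage = chunk.usage
        return "".join(content_parts), final_usage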
@@ -3493,15 +4292,18 @@ class ChatAgent(BaseAgent):
3493
4292
  ) -> ChatAgentResponse:
3494
4293
  r"""Create a streaming response using content accumulator."""
3495
4294
 
3496
- # Add new content to accumulator and get full content
4295
+ # Add new content; only build full content when needed
3497
4296
  accumulator.add_streaming_content(new_content)
3498
- full_content = accumulator.get_full_content()
4297
+ if self.stream_accumulate:
4298
+ message_content = accumulator.get_full_content()
4299
+ else:
4300
+ message_content = new_content
3499
4301
 
3500
4302
  message = BaseMessage(
3501
4303
  role_name=self.role_name,
3502
4304
  role_type=self.role_type,
3503
4305
  meta_dict={},
3504
- content=full_content,
4306
+ content=message_content,
3505
4307
  )
3506
4308
 
3507
4309
  return ChatAgentResponse(
@@ -3511,7 +4313,7 @@ class ChatAgent(BaseAgent):
3511
4313
  "id": response_id,
3512
4314
  "usage": step_token_usage.copy(),
3513
4315
  "finish_reasons": ["streaming"],
3514
- "num_tokens": self._get_token_count(full_content),
4316
+ "num_tokens": self._get_token_count(message_content),
3515
4317
  "tool_calls": tool_call_records or [],
3516
4318
  "external_tool_requests": None,
3517
4319
  "streaming": True,
@@ -3567,10 +4369,12 @@ class ChatAgent(BaseAgent):
3567
4369
  configuration.
3568
4370
  """
3569
4371
  # Create a new instance with the same configuration
3570
- # If with_memory is True, set system_message to None
3571
- # If with_memory is False, use the original system message
4372
+ # If with_memory is True, set system_message to None (it will be
4373
+ # copied from memory below, including any workflow context)
4374
+ # If with_memory is False, use the current system message
4375
+ # (which may include appended workflow context)
3572
4376
  # To avoid duplicated system memory.
3573
- system_message = None if with_memory else self._original_system_message
4377
+ system_message = None if with_memory else self._system_message
3574
4378
 
3575
4379
  # Clone tools and collect toolkits that need registration
3576
4380
  cloned_tools, toolkits_to_register = self._clone_tools()
@@ -3584,7 +4388,7 @@ class ChatAgent(BaseAgent):
3584
4388
  self.memory.get_context_creator(), "token_limit", None
3585
4389
  ),
3586
4390
  output_language=self._output_language,
3587
- tools=cloned_tools,
4391
+ tools=cast(List[Union[FunctionTool, Callable]], cloned_tools),
3588
4392
  toolkits_to_register_agent=toolkits_to_register,
3589
4393
  external_tools=[
3590
4394
  schema for schema in self._external_tool_schemas.values()
@@ -3598,6 +4402,10 @@ class ChatAgent(BaseAgent):
3598
4402
  tool_execution_timeout=self.tool_execution_timeout,
3599
4403
  pause_event=self.pause_event,
3600
4404
  prune_tool_calls_from_memory=self.prune_tool_calls_from_memory,
4405
+ enable_tool_output_cache=self._tool_output_cache_enabled,
4406
+ tool_output_cache_threshold=self._tool_output_cache_threshold,
4407
+ tool_output_cache_dir=self._tool_output_cache_dir,
4408
+ stream_accumulate=self.stream_accumulate,
3601
4409
  )
3602
4410
 
3603
4411
  # Copy memory if requested
@@ -3612,9 +4420,7 @@ class ChatAgent(BaseAgent):
3612
4420
 
3613
4421
  def _clone_tools(
3614
4422
  self,
3615
- ) -> Tuple[
3616
- List[Union[FunctionTool, Callable]], List[RegisteredAgentToolkit]
3617
- ]:
4423
+ ) -> Tuple[List[FunctionTool], List[RegisteredAgentToolkit]]:
3618
4424
  r"""Clone tools and return toolkits that need agent registration.
3619
4425
 
3620
4426
  This method handles stateful toolkits by cloning them if they have
@@ -3666,18 +4472,62 @@ class ChatAgent(BaseAgent):
3666
4472
  # Toolkit doesn't support cloning, use original
3667
4473
  cloned_toolkits[toolkit_id] = toolkit_instance
3668
4474
 
4475
+ if getattr(
4476
+ tool.func, "__message_integration_enhanced__", False
4477
+ ):
4478
+ cloned_tools.append(
4479
+ FunctionTool(
4480
+ func=tool.func,
4481
+ openai_tool_schema=tool.get_openai_tool_schema(),
4482
+ )
4483
+ )
4484
+ continue
4485
+
3669
4486
  # Get the method from the cloned (or original) toolkit
3670
4487
  toolkit = cloned_toolkits[toolkit_id]
3671
4488
  method_name = tool.func.__name__
4489
+
3672
4490
  if hasattr(toolkit, method_name):
3673
4491
  new_method = getattr(toolkit, method_name)
3674
- cloned_tools.append(new_method)
4492
+ # Wrap cloned method into a new FunctionTool,
4493
+ # preserving schema
4494
+ try:
4495
+ new_tool = FunctionTool(
4496
+ func=new_method,
4497
+ openai_tool_schema=tool.get_openai_tool_schema(),
4498
+ )
4499
+ cloned_tools.append(new_tool)
4500
+ except Exception as e:
4501
+ # If wrapping fails, fallback to wrapping the original
4502
+ # function with its schema to maintain consistency
4503
+ logger.warning(
4504
+ f"Failed to wrap cloned toolkit "
4505
+ f"method '{method_name}' "
4506
+ f"with FunctionTool: {e}. Using original "
4507
+ f"function with preserved schema instead."
4508
+ )
4509
+ cloned_tools.append(
4510
+ FunctionTool(
4511
+ func=tool.func,
4512
+ openai_tool_schema=tool.get_openai_tool_schema(),
4513
+ )
4514
+ )
3675
4515
  else:
3676
- # Fallback to original function
3677
- cloned_tools.append(tool.func)
4516
+ # Fallback to original function wrapped in FunctionTool
4517
+ cloned_tools.append(
4518
+ FunctionTool(
4519
+ func=tool.func,
4520
+ openai_tool_schema=tool.get_openai_tool_schema(),
4521
+ )
4522
+ )
3678
4523
  else:
3679
- # Not a toolkit method, just use the original function
3680
- cloned_tools.append(tool.func)
4524
+ # Not a toolkit method, preserve FunctionTool schema directly
4525
+ cloned_tools.append(
4526
+ FunctionTool(
4527
+ func=tool.func,
4528
+ openai_tool_schema=tool.get_openai_tool_schema(),
4529
+ )
4530
+ )
3681
4531
 
3682
4532
  return cloned_tools, toolkits_to_register
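Cloned tools are now always re-wrapped in `FunctionTool` with `openai_tool_schema=tool.get_openai_tool_schema()`, so a method re-bound to a cloned toolkit keeps exactly the schema the agent already advertised to the model. A reduced sketch of that wrapping step, using the same camel APIs and a toy function:

    from camel.toolkits import FunctionTool


    def add(a: int, b: int) -> int:
        r"""Add two integers."""
        return a + b


    original = FunctionTool(func=add)

    # Re-wrap the callable (in the agent this is the method re-bound to a
    # cloned toolkit) while preserving the previously advertised schema.
    cloned = FunctionTool(
        func=add,
        openai_tool_schema=original.get_openai_tool_schema(),
    )
    print(sorted(cloned.get_openai_tool_schema().keys()))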
3683
4533