camel-ai 0.2.73a4__py3-none-any.whl → 0.2.80a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. camel/__init__.py +1 -1
  2. camel/agents/_utils.py +38 -0
  3. camel/agents/chat_agent.py +2217 -519
  4. camel/agents/mcp_agent.py +30 -27
  5. camel/configs/__init__.py +15 -0
  6. camel/configs/aihubmix_config.py +88 -0
  7. camel/configs/amd_config.py +70 -0
  8. camel/configs/cometapi_config.py +104 -0
  9. camel/configs/minimax_config.py +93 -0
  10. camel/configs/nebius_config.py +103 -0
  11. camel/data_collectors/alpaca_collector.py +15 -6
  12. camel/datasets/base_generator.py +39 -10
  13. camel/environments/single_step.py +28 -3
  14. camel/environments/tic_tac_toe.py +1 -1
  15. camel/interpreters/__init__.py +2 -0
  16. camel/interpreters/docker/Dockerfile +3 -12
  17. camel/interpreters/e2b_interpreter.py +34 -1
  18. camel/interpreters/microsandbox_interpreter.py +395 -0
  19. camel/loaders/__init__.py +11 -2
  20. camel/loaders/chunkr_reader.py +9 -0
  21. camel/memories/agent_memories.py +48 -4
  22. camel/memories/base.py +26 -0
  23. camel/memories/blocks/chat_history_block.py +122 -4
  24. camel/memories/context_creators/score_based.py +25 -384
  25. camel/memories/records.py +88 -8
  26. camel/messages/base.py +153 -34
  27. camel/models/__init__.py +10 -0
  28. camel/models/aihubmix_model.py +83 -0
  29. camel/models/aiml_model.py +1 -16
  30. camel/models/amd_model.py +101 -0
  31. camel/models/anthropic_model.py +6 -19
  32. camel/models/aws_bedrock_model.py +2 -33
  33. camel/models/azure_openai_model.py +114 -89
  34. camel/models/base_audio_model.py +3 -1
  35. camel/models/base_model.py +32 -14
  36. camel/models/cohere_model.py +1 -16
  37. camel/models/cometapi_model.py +83 -0
  38. camel/models/crynux_model.py +1 -16
  39. camel/models/deepseek_model.py +1 -16
  40. camel/models/fish_audio_model.py +6 -0
  41. camel/models/gemini_model.py +36 -18
  42. camel/models/groq_model.py +1 -17
  43. camel/models/internlm_model.py +1 -16
  44. camel/models/litellm_model.py +1 -16
  45. camel/models/lmstudio_model.py +1 -17
  46. camel/models/minimax_model.py +83 -0
  47. camel/models/mistral_model.py +1 -16
  48. camel/models/model_factory.py +27 -1
  49. camel/models/modelscope_model.py +1 -16
  50. camel/models/moonshot_model.py +105 -24
  51. camel/models/nebius_model.py +83 -0
  52. camel/models/nemotron_model.py +0 -5
  53. camel/models/netmind_model.py +1 -16
  54. camel/models/novita_model.py +1 -16
  55. camel/models/nvidia_model.py +1 -16
  56. camel/models/ollama_model.py +4 -19
  57. camel/models/openai_compatible_model.py +62 -41
  58. camel/models/openai_model.py +62 -57
  59. camel/models/openrouter_model.py +1 -17
  60. camel/models/ppio_model.py +1 -16
  61. camel/models/qianfan_model.py +1 -16
  62. camel/models/qwen_model.py +1 -16
  63. camel/models/reka_model.py +1 -16
  64. camel/models/samba_model.py +34 -47
  65. camel/models/sglang_model.py +64 -31
  66. camel/models/siliconflow_model.py +1 -16
  67. camel/models/stub_model.py +0 -4
  68. camel/models/togetherai_model.py +1 -16
  69. camel/models/vllm_model.py +1 -16
  70. camel/models/volcano_model.py +0 -17
  71. camel/models/watsonx_model.py +1 -16
  72. camel/models/yi_model.py +1 -16
  73. camel/models/zhipuai_model.py +60 -16
  74. camel/parsers/__init__.py +18 -0
  75. camel/parsers/mcp_tool_call_parser.py +176 -0
  76. camel/retrievers/auto_retriever.py +1 -0
  77. camel/runtimes/daytona_runtime.py +11 -12
  78. camel/societies/__init__.py +2 -0
  79. camel/societies/workforce/__init__.py +2 -0
  80. camel/societies/workforce/events.py +122 -0
  81. camel/societies/workforce/prompts.py +146 -66
  82. camel/societies/workforce/role_playing_worker.py +15 -11
  83. camel/societies/workforce/single_agent_worker.py +302 -65
  84. camel/societies/workforce/structured_output_handler.py +30 -18
  85. camel/societies/workforce/task_channel.py +163 -27
  86. camel/societies/workforce/utils.py +107 -13
  87. camel/societies/workforce/workflow_memory_manager.py +772 -0
  88. camel/societies/workforce/workforce.py +1949 -579
  89. camel/societies/workforce/workforce_callback.py +74 -0
  90. camel/societies/workforce/workforce_logger.py +168 -145
  91. camel/societies/workforce/workforce_metrics.py +33 -0
  92. camel/storages/key_value_storages/json.py +15 -2
  93. camel/storages/key_value_storages/mem0_cloud.py +48 -47
  94. camel/storages/object_storages/google_cloud.py +1 -1
  95. camel/storages/vectordb_storages/oceanbase.py +13 -13
  96. camel/storages/vectordb_storages/qdrant.py +3 -3
  97. camel/storages/vectordb_storages/tidb.py +8 -6
  98. camel/tasks/task.py +4 -3
  99. camel/toolkits/__init__.py +20 -7
  100. camel/toolkits/aci_toolkit.py +45 -0
  101. camel/toolkits/base.py +6 -4
  102. camel/toolkits/code_execution.py +28 -1
  103. camel/toolkits/context_summarizer_toolkit.py +684 -0
  104. camel/toolkits/dappier_toolkit.py +5 -1
  105. camel/toolkits/dingtalk.py +1135 -0
  106. camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
  107. camel/toolkits/excel_toolkit.py +1 -1
  108. camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +430 -36
  109. camel/toolkits/function_tool.py +13 -3
  110. camel/toolkits/github_toolkit.py +104 -17
  111. camel/toolkits/gmail_toolkit.py +1839 -0
  112. camel/toolkits/google_calendar_toolkit.py +38 -4
  113. camel/toolkits/google_drive_mcp_toolkit.py +12 -31
  114. camel/toolkits/hybrid_browser_toolkit/config_loader.py +15 -0
  115. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +77 -8
  116. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +884 -88
  117. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  118. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
  119. camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
  120. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +959 -89
  121. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +9 -2
  122. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +281 -213
  123. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  124. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  125. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  126. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +23 -3
  127. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +72 -7
  128. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -132
  129. camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
  130. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
  131. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
  132. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +321 -8
  133. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
  134. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
  135. camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +151 -53
  136. camel/toolkits/klavis_toolkit.py +5 -1
  137. camel/toolkits/markitdown_toolkit.py +27 -1
  138. camel/toolkits/math_toolkit.py +64 -10
  139. camel/toolkits/mcp_toolkit.py +366 -71
  140. camel/toolkits/memory_toolkit.py +5 -1
  141. camel/toolkits/message_integration.py +18 -13
  142. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  143. camel/toolkits/note_taking_toolkit.py +19 -10
  144. camel/toolkits/notion_mcp_toolkit.py +16 -26
  145. camel/toolkits/openbb_toolkit.py +5 -1
  146. camel/toolkits/origene_mcp_toolkit.py +8 -49
  147. camel/toolkits/playwright_mcp_toolkit.py +12 -31
  148. camel/toolkits/resend_toolkit.py +168 -0
  149. camel/toolkits/search_toolkit.py +264 -91
  150. camel/toolkits/slack_toolkit.py +64 -10
  151. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  152. camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
  153. camel/toolkits/terminal_toolkit/utils.py +532 -0
  154. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  155. camel/toolkits/video_analysis_toolkit.py +17 -11
  156. camel/toolkits/wechat_official_toolkit.py +483 -0
  157. camel/toolkits/zapier_toolkit.py +5 -1
  158. camel/types/__init__.py +2 -2
  159. camel/types/enums.py +274 -7
  160. camel/types/openai_types.py +2 -2
  161. camel/types/unified_model_type.py +15 -0
  162. camel/utils/commons.py +36 -5
  163. camel/utils/constants.py +3 -0
  164. camel/utils/context_utils.py +1003 -0
  165. camel/utils/mcp.py +138 -4
  166. camel/utils/token_counting.py +43 -20
  167. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +223 -83
  168. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +170 -141
  169. camel/loaders/pandas_reader.py +0 -368
  170. camel/toolkits/openai_agent_toolkit.py +0 -135
  171. camel/toolkits/terminal_toolkit.py +0 -1550
  172. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
  173. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
@@ -14,13 +14,23 @@
14
14
  from __future__ import annotations
15
15
 
16
16
  import asyncio
17
+ import atexit
18
+ import base64
19
+ import concurrent.futures
20
+ import hashlib
21
+ import inspect
17
22
  import json
18
- import logging
19
- import queue
23
+ import os
24
+ import random
25
+ import re
26
+ import tempfile
20
27
  import textwrap
21
28
  import threading
22
29
  import time
23
30
  import uuid
31
+ import warnings
32
+ from dataclasses import dataclass
33
+ from datetime import datetime
24
34
  from pathlib import Path
25
35
  from typing import (
26
36
  TYPE_CHECKING,
@@ -40,12 +50,14 @@ from typing import (
40
50
 
41
51
  from openai import (
42
52
  AsyncStream,
53
+ RateLimitError,
43
54
  Stream,
44
55
  )
45
56
  from pydantic import BaseModel, ValidationError
46
57
 
47
58
  from camel.agents._types import ModelResponse, ToolCallRequest
48
59
  from camel.agents._utils import (
60
+ build_default_summary_prompt,
49
61
  convert_to_function_tool,
50
62
  convert_to_schema,
51
63
  get_info_dict,
@@ -57,6 +69,7 @@ from camel.logger import get_logger
57
69
  from camel.memories import (
58
70
  AgentMemory,
59
71
  ChatHistoryMemory,
72
+ ContextRecord,
60
73
  MemoryRecord,
61
74
  ScoreBasedContextCreator,
62
75
  )
@@ -85,20 +98,46 @@ from camel.types import (
85
98
  )
86
99
  from camel.types.agents import ToolCallingRecord
87
100
  from camel.utils import (
101
+ Constants,
88
102
  get_model_encoding,
89
103
  model_from_json_schema,
90
104
  )
91
105
  from camel.utils.commons import dependencies_required
106
+ from camel.utils.context_utils import ContextUtility
107
+
108
+ TOKEN_LIMIT_ERROR_MARKERS = (
109
+ "context_length_exceeded",
110
+ "prompt is too long",
111
+ "exceeded your current quota",
112
+ "tokens must be reduced",
113
+ "context length",
114
+ "token count",
115
+ "context limit",
116
+ )
92
117
 
93
118
  if TYPE_CHECKING:
94
119
  from camel.terminators import ResponseTerminator
95
120
 
96
121
  logger = get_logger(__name__)
97
122
 
123
+ # Cleanup temp files on exit
124
+ _temp_files: Set[str] = set()
125
+ _temp_files_lock = threading.Lock()
126
+
127
+
128
+ def _cleanup_temp_files():
129
+ with _temp_files_lock:
130
+ for path in _temp_files:
131
+ try:
132
+ os.unlink(path)
133
+ except Exception:
134
+ pass
135
+
136
+
137
+ atexit.register(_cleanup_temp_files)
138
+
98
139
  # AgentOps decorator setting
99
140
  try:
100
- import os
101
-
102
141
  if os.getenv("AGENTOPS_API_KEY") is not None:
103
142
  from agentops import track_agent
104
143
  else:
@@ -132,13 +171,23 @@ SIMPLE_FORMAT_PROMPT = TextPrompt(
132
171
  )
133
172
 
134
173
 
174
+ @dataclass
175
+ class _ToolOutputHistoryEntry:
176
+ tool_name: str
177
+ tool_call_id: str
178
+ result_text: str
179
+ record_uuids: List[str]
180
+ record_timestamps: List[float]
181
+ cached: bool = False
182
+
183
+
135
184
  class StreamContentAccumulator:
136
185
  r"""Manages content accumulation across streaming responses to ensure
137
186
  all responses contain complete cumulative content."""
138
187
 
139
188
  def __init__(self):
140
189
  self.base_content = "" # Content before tool calls
141
- self.current_content = "" # Current streaming content
190
+ self.current_content = [] # Accumulated streaming fragments
142
191
  self.tool_status_messages = [] # Accumulated tool status messages
143
192
 
144
193
  def set_base_content(self, content: str):
@@ -147,7 +196,7 @@ class StreamContentAccumulator:
147
196
 
148
197
  def add_streaming_content(self, new_content: str):
149
198
  r"""Add new streaming content."""
150
- self.current_content += new_content
199
+ self.current_content.append(new_content)
151
200
 
152
201
  def add_tool_status(self, status_message: str):
153
202
  r"""Add a tool status message."""
@@ -156,16 +205,18 @@ class StreamContentAccumulator:
156
205
  def get_full_content(self) -> str:
157
206
  r"""Get the complete accumulated content."""
158
207
  tool_messages = "".join(self.tool_status_messages)
159
- return self.base_content + tool_messages + self.current_content
208
+ current = "".join(self.current_content)
209
+ return self.base_content + tool_messages + current
160
210
 
161
211
  def get_content_with_new_status(self, status_message: str) -> str:
162
212
  r"""Get content with a new status message appended."""
163
213
  tool_messages = "".join([*self.tool_status_messages, status_message])
164
- return self.base_content + tool_messages + self.current_content
214
+ current = "".join(self.current_content)
215
+ return self.base_content + tool_messages + current
165
216
 
166
217
  def reset_streaming_content(self):
167
218
  r"""Reset only the streaming content, keep base and tool status."""
168
- self.current_content = ""
219
+ self.current_content = []
169
220
 
170
221
 
171
222
  class StreamingChatAgentResponse:
@@ -186,13 +237,10 @@ class StreamingChatAgentResponse:
186
237
  def _ensure_latest_response(self):
187
238
  r"""Ensure we have the latest response by consuming the generator."""
188
239
  if not self._consumed:
189
- try:
190
- for response in self._generator:
191
- self._responses.append(response)
192
- self._current_response = response
193
- self._consumed = True
194
- except StopIteration:
195
- self._consumed = True
240
+ for response in self._generator:
241
+ self._responses.append(response)
242
+ self._current_response = response
243
+ self._consumed = True
196
244
 
197
245
  @property
198
246
  def msgs(self) -> List[BaseMessage]:
@@ -230,17 +278,14 @@ class StreamingChatAgentResponse:
230
278
  r"""Make this object iterable."""
231
279
  if self._consumed:
232
280
  # If already consumed, iterate over stored responses
233
- return iter(self._responses)
281
+ yield from self._responses
234
282
  else:
235
283
  # If not consumed, consume and yield
236
- try:
237
- for response in self._generator:
238
- self._responses.append(response)
239
- self._current_response = response
240
- yield response
241
- self._consumed = True
242
- except StopIteration:
243
- self._consumed = True
284
+ for response in self._generator:
285
+ self._responses.append(response)
286
+ self._current_response = response
287
+ yield response
288
+ self._consumed = True
244
289
 
245
290
  def __getattr__(self, name):
246
291
  r"""Forward any other attribute access to the latest response."""
@@ -271,13 +316,10 @@ class AsyncStreamingChatAgentResponse:
271
316
  async def _ensure_latest_response(self):
272
317
  r"""Ensure the latest response by consuming the async generator."""
273
318
  if not self._consumed:
274
- try:
275
- async for response in self._async_generator:
276
- self._responses.append(response)
277
- self._current_response = response
278
- self._consumed = True
279
- except StopAsyncIteration:
280
- self._consumed = True
319
+ async for response in self._async_generator:
320
+ self._responses.append(response)
321
+ self._current_response = response
322
+ self._consumed = True
281
323
 
282
324
  async def _get_final_response(self) -> ChatAgentResponse:
283
325
  r"""Get the final response after consuming the entire stream."""
@@ -303,14 +345,11 @@ class AsyncStreamingChatAgentResponse:
303
345
  else:
304
346
  # If not consumed, consume and yield
305
347
  async def _consume_and_yield():
306
- try:
307
- async for response in self._async_generator:
308
- self._responses.append(response)
309
- self._current_response = response
310
- yield response
311
- self._consumed = True
312
- except StopAsyncIteration:
313
- self._consumed = True
348
+ async for response in self._async_generator:
349
+ self._responses.append(response)
350
+ self._current_response = response
351
+ yield response
352
+ self._consumed = True
314
353
 
315
354
  return _consume_and_yield()
316
355
 
@@ -338,9 +377,9 @@ class ChatAgent(BaseAgent):
338
377
  message_window_size (int, optional): The maximum number of previous
339
378
  messages to include in the context window. If `None`, no windowing
340
379
  is performed. (default: :obj:`None`)
341
- token_limit (int, optional): The maximum number of tokens in a context.
342
- The context will be automatically pruned to fulfill the limitation.
343
- If `None`, it will be set according to the backend model.
380
+ summarize_threshold (int, optional): The percentage of the context
381
+ window that triggers summarization. If `None`, will trigger
382
+ summarization when the context window is full.
344
383
  (default: :obj:`None`)
345
384
  output_language (str, optional): The language to be output by the
346
385
  agent. (default: :obj:`None`)
@@ -378,14 +417,35 @@ class ChatAgent(BaseAgent):
378
417
  for individual tool execution. If None, wait indefinitely.
379
418
  mask_tool_output (Optional[bool]): Whether to return a sanitized
380
419
  placeholder instead of the raw tool output. (default: :obj:`False`)
381
- pause_event (Optional[asyncio.Event]): Event to signal pause of the
382
- agent's operation. When clear, the agent will pause its execution.
383
- (default: :obj:`None`)
420
+ pause_event (Optional[Union[threading.Event, asyncio.Event]]): Event to
421
+ signal pause of the agent's operation. When clear, the agent will
422
+ pause its execution. Use threading.Event for sync operations or
423
+ asyncio.Event for async operations. (default: :obj:`None`)
384
424
  prune_tool_calls_from_memory (bool): Whether to clean tool
385
425
  call messages from memory after response generation to save token
386
426
  usage. When enabled, removes FUNCTION/TOOL role messages and
387
427
  ASSISTANT messages with tool_calls after each step.
388
428
  (default: :obj:`False`)
429
+ enable_snapshot_clean (bool, optional): Whether to clean snapshot
430
+ markers and references from historical tool outputs in memory.
431
+ This removes verbose DOM markers (like [ref=...]) from older tool
432
+ results while keeping the latest output intact for immediate use.
433
+ (default: :obj:`False`)
434
+ retry_attempts (int, optional): Maximum number of retry attempts for
435
+ rate limit errors. (default: :obj:`3`)
436
+ retry_delay (float, optional): Initial delay in seconds between
437
+ retries. Uses exponential backoff. (default: :obj:`1.0`)
438
+ step_timeout (Optional[float], optional): Timeout in seconds for the
439
+ entire step operation. If None, no timeout is applied.
440
+ (default: :obj:`None`)
441
+ stream_accumulate (bool, optional): When True, partial streaming
442
+ updates return accumulated content (current behavior). When False,
443
+ partial updates return only the incremental delta. (default:
444
+ :obj:`True`)
445
+ summary_window_ratio (float, optional): Maximum fraction of the total
446
+ context window that can be occupied by summary information. Used
447
+ to limit how much of the model's context is reserved for
448
+ summarization results. (default: :obj:`0.6`)
389
449
  """
390
450
 
391
451
  def __init__(
@@ -408,6 +468,7 @@ class ChatAgent(BaseAgent):
408
468
  ] = None,
409
469
  memory: Optional[AgentMemory] = None,
410
470
  message_window_size: Optional[int] = None,
471
+ summarize_threshold: Optional[int] = 50,
411
472
  token_limit: Optional[int] = None,
412
473
  output_language: Optional[str] = None,
413
474
  tools: Optional[List[Union[FunctionTool, Callable]]] = None,
@@ -422,10 +483,16 @@ class ChatAgent(BaseAgent):
422
483
  max_iteration: Optional[int] = None,
423
484
  agent_id: Optional[str] = None,
424
485
  stop_event: Optional[threading.Event] = None,
425
- tool_execution_timeout: Optional[float] = None,
486
+ tool_execution_timeout: Optional[float] = Constants.TIMEOUT_THRESHOLD,
426
487
  mask_tool_output: bool = False,
427
- pause_event: Optional[asyncio.Event] = None,
488
+ pause_event: Optional[Union[threading.Event, asyncio.Event]] = None,
428
489
  prune_tool_calls_from_memory: bool = False,
490
+ enable_snapshot_clean: bool = False,
491
+ retry_attempts: int = 3,
492
+ retry_delay: float = 1.0,
493
+ step_timeout: Optional[float] = Constants.TIMEOUT_THRESHOLD,
494
+ stream_accumulate: bool = True,
495
+ summary_window_ratio: float = 0.6,
429
496
  ) -> None:
430
497
  if isinstance(model, ModelManager):
431
498
  self.model_backend = model
@@ -441,10 +508,13 @@ class ChatAgent(BaseAgent):
441
508
  # Assign unique ID
442
509
  self.agent_id = agent_id if agent_id else str(uuid.uuid4())
443
510
 
511
+ self._enable_snapshot_clean = enable_snapshot_clean
512
+ self._tool_output_history: List[_ToolOutputHistoryEntry] = []
513
+
444
514
  # Set up memory
445
515
  context_creator = ScoreBasedContextCreator(
446
516
  self.model_backend.token_counter,
447
- token_limit or self.model_backend.token_limit,
517
+ self.model_backend.token_limit,
448
518
  )
449
519
 
450
520
  self._memory: AgentMemory = memory or ChatHistoryMemory(
@@ -459,9 +529,7 @@ class ChatAgent(BaseAgent):
459
529
 
460
530
  # Set up system message and initialize messages
461
531
  self._original_system_message = (
462
- BaseMessage.make_assistant_message(
463
- role_name="Assistant", content=system_message
464
- )
532
+ BaseMessage.make_system_message(system_message)
465
533
  if isinstance(system_message, str)
466
534
  else system_message
467
535
  )
@@ -471,6 +539,21 @@ class ChatAgent(BaseAgent):
471
539
  )
472
540
  self.init_messages()
473
541
 
542
+ # Set up summarize threshold with validation
543
+ if summarize_threshold is not None:
544
+ if not (0 < summarize_threshold <= 100):
545
+ raise ValueError(
546
+ f"summarize_threshold must be between 0 and 100, "
547
+ f"got {summarize_threshold}"
548
+ )
549
+ logger.info(
550
+ f"Automatic context compression is enabled. Will trigger "
551
+ f"summarization when context window exceeds "
552
+ f"{summarize_threshold}% of the total token limit."
553
+ )
554
+ self.summarize_threshold = summarize_threshold
555
+ self._reset_summary_state()
556
+
474
557
  # Set up role name and role type
475
558
  self.role_name: str = (
476
559
  getattr(self.system_message, "role_name", None) or "assistant"
@@ -509,13 +592,25 @@ class ChatAgent(BaseAgent):
509
592
  self.tool_execution_timeout = tool_execution_timeout
510
593
  self.mask_tool_output = mask_tool_output
511
594
  self._secure_result_store: Dict[str, Any] = {}
595
+ self._secure_result_store_lock = threading.Lock()
512
596
  self.pause_event = pause_event
513
597
  self.prune_tool_calls_from_memory = prune_tool_calls_from_memory
598
+ self.retry_attempts = max(1, retry_attempts)
599
+ self.retry_delay = max(0.0, retry_delay)
600
+ self.step_timeout = step_timeout
601
+ self._context_utility: Optional[ContextUtility] = None
602
+ self._context_summary_agent: Optional["ChatAgent"] = None
603
+ self.stream_accumulate = stream_accumulate
604
+ self._last_tool_call_record: Optional[ToolCallingRecord] = None
605
+ self._last_tool_call_signature: Optional[str] = None
606
+ self._last_token_limit_tool_signature: Optional[str] = None
607
+ self.summary_window_ratio = summary_window_ratio
514
608
 
515
609
  def reset(self):
516
610
  r"""Resets the :obj:`ChatAgent` to its initial state."""
517
611
  self.terminated = False
518
612
  self.init_messages()
613
+ self._reset_summary_state()
519
614
  for terminator in self.response_terminators:
520
615
  terminator.reset()
521
616
 
@@ -699,6 +794,20 @@ class ChatAgent(BaseAgent):
699
794
  # Ensure the new memory has the system message
700
795
  self.init_messages()
701
796
 
797
+ def set_context_utility(
798
+ self, context_utility: Optional[ContextUtility]
799
+ ) -> None:
800
+ r"""Set the context utility for the agent.
801
+
802
+ This allows external components (like SingleAgentWorker) to provide
803
+ a shared context utility instance for workflow management.
804
+
805
+ Args:
806
+ context_utility (ContextUtility, optional): The context utility
807
+ to use. If None, the agent will create its own when needed.
808
+ """
809
+ self._context_utility = context_utility
810
+
702
811
  def _get_full_tool_schemas(self) -> List[Dict[str, Any]]:
703
812
  r"""Returns a list of tool schemas of all tools, including internal
704
813
  and external tools.
@@ -708,6 +817,329 @@ class ChatAgent(BaseAgent):
708
817
  for func_tool in self._internal_tools.values()
709
818
  ]
710
819
 
820
+ @staticmethod
821
+ def _is_token_limit_error(error: Exception) -> bool:
822
+ r"""Return True when the exception message indicates a token limit."""
823
+ error_message = str(error).lower()
824
+ return any(
825
+ marker in error_message for marker in TOKEN_LIMIT_ERROR_MARKERS
826
+ )
827
+
828
+ @staticmethod
829
+ def _is_tool_related_record(record: MemoryRecord) -> bool:
830
+ r"""Determine whether the given memory record
831
+ belongs to a tool call."""
832
+ if record.role_at_backend in {
833
+ OpenAIBackendRole.TOOL,
834
+ OpenAIBackendRole.FUNCTION,
835
+ }:
836
+ return True
837
+
838
+ if (
839
+ record.role_at_backend == OpenAIBackendRole.ASSISTANT
840
+ and isinstance(record.message, FunctionCallingMessage)
841
+ ):
842
+ return True
843
+
844
+ return False
845
+
846
+ def _find_indices_to_remove_for_last_tool_pair(
847
+ self, recent_records: List[ContextRecord]
848
+ ) -> List[int]:
849
+ """Find indices of records that should be removed to clean up the most
850
+ recent incomplete tool interaction pair.
851
+
852
+ This method identifies tool call/result pairs by tool_call_id and
853
+ returns the exact indices to remove, allowing non-contiguous deletions.
854
+
855
+ Logic:
856
+ - If the last record is a tool result (TOOL/FUNCTION) with a
857
+ tool_call_id, find the matching assistant call anywhere in history
858
+ and return both indices.
859
+ - If the last record is an assistant tool call without a result yet,
860
+ return just that index.
861
+ - For normal messages (non tool-related): remove just the last one.
862
+ - Fallback: If no tool_call_id is available, use heuristic (last 2 if
863
+ tool-related, otherwise last 1).
864
+
865
+ Returns:
866
+ List[int]: Indices to remove (may be non-contiguous).
867
+ """
868
+ if not recent_records:
869
+ return []
870
+
871
+ last_idx = len(recent_records) - 1
872
+ last_record = recent_records[last_idx].memory_record
873
+
874
+ # Case A: Last is an ASSISTANT tool call with no result yet
875
+ if (
876
+ last_record.role_at_backend == OpenAIBackendRole.ASSISTANT
877
+ and isinstance(last_record.message, FunctionCallingMessage)
878
+ and last_record.message.result is None
879
+ ):
880
+ return [last_idx]
881
+
882
+ # Case B: Last is TOOL/FUNCTION result, try id-based pairing
883
+ if last_record.role_at_backend in {
884
+ OpenAIBackendRole.TOOL,
885
+ OpenAIBackendRole.FUNCTION,
886
+ }:
887
+ tool_id = None
888
+ if isinstance(last_record.message, FunctionCallingMessage):
889
+ tool_id = last_record.message.tool_call_id
890
+
891
+ if tool_id:
892
+ for idx in range(len(recent_records) - 2, -1, -1):
893
+ rec = recent_records[idx].memory_record
894
+ if rec.role_at_backend != OpenAIBackendRole.ASSISTANT:
895
+ continue
896
+
897
+ # Check if this assistant message contains the tool_call_id
898
+ matched = False
899
+
900
+ # Case 1: FunctionCallingMessage (single tool call)
901
+ if isinstance(rec.message, FunctionCallingMessage):
902
+ if rec.message.tool_call_id == tool_id:
903
+ matched = True
904
+
905
+ # Case 2: BaseMessage with multiple tool_calls in meta_dict
906
+ elif (
907
+ hasattr(rec.message, "meta_dict")
908
+ and rec.message.meta_dict
909
+ ):
910
+ tool_calls_list = rec.message.meta_dict.get(
911
+ "tool_calls", []
912
+ )
913
+ if isinstance(tool_calls_list, list):
914
+ for tc in tool_calls_list:
915
+ if (
916
+ isinstance(tc, dict)
917
+ and tc.get("id") == tool_id
918
+ ):
919
+ matched = True
920
+ break
921
+
922
+ if matched:
923
+ # Return both assistant call and tool result indices
924
+ return [idx, last_idx]
925
+
926
+ # Fallback: no tool_call_id, use heuristic
927
+ if self._is_tool_related_record(last_record):
928
+ # Remove last 2 (assume they are paired)
929
+ return [last_idx - 1, last_idx] if last_idx > 0 else [last_idx]
930
+ else:
931
+ return [last_idx]
932
+
933
+ # Default: non tool-related tail => remove last one
934
+ return [last_idx]
935
+
936
+ @staticmethod
937
+ def _serialize_tool_args(args: Dict[str, Any]) -> str:
938
+ try:
939
+ return json.dumps(args, ensure_ascii=False, sort_keys=True)
940
+ except TypeError:
941
+ return str(args)
942
+
943
+ @classmethod
944
+ def _build_tool_signature(
945
+ cls, func_name: str, args: Dict[str, Any]
946
+ ) -> str:
947
+ args_repr = cls._serialize_tool_args(args)
948
+ return f"{func_name}:{args_repr}"
949
+
950
+ def _describe_tool_call(
951
+ self, record: Optional[ToolCallingRecord]
952
+ ) -> Optional[str]:
953
+ if record is None:
954
+ return None
955
+ args_repr = self._serialize_tool_args(record.args)
956
+ return f"Tool `{record.tool_name}` invoked with arguments {args_repr}."
957
+
958
+ def _update_last_tool_call_state(
959
+ self, record: Optional[ToolCallingRecord]
960
+ ) -> None:
961
+ """Track the most recent tool call and its identifying signature."""
962
+ self._last_tool_call_record = record
963
+ if record is None:
964
+ self._last_tool_call_signature = None
965
+ return
966
+
967
+ args = (
968
+ record.args
969
+ if isinstance(record.args, dict)
970
+ else {"_raw": record.args}
971
+ )
972
+ try:
973
+ signature = self._build_tool_signature(record.tool_name, args)
974
+ except Exception: # pragma: no cover - defensive guard
975
+ signature = None
976
+ self._last_tool_call_signature = signature
977
+
978
+ def _format_tool_limit_notice(self) -> Optional[str]:
979
+ record = self._last_tool_call_record
980
+ description = self._describe_tool_call(record)
981
+ if description is None:
982
+ return None
983
+ notice_lines = [
984
+ "[Tool Call Causing Token Limit]",
985
+ description,
986
+ ]
987
+
988
+ if record is not None:
989
+ result = record.result
990
+ if isinstance(result, bytes):
991
+ result_repr = result.decode(errors="replace")
992
+ elif isinstance(result, str):
993
+ result_repr = result
994
+ else:
995
+ try:
996
+ result_repr = json.dumps(
997
+ result, ensure_ascii=False, sort_keys=True
998
+ )
999
+ except (TypeError, ValueError):
1000
+ result_repr = str(result)
1001
+
1002
+ result_length = len(result_repr)
1003
+ notice_lines.append(f"Tool result length: {result_length}")
1004
+ if self.model_backend.token_limit != 999999999:
1005
+ notice_lines.append(
1006
+ f"Token limit: {self.model_backend.token_limit}"
1007
+ )
1008
+
1009
+ return "\n".join(notice_lines)
1010
+
1011
+ @staticmethod
1012
+ def _append_user_messages_section(
1013
+ summary_content: str, user_messages: List[str]
1014
+ ) -> str:
1015
+ section_title = "- **All User Messages**:"
1016
+ sanitized_messages: List[str] = []
1017
+ for msg in user_messages:
1018
+ if not isinstance(msg, str):
1019
+ msg = str(msg)
1020
+ cleaned = " ".join(msg.strip().splitlines())
1021
+ if cleaned:
1022
+ sanitized_messages.append(cleaned)
1023
+
1024
+ bullet_block = (
1025
+ "\n".join(f"- {m}" for m in sanitized_messages)
1026
+ if sanitized_messages
1027
+ else "- None noted"
1028
+ )
1029
+ user_section = f"{section_title}\n{bullet_block}"
1030
+
1031
+ summary_clean = summary_content.rstrip()
1032
+ separator = "\n\n" if summary_clean else ""
1033
+ return f"{summary_clean}{separator}{user_section}"
1034
+
1035
+ def _reset_summary_state(self) -> None:
1036
+ self._summary_token_count = 0 # Total tokens in summary messages
1037
+
1038
+ def _calculate_next_summary_threshold(self) -> int:
1039
+ r"""Calculate the next token threshold that should trigger
1040
+ summarization.
1041
+
1042
+ The threshold calculation follows a progressive strategy:
1043
+ - First time: token_limit * (summarize_threshold / 100)
1044
+ - Subsequent times: (limit - summary_token) / 2 + summary_token
1045
+
1046
+ This ensures that as summaries accumulate, the threshold adapts
1047
+ to maintain a reasonable balance between context and summaries.
1048
+
1049
+ Returns:
1050
+ int: The token count threshold for next summarization.
1051
+ """
1052
+ token_limit = self.model_backend.token_limit
1053
+ summary_token_count = self._summary_token_count
1054
+
1055
+ # First summarization: use the percentage threshold
1056
+ if summary_token_count == 0:
1057
+ threshold = int(token_limit * self.summarize_threshold / 100)
1058
+ else:
1059
+ # Subsequent summarizations: adaptive threshold
1060
+ threshold = int(
1061
+ (token_limit - summary_token_count)
1062
+ * self.summarize_threshold
1063
+ / 100
1064
+ + summary_token_count
1065
+ )
1066
+
1067
+ return threshold
1068
+
1069
+ def _update_memory_with_summary(
1070
+ self, summary: str, include_summaries: bool = False
1071
+ ) -> None:
1072
+ r"""Update memory with summary result.
1073
+
1074
+ This method handles memory clearing and restoration of summaries based
1075
+ on whether it's a progressive or full compression.
1076
+ """
1077
+
1078
+ summary_content: str = summary
1079
+
1080
+ existing_summaries = []
1081
+ if not include_summaries:
1082
+ messages, _ = self.memory.get_context()
1083
+ for msg in messages:
1084
+ content = msg.get('content', '')
1085
+ if isinstance(content, str) and content.startswith(
1086
+ '[CONTEXT_SUMMARY]'
1087
+ ):
1088
+ existing_summaries.append(msg)
1089
+
1090
+ # Clear memory
1091
+ self.clear_memory()
1092
+
1093
+ # Restore old summaries (for progressive compression)
1094
+ for old_summary in existing_summaries:
1095
+ content = old_summary.get('content', '')
1096
+ if not isinstance(content, str):
1097
+ content = str(content)
1098
+ summary_msg = BaseMessage.make_assistant_message(
1099
+ role_name="assistant", content=content
1100
+ )
1101
+ self.update_memory(summary_msg, OpenAIBackendRole.ASSISTANT)
1102
+
1103
+ # Add new summary
1104
+ new_summary_msg = BaseMessage.make_assistant_message(
1105
+ role_name="assistant", content=summary_content
1106
+ )
1107
+ self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT)
1108
+ input_message = BaseMessage.make_assistant_message(
1109
+ role_name="assistant",
1110
+ content=(
1111
+ "Please continue the conversation from "
1112
+ "where we left it off without asking the user any further "
1113
+ "questions. Continue with the last task that you were "
1114
+ "asked to work on."
1115
+ ),
1116
+ )
1117
+ self.update_memory(input_message, OpenAIBackendRole.ASSISTANT)
1118
+ # Update token count
1119
+ try:
1120
+ summary_tokens = (
1121
+ self.model_backend.token_counter.count_tokens_from_messages(
1122
+ [{"role": "assistant", "content": summary_content}]
1123
+ )
1124
+ )
1125
+
1126
+ if include_summaries: # Full compression - reset count
1127
+ self._summary_token_count = summary_tokens
1128
+ logger.info(
1129
+ f"Full compression: Summary with {summary_tokens} tokens. "
1130
+ f"Total summary tokens reset to: {summary_tokens}"
1131
+ )
1132
+ else: # Progressive compression - accumulate
1133
+ self._summary_token_count += summary_tokens
1134
+ logger.info(
1135
+ f"Progressive compression: New summary "
1136
+ f"with {summary_tokens} tokens. "
1137
+ f"Total summary tokens: "
1138
+ f"{self._summary_token_count}"
1139
+ )
1140
+ except Exception as e:
1141
+ logger.warning(f"Failed to count summary tokens: {e}")
1142
+
711
1143
  def _get_external_tool_names(self) -> Set[str]:
712
1144
  r"""Returns a set of external tool names."""
713
1145
  return set(self._external_tool_schemas.keys())
@@ -722,6 +1154,282 @@ class ChatAgent(BaseAgent):
722
1154
  for tool in tools:
723
1155
  self.add_tool(tool)
724
1156
 
1157
+ def _serialize_tool_result(self, result: Any) -> str:
1158
+ if isinstance(result, str):
1159
+ return result
1160
+ try:
1161
+ return json.dumps(result, ensure_ascii=False)
1162
+ except (TypeError, ValueError):
1163
+ return str(result)
1164
+
1165
+ def _clean_snapshot_line(self, line: str) -> str:
1166
+ r"""Clean a single snapshot line by removing prefixes and references.
1167
+
1168
+ This method handles snapshot lines in the format:
1169
+ - [prefix] "quoted text" [attributes] [ref=...]: description
1170
+
1171
+ It preserves:
1172
+ - Quoted text content (including brackets inside quotes)
1173
+ - Description text after the colon
1174
+
1175
+ It removes:
1176
+ - Line prefixes (e.g., "- button", "- tooltip", "generic:")
1177
+ - Attribute markers (e.g., [disabled], [ref=e47])
1178
+ - Lines with only element types
1179
+ - All indentation
1180
+
1181
+ Args:
1182
+ line: The original line content.
1183
+
1184
+ Returns:
1185
+ The cleaned line content, or empty string if line should be
1186
+ removed.
1187
+ """
1188
+ original = line.strip()
1189
+ if not original:
1190
+ return ''
1191
+
1192
+ # Check if line is just an element type marker
1193
+ # (e.g., "- generic:", "button:")
1194
+ if re.match(r'^(?:-\s+)?\w+\s*:?\s*$', original):
1195
+ return ''
1196
+
1197
+ # Remove element type prefix
1198
+ line = re.sub(r'^(?:-\s+)?\w+[\s:]+', '', original)
1199
+
1200
+ # Remove bracket markers while preserving quoted text
1201
+ quoted_parts = []
1202
+
1203
+ def save_quoted(match):
1204
+ quoted_parts.append(match.group(0))
1205
+ return f'__QUOTED_{len(quoted_parts)-1}__'
1206
+
1207
+ line = re.sub(r'"[^"]*"', save_quoted, line)
1208
+ line = re.sub(r'\s*\[[^\]]+\]\s*', ' ', line)
1209
+
1210
+ for i, quoted in enumerate(quoted_parts):
1211
+ line = line.replace(f'__QUOTED_{i}__', quoted)
1212
+
1213
+ # Clean up formatting
1214
+ line = re.sub(r'\s+', ' ', line).strip()
1215
+ line = re.sub(r'\s*:\s*', ': ', line)
1216
+ line = line.lstrip(': ').strip()
1217
+
1218
+ return '' if not line else line
1219
+
1220
+ def _clean_snapshot_content(self, content: str) -> str:
1221
+ r"""Clean snapshot content by removing prefixes, references, and
1222
+ deduplicating lines.
1223
+
1224
+ This method identifies snapshot lines (containing element keywords or
1225
+ references) and cleans them while preserving non-snapshot content.
1226
+ It also handles JSON-formatted tool outputs with snapshot fields.
1227
+
1228
+ Args:
1229
+ content: The original snapshot content.
1230
+
1231
+ Returns:
1232
+ The cleaned content with deduplicated lines.
1233
+ """
1234
+ try:
1235
+ import json
1236
+
1237
+ data = json.loads(content)
1238
+ modified = False
1239
+
1240
+ def clean_json_value(obj):
1241
+ nonlocal modified
1242
+ if isinstance(obj, dict):
1243
+ result = {}
1244
+ for key, value in obj.items():
1245
+ if key == 'snapshot' and isinstance(value, str):
1246
+ try:
1247
+ decoded_value = value.encode().decode(
1248
+ 'unicode_escape'
1249
+ )
1250
+ except (UnicodeDecodeError, AttributeError):
1251
+ decoded_value = value
1252
+
1253
+ needs_cleaning = (
1254
+ '- ' in decoded_value
1255
+ or '[ref=' in decoded_value
1256
+ or any(
1257
+ elem + ':' in decoded_value
1258
+ for elem in [
1259
+ 'generic',
1260
+ 'img',
1261
+ 'banner',
1262
+ 'list',
1263
+ 'listitem',
1264
+ 'search',
1265
+ 'navigation',
1266
+ ]
1267
+ )
1268
+ )
1269
+
1270
+ if needs_cleaning:
1271
+ cleaned_snapshot = self._clean_text_snapshot(
1272
+ decoded_value
1273
+ )
1274
+ result[key] = cleaned_snapshot
1275
+ modified = True
1276
+ else:
1277
+ result[key] = value
1278
+ else:
1279
+ result[key] = clean_json_value(value)
1280
+ return result
1281
+ elif isinstance(obj, list):
1282
+ return [clean_json_value(item) for item in obj]
1283
+ else:
1284
+ return obj
1285
+
1286
+ cleaned_data = clean_json_value(data)
1287
+
1288
+ if modified:
1289
+ return json.dumps(cleaned_data, ensure_ascii=False, indent=4)
1290
+ else:
1291
+ return content
1292
+
1293
+ except (json.JSONDecodeError, TypeError):
1294
+ return self._clean_text_snapshot(content)
1295
+
1296
+ def _clean_text_snapshot(self, content: str) -> str:
1297
+ r"""Clean plain text snapshot content.
1298
+
1299
+ This method:
1300
+ - Removes all indentation
1301
+ - Deletes empty lines
1302
+ - Deduplicates all lines
1303
+ - Cleans snapshot-specific markers
1304
+
1305
+ Args:
1306
+ content: The original snapshot text.
1307
+
1308
+ Returns:
1309
+ The cleaned content with deduplicated lines, no indentation,
1310
+ and no empty lines.
1311
+ """
1312
+ lines = content.split('\n')
1313
+ cleaned_lines = []
1314
+ seen = set()
1315
+
1316
+ for line in lines:
1317
+ stripped_line = line.strip()
1318
+
1319
+ if not stripped_line:
1320
+ continue
1321
+
1322
+ # Skip metadata lines (like "- /url:", "- /ref:")
1323
+ if re.match(r'^-?\s*/\w+\s*:', stripped_line):
1324
+ continue
1325
+
1326
+ is_snapshot_line = '[ref=' in stripped_line or re.match(
1327
+ r'^(?:-\s+)?\w+(?:[\s:]|$)', stripped_line
1328
+ )
1329
+
1330
+ if is_snapshot_line:
1331
+ cleaned = self._clean_snapshot_line(stripped_line)
1332
+ if cleaned and cleaned not in seen:
1333
+ cleaned_lines.append(cleaned)
1334
+ seen.add(cleaned)
1335
+ else:
1336
+ if stripped_line not in seen:
1337
+ cleaned_lines.append(stripped_line)
1338
+ seen.add(stripped_line)
1339
+
1340
+ return '\n'.join(cleaned_lines)
1341
+
1342
+ def _register_tool_output_for_cache(
1343
+ self,
1344
+ func_name: str,
1345
+ tool_call_id: str,
1346
+ result_text: str,
1347
+ records: List[MemoryRecord],
1348
+ ) -> None:
1349
+ if not records:
1350
+ return
1351
+
1352
+ entry = _ToolOutputHistoryEntry(
1353
+ tool_name=func_name,
1354
+ tool_call_id=tool_call_id,
1355
+ result_text=result_text,
1356
+ record_uuids=[str(record.uuid) for record in records],
1357
+ record_timestamps=[record.timestamp for record in records],
1358
+ )
1359
+ self._tool_output_history.append(entry)
1360
+ self._process_tool_output_cache()
1361
+
1362
+ def _process_tool_output_cache(self) -> None:
1363
+ if not self._enable_snapshot_clean or not self._tool_output_history:
1364
+ return
1365
+
1366
+ # Only clean older results; keep the latest expanded for immediate use.
1367
+ for entry in self._tool_output_history[:-1]:
1368
+ if entry.cached:
1369
+ continue
1370
+ self._clean_snapshot_in_memory(entry)
1371
+
1372
+ def _clean_snapshot_in_memory(
1373
+ self, entry: _ToolOutputHistoryEntry
1374
+ ) -> None:
1375
+ if not entry.record_uuids:
1376
+ return
1377
+
1378
+ # Clean snapshot markers and references from historical tool output
1379
+ result_text = entry.result_text
1380
+ if '- ' in result_text and '[ref=' in result_text:
1381
+ cleaned_result = self._clean_snapshot_content(result_text)
1382
+
1383
+ # Update the message in memory storage
1384
+ timestamp = (
1385
+ entry.record_timestamps[0]
1386
+ if entry.record_timestamps
1387
+ else time.time_ns() / 1_000_000_000
1388
+ )
1389
+ cleaned_message = FunctionCallingMessage(
1390
+ role_name=self.role_name,
1391
+ role_type=self.role_type,
1392
+ meta_dict={},
1393
+ content="",
1394
+ func_name=entry.tool_name,
1395
+ result=cleaned_result,
1396
+ tool_call_id=entry.tool_call_id,
1397
+ )
1398
+
1399
+ chat_history_block = getattr(
1400
+ self.memory, "_chat_history_block", None
1401
+ )
1402
+ storage = getattr(chat_history_block, "storage", None)
1403
+ if storage is None:
1404
+ return
1405
+
1406
+ existing_records = storage.load()
1407
+ updated_records = [
1408
+ record
1409
+ for record in existing_records
1410
+ if record["uuid"] not in entry.record_uuids
1411
+ ]
1412
+ new_record = MemoryRecord(
1413
+ message=cleaned_message,
1414
+ role_at_backend=OpenAIBackendRole.FUNCTION,
1415
+ timestamp=timestamp,
1416
+ agent_id=self.agent_id,
1417
+ )
1418
+ updated_records.append(new_record.to_dict())
1419
+ updated_records.sort(key=lambda record: record["timestamp"])
1420
+ storage.clear()
1421
+ storage.save(updated_records)
1422
+
1423
+ logger.info(
1424
+ "Cleaned snapshot in memory for tool output '%s' (%s)",
1425
+ entry.tool_name,
1426
+ entry.tool_call_id,
1427
+ )
1428
+
1429
+ entry.cached = True
1430
+ entry.record_uuids = [str(new_record.uuid)]
1431
+ entry.record_timestamps = [timestamp]
1432
+
725
1433
  def add_external_tool(
726
1434
  self, tool: Union[FunctionTool, Callable, Dict[str, Any]]
727
1435
  ) -> None:
@@ -766,19 +1474,10 @@ class ChatAgent(BaseAgent):
766
1474
  message: BaseMessage,
767
1475
  role: OpenAIBackendRole,
768
1476
  timestamp: Optional[float] = None,
769
- ) -> None:
1477
+ return_records: bool = False,
1478
+ ) -> Optional[List[MemoryRecord]]:
770
1479
  r"""Updates the agent memory with a new message.
771
1480
 
772
- If the single *message* exceeds the model's context window, it will
773
- be **automatically split into multiple smaller chunks** before being
774
- written into memory. This prevents later failures in
775
- `ScoreBasedContextCreator` where an over-sized message cannot fit
776
- into the available token budget at all.
777
-
778
- This slicing logic handles both regular text messages (in the
779
- `content` field) and long tool call results (in the `result` field of
780
- a `FunctionCallingMessage`).
781
-
782
1481
  Args:
783
1482
  message (BaseMessage): The new message to add to the stored
784
1483
  messages.
@@ -786,168 +1485,41 @@ class ChatAgent(BaseAgent):
786
1485
  timestamp (Optional[float], optional): Custom timestamp for the
787
1486
  memory record. If `None`, the current time will be used.
788
1487
  (default: :obj:`None`)
789
- (default: obj:`None`)
790
- """
791
- import math
792
- import time
793
- import uuid as _uuid
794
-
795
- # 1. Helper to write a record to memory
796
- def _write_single_record(
797
- message: BaseMessage, role: OpenAIBackendRole, timestamp: float
798
- ):
799
- self.memory.write_record(
800
- MemoryRecord(
801
- message=message,
802
- role_at_backend=role,
803
- timestamp=timestamp,
804
- agent_id=self.agent_id,
805
- )
806
- )
1488
+ return_records (bool, optional): When ``True`` the method returns
1489
+ the list of MemoryRecord objects written to memory.
1490
+ (default: :obj:`False`)
807
1491
 
808
- base_ts = (
809
- timestamp
1492
+ Returns:
1493
+ Optional[List[MemoryRecord]]: The records that were written when
1494
+ ``return_records`` is ``True``; otherwise ``None``.
1495
+ """
1496
+ record = MemoryRecord(
1497
+ message=message,
1498
+ role_at_backend=role,
1499
+ timestamp=timestamp
810
1500
  if timestamp is not None
811
- else time.time_ns() / 1_000_000_000
1501
+ else time.time_ns() / 1_000_000_000, # Nanosecond precision
1502
+ agent_id=self.agent_id,
812
1503
  )
1504
+ self.memory.write_record(record)
813
1505
 
814
- # 2. Get token handling utilities, fallback if unavailable
815
- try:
816
- context_creator = self.memory.get_context_creator()
817
- token_counter = context_creator.token_counter
818
- token_limit = context_creator.token_limit
819
- except AttributeError:
820
- _write_single_record(message, role, base_ts)
821
- return
1506
+ if return_records:
1507
+ return [record]
1508
+ return None
822
1509
 
823
- # 3. Check if slicing is necessary
824
- try:
825
- current_tokens = token_counter.count_tokens_from_messages(
826
- [message.to_openai_message(role)]
827
- )
828
- _, ctx_tokens = self.memory.get_context()
829
- remaining_budget = max(0, token_limit - ctx_tokens)
1510
+ def load_memory(self, memory: AgentMemory) -> None:
1511
+ r"""Load the provided memory into the agent.
830
1512
 
831
- if current_tokens <= remaining_budget:
832
- _write_single_record(message, role, base_ts)
833
- return
834
- except Exception as e:
835
- logger.warning(
836
- f"Token calculation failed before chunking, "
837
- f"writing message as-is. Error: {e}"
838
- )
839
- _write_single_record(message, role, base_ts)
840
- return
1513
+ Args:
1514
+ memory (AgentMemory): The memory to load into the agent.
841
1515
 
842
- # 4. Perform slicing
843
- logger.warning(
844
- f"Message with {current_tokens} tokens exceeds remaining budget "
845
- f"of {remaining_budget}. Slicing into smaller chunks."
846
- )
1516
+ Returns:
1517
+ None
1518
+ """
847
1519
 
848
- text_to_chunk: Optional[str] = None
849
- is_function_result = False
850
-
851
- if isinstance(message, FunctionCallingMessage) and isinstance(
852
- message.result, str
853
- ):
854
- text_to_chunk = message.result
855
- is_function_result = True
856
- elif isinstance(message.content, str):
857
- text_to_chunk = message.content
858
-
859
- if not text_to_chunk or not text_to_chunk.strip():
860
- _write_single_record(message, role, base_ts)
861
- return
862
- # Encode the entire text to get a list of all token IDs
863
- try:
864
- all_token_ids = token_counter.encode(text_to_chunk)
865
- except Exception as e:
866
- logger.error(f"Failed to encode text for chunking: {e}")
867
- _write_single_record(message, role, base_ts) # Fallback
868
- return
869
-
870
- if not all_token_ids:
871
- _write_single_record(message, role, base_ts) # Nothing to chunk
872
- return
873
-
874
- # 1. Base chunk size: one-tenth of the smaller of (a) total token
875
- # limit and (b) current remaining budget. This prevents us from
876
- # creating chunks that are guaranteed to overflow the
877
- # immediate context window.
878
- base_chunk_size = max(1, remaining_budget) // 10
879
-
880
- # 2. Each chunk gets a textual prefix such as:
881
- # "[chunk 3/12 of a long message]\n"
882
- # The prefix itself consumes tokens, so if we do not subtract its
883
- # length the *total* tokens of the outgoing message (prefix + body)
884
- # can exceed the intended bound. We estimate the prefix length
885
- # with a representative example that is safely long enough for the
886
- # vast majority of cases (three-digit indices).
887
- sample_prefix = "[chunk 1/1000 of a long message]\n"
888
- prefix_token_len = len(token_counter.encode(sample_prefix))
889
-
890
- # 3. The real capacity for the message body is therefore the base
891
- # chunk size minus the prefix length. Fallback to at least one
892
- # token to avoid zero or negative sizes.
893
- chunk_body_limit = max(1, base_chunk_size - prefix_token_len)
894
-
895
- # 4. Calculate how many chunks we will need with this body size.
896
- num_chunks = math.ceil(len(all_token_ids) / chunk_body_limit)
897
- group_id = str(_uuid.uuid4())
898
-
899
- for i in range(num_chunks):
900
- start_idx = i * chunk_body_limit
901
- end_idx = start_idx + chunk_body_limit
902
- chunk_token_ids = all_token_ids[start_idx:end_idx]
903
-
904
- chunk_body = token_counter.decode(chunk_token_ids)
905
-
906
- prefix = f"[chunk {i + 1}/{num_chunks} of a long message]\n"
907
- new_body = prefix + chunk_body
908
-
909
- if is_function_result and isinstance(
910
- message, FunctionCallingMessage
911
- ):
912
- new_msg: BaseMessage = FunctionCallingMessage(
913
- role_name=message.role_name,
914
- role_type=message.role_type,
915
- meta_dict=message.meta_dict,
916
- content=message.content,
917
- func_name=message.func_name,
918
- args=message.args,
919
- result=new_body,
920
- tool_call_id=message.tool_call_id,
921
- )
922
- else:
923
- new_msg = message.create_new_instance(new_body)
924
-
925
- meta = (new_msg.meta_dict or {}).copy()
926
- meta.update(
927
- {
928
- "chunk_idx": i + 1,
929
- "chunk_total": num_chunks,
930
- "chunk_group_id": group_id,
931
- }
932
- )
933
- new_msg.meta_dict = meta
934
-
935
- # Increment timestamp slightly to maintain order
936
- _write_single_record(new_msg, role, base_ts + i * 1e-6)
937
-
938
- def load_memory(self, memory: AgentMemory) -> None:
939
- r"""Load the provided memory into the agent.
940
-
941
- Args:
942
- memory (AgentMemory): The memory to load into the agent.
943
-
944
- Returns:
945
- None
946
- """
947
-
948
- for context_record in memory.retrieve():
949
- self.memory.write_record(context_record.memory_record)
950
- logger.info(f"Memory loaded from {memory}")
1520
+ for context_record in memory.retrieve():
1521
+ self.memory.write_record(context_record.memory_record)
1522
+ logger.info(f"Memory loaded from {memory}")
951
1523
 
952
1524
  def load_memory_from_path(self, path: str) -> None:
953
1525
  r"""Loads memory records from a JSON file filtered by this agent's ID.
@@ -1012,6 +1584,583 @@ class ChatAgent(BaseAgent):
1012
1584
  json_store.save(to_save)
1013
1585
  logger.info(f"Memory saved to {path}")
1014
1586
 
1587
+ def summarize(
1588
+ self,
1589
+ filename: Optional[str] = None,
1590
+ summary_prompt: Optional[str] = None,
1591
+ response_format: Optional[Type[BaseModel]] = None,
1592
+ working_directory: Optional[Union[str, Path]] = None,
1593
+ include_summaries: bool = False,
1594
+ add_user_messages: bool = True,
1595
+ ) -> Dict[str, Any]:
1596
+ r"""Summarize the agent's current conversation context and persist it
1597
+ to a markdown file.
1598
+
1599
+ .. deprecated:: 0.2.80
1600
+ Use :meth:`asummarize` for async/await support and better
1601
+ performance in parallel summarization workflows.
1602
+
1603
+ Args:
1604
+ filename (Optional[str]): The base filename (without extension) to
1605
+ use for the markdown file. Defaults to a timestamped name when
1606
+ not provided.
1607
+ summary_prompt (Optional[str]): Custom prompt for the summarizer.
1608
+ When omitted, a default prompt highlighting key decisions,
1609
+ action items, and open questions is used.
1610
+ response_format (Optional[Type[BaseModel]]): A Pydantic model
1611
+ defining the expected structure of the response. If provided,
1612
+ the summary will be generated as structured output and included
1613
+ in the result.
1614
+ include_summaries (bool): Whether to include previously generated
1615
+ summaries in the content to be summarized. If False (default),
1616
+ only non-summary messages will be summarized. If True, all
1617
+ messages including previous summaries will be summarized
1618
+ (full compression). (default: :obj:`False`)
1619
+ working_directory (Optional[str|Path]): Optional directory to save
1620
+ the markdown summary file. If provided, overrides the default
1621
+ directory used by ContextUtility.
1622
+ add_user_messages (bool): Whether add user messages to summary.
1623
+ (default: :obj:`True`)
1624
+ Returns:
1625
+ Dict[str, Any]: A dictionary containing the summary text, file
1626
+ path, status message, and optionally structured_summary if
1627
+ response_format was provided.
1628
+
1629
+ See Also:
1630
+ :meth:`asummarize`: Async version for non-blocking LLM calls.
1631
+ """
1632
+
1633
+ warnings.warn(
1634
+ "summarize() is synchronous. Consider using asummarize() "
1635
+ "for async/await support and better performance.",
1636
+ DeprecationWarning,
1637
+ stacklevel=2,
1638
+ )
1639
+
1640
+ result: Dict[str, Any] = {
1641
+ "summary": "",
1642
+ "file_path": None,
1643
+ "status": "",
1644
+ }
1645
+
1646
+ try:
1647
+ # Use external context if set, otherwise create local one
1648
+ if self._context_utility is None:
1649
+ if working_directory is not None:
1650
+ self._context_utility = ContextUtility(
1651
+ working_directory=str(working_directory)
1652
+ )
1653
+ else:
1654
+ self._context_utility = ContextUtility()
1655
+ context_util = self._context_utility
1656
+
1657
+ # Get conversation directly from agent's memory
1658
+ messages, _ = self.memory.get_context()
1659
+
1660
+ if not messages:
1661
+ status_message = (
1662
+ "No conversation context available to summarize."
1663
+ )
1664
+ result["status"] = status_message
1665
+ return result
1666
+
1667
+ # Convert messages to conversation text
1668
+ conversation_lines = []
1669
+ user_messages: List[str] = []
1670
+ for message in messages:
1671
+ role = message.get('role', 'unknown')
1672
+ content = message.get('content', '')
1673
+
1674
+ # Skip summary messages if include_summaries is False
1675
+ if not include_summaries and isinstance(content, str):
1676
+ # Check if this is a summary message by looking for marker
1677
+ if content.startswith('[CONTEXT_SUMMARY]'):
1678
+ continue
1679
+
1680
+ # Handle tool call messages (assistant calling tools)
1681
+ tool_calls = message.get('tool_calls')
1682
+ if tool_calls and isinstance(tool_calls, (list, tuple)):
1683
+ for tool_call in tool_calls:
1684
+ # Handle both dict and object formats
1685
+ if isinstance(tool_call, dict):
1686
+ func_name = tool_call.get('function', {}).get(
1687
+ 'name', 'unknown_tool'
1688
+ )
1689
+ func_args_str = tool_call.get('function', {}).get(
1690
+ 'arguments', '{}'
1691
+ )
1692
+ else:
1693
+ # Handle object format (Pydantic or similar)
1694
+ func_name = getattr(
1695
+ getattr(tool_call, 'function', None),
1696
+ 'name',
1697
+ 'unknown_tool',
1698
+ )
1699
+ func_args_str = getattr(
1700
+ getattr(tool_call, 'function', None),
1701
+ 'arguments',
1702
+ '{}',
1703
+ )
1704
+
1705
+ # Parse and format arguments for readability
1706
+ try:
1707
+ import json
1708
+
1709
+ args_dict = json.loads(func_args_str)
1710
+ args_formatted = ', '.join(
1711
+ f"{k}={v}" for k, v in args_dict.items()
1712
+ )
1713
+ except (json.JSONDecodeError, ValueError, TypeError):
1714
+ args_formatted = func_args_str
1715
+
1716
+ conversation_lines.append(
1717
+ f"[TOOL CALL] {func_name}({args_formatted})"
1718
+ )
1719
+
1720
+ # Handle tool response messages
1721
+ elif role == 'tool':
1722
+ tool_name = message.get('name', 'unknown_tool')
1723
+ if not content:
1724
+ content = str(message.get('content', ''))
1725
+ conversation_lines.append(
1726
+ f"[TOOL RESULT] {tool_name} → {content}"
1727
+ )
1728
+
1729
+ # Handle regular content messages (user/assistant/system)
1730
+ elif content:
1731
+ content = str(content)
1732
+ if role == 'user':
1733
+ user_messages.append(content)
1734
+ conversation_lines.append(f"{role}: {content}")
1735
+
1736
+ conversation_text = "\n".join(conversation_lines).strip()
1737
+
1738
+ if not conversation_text:
1739
+ status_message = (
1740
+ "Conversation context is empty; skipping summary."
1741
+ )
1742
+ result["status"] = status_message
1743
+ return result
1744
+
1745
+ if self._context_summary_agent is None:
1746
+ self._context_summary_agent = ChatAgent(
1747
+ system_message=(
1748
+ "You are a helpful assistant that summarizes "
1749
+ "conversations"
1750
+ ),
1751
+ model=self.model_backend,
1752
+ agent_id=f"{self.agent_id}_context_summarizer",
1753
+ summarize_threshold=None,
1754
+ )
1755
+ else:
1756
+ self._context_summary_agent.reset()
1757
+
1758
+ if summary_prompt:
1759
+ prompt_text = (
1760
+ f"{summary_prompt.rstrip()}\n\n"
1761
+ f"AGENT CONVERSATION TO BE SUMMARIZED:\n"
1762
+ f"{conversation_text}"
1763
+ )
1764
+ else:
1765
+ prompt_text = build_default_summary_prompt(conversation_text)
1766
+
1767
+ try:
1768
+ # Use structured output if response_format is provided
1769
+ if response_format:
1770
+ response = self._context_summary_agent.step(
1771
+ prompt_text, response_format=response_format
1772
+ )
1773
+ else:
1774
+ response = self._context_summary_agent.step(prompt_text)
1775
+ except Exception as step_exc:
1776
+ error_message = (
1777
+ f"Failed to generate summary using model: {step_exc}"
1778
+ )
1779
+ logger.error(error_message)
1780
+ result["status"] = error_message
1781
+ return result
1782
+
1783
+ if not response.msgs:
1784
+ status_message = (
1785
+ "Failed to generate summary from model response."
1786
+ )
1787
+ result["status"] = status_message
1788
+ return result
1789
+
1790
+ summary_content = response.msgs[-1].content.strip()
1791
+ if not summary_content:
1792
+ status_message = "Generated summary is empty."
1793
+ result["status"] = status_message
1794
+ return result
1795
+
1796
+ # handle structured output if response_format was provided
1797
+ structured_output = None
1798
+ if response_format and response.msgs[-1].parsed:
1799
+ structured_output = response.msgs[-1].parsed
1800
+
1801
+ # determine filename: use provided filename, or extract from
1802
+ # structured output, or generate timestamp
1803
+ if filename:
1804
+ base_filename = filename
1805
+ elif structured_output and hasattr(
1806
+ structured_output, 'task_title'
1807
+ ):
1808
+ # use task_title from structured output for filename
1809
+ task_title = structured_output.task_title
1810
+ clean_title = ContextUtility.sanitize_workflow_filename(
1811
+ task_title
1812
+ )
1813
+ base_filename = (
1814
+ f"{clean_title}_workflow" if clean_title else "workflow"
1815
+ )
1816
+ else:
1817
+ base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
1818
+
1819
+ base_filename = Path(base_filename).with_suffix("").name
1820
+
1821
+ metadata = context_util.get_session_metadata()
1822
+ metadata.update(
1823
+ {
1824
+ "agent_id": self.agent_id,
1825
+ "message_count": len(messages),
1826
+ }
1827
+ )
1828
+
1829
+ # convert structured output to custom markdown if present
1830
+ if structured_output:
1831
+ # convert structured output to custom markdown
1832
+ summary_content = context_util.structured_output_to_markdown(
1833
+ structured_data=structured_output, metadata=metadata
1834
+ )
1835
+ if add_user_messages:
1836
+ summary_content = self._append_user_messages_section(
1837
+ summary_content, user_messages
1838
+ )
1839
+
1840
+ # Save the markdown (either custom structured or default)
1841
+ save_status = context_util.save_markdown_file(
1842
+ base_filename,
1843
+ summary_content,
1844
+ title="Conversation Summary"
1845
+ if not structured_output
1846
+ else None,
1847
+ metadata=metadata if not structured_output else None,
1848
+ )
1849
+
1850
+ file_path = (
1851
+ context_util.get_working_directory() / f"{base_filename}.md"
1852
+ )
1853
+ summary_content = (
1854
+ f"[CONTEXT_SUMMARY] The following is a summary of our "
1855
+ f"conversation from a previous session: {summary_content}"
1856
+ )
1857
+ # Prepare result dictionary
1858
+ result_dict = {
1859
+ "summary": summary_content,
1860
+ "file_path": str(file_path),
1861
+ "status": save_status,
1862
+ "structured_summary": structured_output,
1863
+ }
1864
+
1865
+ result.update(result_dict)
1866
+ logger.info("Conversation summary saved to %s", file_path)
1867
+ return result
1868
+
1869
+ except Exception as exc:
1870
+ error_message = f"Failed to summarize conversation context: {exc}"
1871
+ logger.error(error_message)
1872
+ result["status"] = error_message
1873
+ return result
1874
+
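
As a usage sketch of the new summarize() API (the ConversationSummary schema, prompts, and paths below are illustrative assumptions, not taken from the package):

```python
from pydantic import BaseModel

from camel.agents import ChatAgent


class ConversationSummary(BaseModel):
    # task_title is also used to derive the markdown filename when no
    # explicit filename is given.
    task_title: str
    key_decisions: list[str]
    open_questions: list[str]


agent = ChatAgent(system_message="You are a helpful assistant.")
agent.step("Help me plan a database migration.")

result = agent.summarize(
    filename="migration_planning",       # illustrative filename
    response_format=ConversationSummary,
    working_directory="./context",       # illustrative directory
)
print(result["status"])
print(result["file_path"])  # roughly ./context/migration_planning.md
```
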
1875
+ async def asummarize(
1876
+ self,
1877
+ filename: Optional[str] = None,
1878
+ summary_prompt: Optional[str] = None,
1879
+ response_format: Optional[Type[BaseModel]] = None,
1880
+ working_directory: Optional[Union[str, Path]] = None,
1881
+ include_summaries: bool = False,
1882
+ add_user_messages: bool = True,
1883
+ ) -> Dict[str, Any]:
1884
+ r"""Asynchronously summarize the agent's current conversation context
1885
+ and persist it to a markdown file.
1886
+
1887
+ This is the async version of summarize() that uses astep() for
1888
+ non-blocking LLM calls, enabling parallel summarization of multiple
1889
+ agents.
1890
+
1891
+ Args:
1892
+ filename (Optional[str]): The base filename (without extension) to
1893
+ use for the markdown file. Defaults to a timestamped name when
1894
+ not provided.
1895
+ summary_prompt (Optional[str]): Custom prompt for the summarizer.
1896
+ When omitted, a default prompt highlighting key decisions,
1897
+ action items, and open questions is used.
1898
+ response_format (Optional[Type[BaseModel]]): A Pydantic model
1899
+ defining the expected structure of the response. If provided,
1900
+ the summary will be generated as structured output and included
1901
+ in the result.
1902
+ working_directory (Optional[str|Path]): Optional directory to save
1903
+ the markdown summary file. If provided, overrides the default
1904
+ directory used by ContextUtility.
1905
+ include_summaries (bool): Whether to include previously generated
1906
+ summaries in the content to be summarized. If False (default),
1907
+ only non-summary messages will be summarized. If True, all
1908
+ messages including previous summaries will be summarized
1909
+ (full compression). (default: :obj:`False`)
1910
+ add_user_messages (bool): Whether to add user messages to the summary.
1911
+ (default: :obj:`True`)
1912
+ Returns:
1913
+ Dict[str, Any]: A dictionary containing the summary text, file
1914
+ path, status message, and optionally structured_summary if
1915
+ response_format was provided.
1916
+ """
1917
+
1918
+ result: Dict[str, Any] = {
1919
+ "summary": "",
1920
+ "file_path": None,
1921
+ "status": "",
1922
+ }
1923
+
1924
+ try:
1925
+ # Use external context if set, otherwise create local one
1926
+ if self._context_utility is None:
1927
+ if working_directory is not None:
1928
+ self._context_utility = ContextUtility(
1929
+ working_directory=str(working_directory)
1930
+ )
1931
+ else:
1932
+ self._context_utility = ContextUtility()
1933
+ context_util = self._context_utility
1934
+
1935
+ # Get conversation directly from agent's memory
1936
+ messages, _ = self.memory.get_context()
1937
+
1938
+ if not messages:
1939
+ status_message = (
1940
+ "No conversation context available to summarize."
1941
+ )
1942
+ result["status"] = status_message
1943
+ return result
1944
+
1945
+ # Convert messages to conversation text
1946
+ conversation_lines = []
1947
+ user_messages: List[str] = []
1948
+ for message in messages:
1949
+ role = message.get('role', 'unknown')
1950
+ content = message.get('content', '')
1951
+
1952
+ # Skip summary messages if include_summaries is False
1953
+ if not include_summaries and isinstance(content, str):
1954
+ # Check if this is a summary message by looking for marker
1955
+ if content.startswith('[CONTEXT_SUMMARY]'):
1956
+ continue
1957
+
1958
+ # Handle tool call messages (assistant calling tools)
1959
+ tool_calls = message.get('tool_calls')
1960
+ if tool_calls and isinstance(tool_calls, (list, tuple)):
1961
+ for tool_call in tool_calls:
1962
+ # Handle both dict and object formats
1963
+ if isinstance(tool_call, dict):
1964
+ func_name = tool_call.get('function', {}).get(
1965
+ 'name', 'unknown_tool'
1966
+ )
1967
+ func_args_str = tool_call.get('function', {}).get(
1968
+ 'arguments', '{}'
1969
+ )
1970
+ else:
1971
+ # Handle object format (Pydantic or similar)
1972
+ func_name = getattr(
1973
+ getattr(tool_call, 'function', None),
1974
+ 'name',
1975
+ 'unknown_tool',
1976
+ )
1977
+ func_args_str = getattr(
1978
+ getattr(tool_call, 'function', None),
1979
+ 'arguments',
1980
+ '{}',
1981
+ )
1982
+
1983
+ # Parse and format arguments for readability
1984
+ try:
1985
+ import json
1986
+
1987
+ args_dict = json.loads(func_args_str)
1988
+ args_formatted = ', '.join(
1989
+ f"{k}={v}" for k, v in args_dict.items()
1990
+ )
1991
+ except (json.JSONDecodeError, ValueError, TypeError):
1992
+ args_formatted = func_args_str
1993
+
1994
+ conversation_lines.append(
1995
+ f"[TOOL CALL] {func_name}({args_formatted})"
1996
+ )
1997
+
1998
+ # Handle tool response messages
1999
+ elif role == 'tool':
2000
+ tool_name = message.get('name', 'unknown_tool')
2001
+ if not content:
2002
+ content = str(message.get('content', ''))
2003
+ conversation_lines.append(
2004
+ f"[TOOL RESULT] {tool_name} → {content}"
2005
+ )
2006
+
2007
+ # Handle regular content messages (user/assistant/system)
2008
+ elif content:
2009
+ content = str(content)
2010
+ if role == 'user':
2011
+ user_messages.append(content)
2012
+ conversation_lines.append(f"{role}: {content}")
2013
+
2014
+ conversation_text = "\n".join(conversation_lines).strip()
2015
+
2016
+ if not conversation_text:
2017
+ status_message = (
2018
+ "Conversation context is empty; skipping summary."
2019
+ )
2020
+ result["status"] = status_message
2021
+ return result
2022
+
2023
+ if self._context_summary_agent is None:
2024
+ self._context_summary_agent = ChatAgent(
2025
+ system_message=(
2026
+ "You are a helpful assistant that summarizes "
2027
+ "conversations"
2028
+ ),
2029
+ model=self.model_backend,
2030
+ agent_id=f"{self.agent_id}_context_summarizer",
2031
+ summarize_threshold=None,
2032
+ )
2033
+ else:
2034
+ self._context_summary_agent.reset()
2035
+
2036
+ if summary_prompt:
2037
+ prompt_text = (
2038
+ f"{summary_prompt.rstrip()}\n\n"
2039
+ f"AGENT CONVERSATION TO BE SUMMARIZED:\n"
2040
+ f"{conversation_text}"
2041
+ )
2042
+ else:
2043
+ prompt_text = build_default_summary_prompt(conversation_text)
2044
+
2045
+ try:
2046
+ # Use structured output if response_format is provided
2047
+ if response_format:
2048
+ response = await self._context_summary_agent.astep(
2049
+ prompt_text, response_format=response_format
2050
+ )
2051
+ else:
2052
+ response = await self._context_summary_agent.astep(
2053
+ prompt_text
2054
+ )
2055
+
2056
+ # Handle streaming response
2057
+ if isinstance(response, AsyncStreamingChatAgentResponse):
2058
+ # Collect final response
2059
+ final_response = await response
2060
+ response = final_response
2061
+
2062
+ except Exception as step_exc:
2063
+ error_message = (
2064
+ f"Failed to generate summary using model: {step_exc}"
2065
+ )
2066
+ logger.error(error_message)
2067
+ result["status"] = error_message
2068
+ return result
2069
+
2070
+ if not response.msgs:
2071
+ status_message = (
2072
+ "Failed to generate summary from model response."
2073
+ )
2074
+ result["status"] = status_message
2075
+ return result
2076
+
2077
+ summary_content = response.msgs[-1].content.strip()
2078
+ if not summary_content:
2079
+ status_message = "Generated summary is empty."
2080
+ result["status"] = status_message
2081
+ return result
2082
+
2083
+ # handle structured output if response_format was provided
2084
+ structured_output = None
2085
+ if response_format and response.msgs[-1].parsed:
2086
+ structured_output = response.msgs[-1].parsed
2087
+
2088
+ # determine filename: use provided filename, or extract from
2089
+ # structured output, or generate timestamp
2090
+ if filename:
2091
+ base_filename = filename
2092
+ elif structured_output and hasattr(
2093
+ structured_output, 'task_title'
2094
+ ):
2095
+ # use task_title from structured output for filename
2096
+ task_title = structured_output.task_title
2097
+ clean_title = ContextUtility.sanitize_workflow_filename(
2098
+ task_title
2099
+ )
2100
+ base_filename = (
2101
+ f"{clean_title}_workflow" if clean_title else "workflow"
2102
+ )
2103
+ else:
2104
+ base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
2105
+
2106
+ base_filename = Path(base_filename).with_suffix("").name
2107
+
2108
+ metadata = context_util.get_session_metadata()
2109
+ metadata.update(
2110
+ {
2111
+ "agent_id": self.agent_id,
2112
+ "message_count": len(messages),
2113
+ }
2114
+ )
2115
+
2116
+ # convert structured output to custom markdown if present
2117
+ if structured_output:
2118
+ # convert structured output to custom markdown
2119
+ summary_content = context_util.structured_output_to_markdown(
2120
+ structured_data=structured_output, metadata=metadata
2121
+ )
2122
+ if add_user_messages:
2123
+ summary_content = self._append_user_messages_section(
2124
+ summary_content, user_messages
2125
+ )
2126
+
2127
+ # Save the markdown (either custom structured or default)
2128
+ save_status = context_util.save_markdown_file(
2129
+ base_filename,
2130
+ summary_content,
2131
+ title="Conversation Summary"
2132
+ if not structured_output
2133
+ else None,
2134
+ metadata=metadata if not structured_output else None,
2135
+ )
2136
+
2137
+ file_path = (
2138
+ context_util.get_working_directory() / f"{base_filename}.md"
2139
+ )
2140
+
2141
+ summary_content = (
2142
+ f"[CONTEXT_SUMMARY] The following is a summary of our "
2143
+ f"conversation from a previous session: {summary_content}"
2144
+ )
2145
+
2146
+ # Prepare result dictionary
2147
+ result_dict = {
2148
+ "summary": summary_content,
2149
+ "file_path": str(file_path),
2150
+ "status": save_status,
2151
+ "structured_summary": structured_output,
2152
+ }
2153
+
2154
+ result.update(result_dict)
2155
+ logger.info("Conversation summary saved to %s", file_path)
2156
+ return result
2157
+
2158
+ except Exception as exc:
2159
+ error_message = f"Failed to summarize conversation context: {exc}"
2160
+ logger.error(error_message)
2161
+ result["status"] = error_message
2162
+ return result
2163
+
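
Because asummarize() awaits astep() internally, several agents can be summarized concurrently; a minimal sketch, assuming `agents` is a list of ChatAgent instances:

```python
import asyncio


async def summarize_all(agents):
    # Run the summaries concurrently instead of one blocking call at a time.
    results = await asyncio.gather(*(agent.asummarize() for agent in agents))
    return {
        agent.agent_id: res["file_path"]
        for agent, res in zip(agents, results)
    }

# Usage (assuming an event loop is not already running):
# paths = asyncio.run(summarize_all(agents))
```
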
1015
2164
  def clear_memory(self) -> None:
1016
2165
  r"""Clear the agent's memory and reset to initial state.
1017
2166
 
@@ -1019,8 +2168,16 @@ class ChatAgent(BaseAgent):
1019
2168
  None
1020
2169
  """
1021
2170
  self.memory.clear()
2171
+
1022
2172
  if self.system_message is not None:
1023
- self.update_memory(self.system_message, OpenAIBackendRole.SYSTEM)
2173
+ self.memory.write_record(
2174
+ MemoryRecord(
2175
+ message=self.system_message,
2176
+ role_at_backend=OpenAIBackendRole.SYSTEM,
2177
+ timestamp=time.time_ns() / 1_000_000_000,
2178
+ agent_id=self.agent_id,
2179
+ )
2180
+ )
1024
2181
 
1025
2182
  def _generate_system_message_for_output_language(
1026
2183
  self,
@@ -1045,28 +2202,81 @@ class ChatAgent(BaseAgent):
1045
2202
  content = self._original_system_message.content + language_prompt
1046
2203
  return self._original_system_message.create_new_instance(content)
1047
2204
  else:
1048
- return BaseMessage.make_assistant_message(
1049
- role_name="Assistant",
1050
- content=language_prompt,
1051
- )
2205
+ return BaseMessage.make_system_message(language_prompt)
1052
2206
 
1053
2207
  def init_messages(self) -> None:
1054
2208
  r"""Initializes the stored messages list with the current system
1055
2209
  message.
1056
2210
  """
1057
- import time
2211
+ self._reset_summary_state()
2212
+ self.clear_memory()
1058
2213
 
1059
- self.memory.clear()
1060
- # avoid UserWarning: The `ChatHistoryMemory` is empty.
1061
- if self.system_message is not None:
1062
- self.memory.write_record(
1063
- MemoryRecord(
1064
- message=self.system_message,
1065
- role_at_backend=OpenAIBackendRole.SYSTEM,
1066
- timestamp=time.time_ns() / 1_000_000_000,
1067
- agent_id=self.agent_id,
1068
- )
1069
- )
2214
+ def update_system_message(
2215
+ self,
2216
+ system_message: Union[BaseMessage, str],
2217
+ reset_memory: bool = True,
2218
+ ) -> None:
2219
+ r"""Update the system message.
2220
+ By default, this resets the conversation with the new system message.
2221
+
2222
+ Args:
2223
+ system_message (Union[BaseMessage, str]): The new system message.
2224
+ Can be either a BaseMessage object or a string.
2225
+ If a string is provided, it will be converted
2226
+ into a BaseMessage object.
2227
+ reset_memory (bool):
2228
+ Whether to reinitialize conversation messages after updating
2229
+ the system message. Defaults to True.
2230
+ """
2231
+ if system_message is None:
2232
+ raise ValueError("system_message is required and cannot be None. ")
2233
+ self._original_system_message = (
2234
+ BaseMessage.make_system_message(system_message)
2235
+ if isinstance(system_message, str)
2236
+ else system_message
2237
+ )
2238
+ self._system_message = (
2239
+ self._generate_system_message_for_output_language()
2240
+ )
2241
+ if reset_memory:
2242
+ self.init_messages()
2243
+
2244
+ def append_to_system_message(
2245
+ self, content: str, reset_memory: bool = True
2246
+ ) -> None:
2247
+ """Append additional context to existing system message.
2248
+
2249
+ Args:
2250
+ content (str): The additional system message.
2251
+ reset_memory (bool):
2252
+ Whether to reinitialize conversation messages after appending
2253
+ additional context. Defaults to True.
2254
+ """
2255
+ original_content = (
2256
+ self._original_system_message.content
2257
+ if self._original_system_message
2258
+ else ""
2259
+ )
2260
+ new_system_message = original_content + '\n' + content
2261
+ self._original_system_message = BaseMessage.make_system_message(
2262
+ new_system_message
2263
+ )
2264
+ self._system_message = (
2265
+ self._generate_system_message_for_output_language()
2266
+ )
2267
+ if reset_memory:
2268
+ self.init_messages()
2269
+
2270
+ def reset_to_original_system_message(self) -> None:
2271
+ r"""Reset system message to original, removing any appended context.
2272
+
2273
+ This method reverts the agent's system message back to its original
2274
+ state, removing any workflow context or other modifications that may
2275
+ have been appended. Useful for resetting agent state in multi-turn
2276
+ scenarios.
2277
+ """
2278
+ self._system_message = self._original_system_message
2279
+ self.init_messages()
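
Taken together, these methods manage the system prompt at runtime; a minimal usage sketch (the prompts are illustrative and ChatAgent construction details are omitted):

```python
agent = ChatAgent(system_message="You are a coding assistant.")

# Swap in a new system prompt; by default this re-initializes memory.
agent.update_system_message("You are a careful code reviewer.")

# Append extra context on top of the current system prompt.
agent.append_to_system_message("Focus on security issues.")

# Revert to the original system message (see the docstring above for
# exactly what is restored).
agent.reset_to_original_system_message()
```
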
1070
2280
 
1071
2281
  def record_message(self, message: BaseMessage) -> None:
1072
2282
  r"""Records the externally provided message into the agent memory as if
@@ -1129,7 +2339,7 @@ class ChatAgent(BaseAgent):
1129
2339
 
1130
2340
  # Create a prompt based on the schema
1131
2341
  format_instruction = (
1132
- "\n\nPlease respond in the following JSON format:\n" "{\n"
2342
+ "\n\nPlease respond in the following JSON format:\n{\n"
1133
2343
  )
1134
2344
 
1135
2345
  properties = schema.get("properties", {})
@@ -1216,6 +2426,33 @@ class ChatAgent(BaseAgent):
1216
2426
  # and True to indicate we used prompt formatting
1217
2427
  return modified_message, None, True
1218
2428
 
2429
+ def _is_called_from_registered_toolkit(self) -> bool:
2430
+ r"""Check if current step/astep call originates from a
2431
+ RegisteredAgentToolkit.
2432
+
2433
+ This method uses stack inspection to detect if the current call
2434
+ is originating from a toolkit that inherits from
2435
+ RegisteredAgentToolkit. When detected, tools should be disabled to
2436
+ prevent recursive calls.
2437
+
2438
+ Returns:
2439
+ bool: True if called from a RegisteredAgentToolkit, False otherwise
2440
+ """
2441
+ from camel.toolkits.base import RegisteredAgentToolkit
2442
+
2443
+ try:
2444
+ for frame_info in inspect.stack():
2445
+ frame_locals = frame_info.frame.f_locals
2446
+ if 'self' in frame_locals:
2447
+ caller_self = frame_locals['self']
2448
+ if isinstance(caller_self, RegisteredAgentToolkit):
2449
+ return True
2450
+
2451
+ except Exception:
2452
+ return False
2453
+
2454
+ return False
2455
+
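
The detection itself is plain stack inspection; a standalone sketch of the idea with an illustrative class (nothing camel-specific):

```python
import inspect


class SpecialCaller:
    def invoke(self, fn):
        return fn()


def called_from_special_caller() -> bool:
    # Walk the call stack and look for a frame whose `self` is a
    # SpecialCaller instance.
    for frame_info in inspect.stack():
        caller_self = frame_info.frame.f_locals.get("self")
        if isinstance(caller_self, SpecialCaller):
            return True
    return False


print(called_from_special_caller())                         # False
print(SpecialCaller().invoke(called_from_special_caller))   # True
```
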
1219
2456
  def _apply_prompt_based_parsing(
1220
2457
  self,
1221
2458
  response: ModelResponse,
@@ -1232,7 +2469,6 @@ class ChatAgent(BaseAgent):
1232
2469
  try:
1233
2470
  # Try to extract JSON from the response content
1234
2471
  import json
1235
- import re
1236
2472
 
1237
2473
  from pydantic import ValidationError
1238
2474
 
@@ -1271,8 +2507,7 @@ class ChatAgent(BaseAgent):
1271
2507
 
1272
2508
  if not message.parsed:
1273
2509
  logger.warning(
1274
- f"Failed to parse JSON from response: "
1275
- f"{content}"
2510
+ f"Failed to parse JSON from response: {content}"
1276
2511
  )
1277
2512
 
1278
2513
  except Exception as e:
@@ -1365,6 +2600,9 @@ class ChatAgent(BaseAgent):
1365
2600
  a StreamingChatAgentResponse that behaves like
1366
2601
  ChatAgentResponse but can also be iterated for
1367
2602
  streaming updates.
2603
+
2604
+ Raises:
2605
+ TimeoutError: If the step operation exceeds the configured timeout.
1368
2606
  """
1369
2607
 
1370
2608
  stream = self.model_backend.model_config_dict.get("stream", False)
@@ -1374,6 +2612,30 @@ class ChatAgent(BaseAgent):
1374
2612
  generator = self._stream(input_message, response_format)
1375
2613
  return StreamingChatAgentResponse(generator)
1376
2614
 
2615
+ # Execute with timeout if configured
2616
+ if self.step_timeout is not None:
2617
+ with concurrent.futures.ThreadPoolExecutor(
2618
+ max_workers=1
2619
+ ) as executor:
2620
+ future = executor.submit(
2621
+ self._step_impl, input_message, response_format
2622
+ )
2623
+ try:
2624
+ return future.result(timeout=self.step_timeout)
2625
+ except concurrent.futures.TimeoutError:
2626
+ future.cancel()
2627
+ raise TimeoutError(
2628
+ f"Step timed out after {self.step_timeout}s"
2629
+ )
2630
+ else:
2631
+ return self._step_impl(input_message, response_format)
2632
+
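
The timeout wrapper is the standard concurrent.futures pattern of submitting the blocking call and bounding the wait; a standalone sketch:

```python
import concurrent.futures
import time


def slow_step() -> str:
    time.sleep(5)
    return "done"


with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
    future = executor.submit(slow_step)
    try:
        print(future.result(timeout=1.0))
    except concurrent.futures.TimeoutError:
        # Only the wait is abandoned; the worker thread keeps running, and
        # leaving the with-block still waits for it to finish.
        print("step timed out")
```
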
2633
+ def _step_impl(
2634
+ self,
2635
+ input_message: Union[BaseMessage, str],
2636
+ response_format: Optional[Type[BaseModel]] = None,
2637
+ ) -> ChatAgentResponse:
2638
+ r"""Implementation of non-streaming step logic."""
1377
2639
  # Set Langfuse session_id using agent_id for trace grouping
1378
2640
  try:
1379
2641
  from camel.utils.langfuse import set_current_agent_session_id
@@ -1382,6 +2644,10 @@ class ChatAgent(BaseAgent):
1382
2644
  except ImportError:
1383
2645
  pass # Langfuse not available
1384
2646
 
2647
+ # Check if this call is from a RegisteredAgentToolkit to prevent tool
2648
+ # use
2649
+ disable_tools = self._is_called_from_registered_toolkit()
2650
+
1385
2651
  # Handle response format compatibility with non-strict tools
1386
2652
  original_response_format = response_format
1387
2653
  input_message, response_format, used_prompt_formatting = (
@@ -1390,48 +2656,155 @@ class ChatAgent(BaseAgent):
1390
2656
  )
1391
2657
  )
1392
2658
 
1393
- # Convert input message to BaseMessage if necessary
1394
- if isinstance(input_message, str):
1395
- input_message = BaseMessage.make_user_message(
1396
- role_name="User", content=input_message
1397
- )
2659
+ # Convert input message to BaseMessage if necessary
2660
+ if isinstance(input_message, str):
2661
+ input_message = BaseMessage.make_user_message(
2662
+ role_name="User", content=input_message
2663
+ )
2664
+
2665
+ # Add user input to memory
2666
+ self.update_memory(input_message, OpenAIBackendRole.USER)
2667
+
2668
+ tool_call_records: List[ToolCallingRecord] = []
2669
+ external_tool_call_requests: Optional[List[ToolCallRequest]] = None
2670
+
2671
+ accumulated_context_tokens = (
2672
+ 0 # This tracks cumulative context tokens, not API usage tokens
2673
+ )
2674
+
2675
+ # Initialize token usage tracker
2676
+ step_token_usage = self._create_token_usage_tracker()
2677
+ iteration_count: int = 0
2678
+ prev_num_openai_messages: int = 0
2679
+
2680
+ while True:
2681
+ if self.pause_event is not None and not self.pause_event.is_set():
2682
+ # Use efficient blocking wait for threading.Event
2683
+ if isinstance(self.pause_event, threading.Event):
2684
+ self.pause_event.wait()
2685
+ else:
2686
+ # Fallback for asyncio.Event in sync context
2687
+ while not self.pause_event.is_set():
2688
+ time.sleep(0.001)
2689
+
2690
+ try:
2691
+ openai_messages, num_tokens = self.memory.get_context()
2692
+ if self.summarize_threshold is not None:
2693
+ threshold = self._calculate_next_summary_threshold()
2694
+ summary_token_count = self._summary_token_count
2695
+ token_limit = self.model_backend.token_limit
2696
+
2697
+ if num_tokens <= token_limit:
2698
+ if (
2699
+ summary_token_count
2700
+ > token_limit * self.summary_window_ratio
2701
+ ):
2702
+ logger.info(
2703
+ f"Summary tokens ({summary_token_count}) "
2704
+ f"exceed limit, full compression."
2705
+ )
2706
+ # Summarize everything (including summaries)
2707
+ summary = self.summarize(include_summaries=True)
2708
+ self._update_memory_with_summary(
2709
+ summary.get("summary", ""),
2710
+ include_summaries=True,
2711
+ )
2712
+ elif num_tokens > threshold:
2713
+ logger.info(
2714
+ f"Token count ({num_tokens}) exceed threshold "
2715
+ f"({threshold}). Triggering summarization."
2716
+ )
2717
+ # Only summarize non-summary content
2718
+ summary = self.summarize(include_summaries=False)
2719
+ self._update_memory_with_summary(
2720
+ summary.get("summary", ""),
2721
+ include_summaries=False,
2722
+ )
2723
+ accumulated_context_tokens += num_tokens
2724
+ except RuntimeError as e:
2725
+ return self._step_terminate(
2726
+ e.args[1], tool_call_records, "max_tokens_exceeded"
2727
+ )
2728
+ # Get response from model backend with token limit error handling
2729
+ try:
2730
+ response = self._get_model_response(
2731
+ openai_messages,
2732
+ num_tokens=num_tokens,
2733
+ current_iteration=iteration_count,
2734
+ response_format=response_format,
2735
+ tool_schemas=[]
2736
+ if disable_tools
2737
+ else self._get_full_tool_schemas(),
2738
+ prev_num_openai_messages=prev_num_openai_messages,
2739
+ )
2740
+ except Exception as exc:
2741
+ logger.exception("Model error: %s", exc)
2742
+
2743
+ if self._is_token_limit_error(exc):
2744
+ tool_signature = self._last_tool_call_signature
2745
+ if (
2746
+ tool_signature is not None
2747
+ and tool_signature
2748
+ == self._last_token_limit_tool_signature
2749
+ ):
2750
+ description = self._describe_tool_call(
2751
+ self._last_tool_call_record
2752
+ )
2753
+ repeated_msg = (
2754
+ "Context exceeded again by the same tool call."
2755
+ )
2756
+ if description:
2757
+ repeated_msg += f" {description}"
2758
+ raise RuntimeError(repeated_msg) from exc
2759
+
2760
+ user_message_count = sum(
2761
+ 1
2762
+ for msg in openai_messages
2763
+ if getattr(msg, "role", None) == "user"
2764
+ )
2765
+ if (
2766
+ user_message_count == 1
2767
+ and getattr(openai_messages[-1], "role", None)
2768
+ == "user"
2769
+ ):
2770
+ raise RuntimeError(
2771
+ "The provided user input alone exceeds the "
2772
+ "context window. Please shorten the input."
2773
+ ) from exc
2774
+
2775
+ logger.warning(
2776
+ "Token limit exceeded error detected. "
2777
+ "Summarizing context."
2778
+ )
2779
+
2780
+ recent_records: List[ContextRecord]
2781
+ try:
2782
+ recent_records = self.memory.retrieve()
2783
+ except Exception: # pragma: no cover - defensive guard
2784
+ recent_records = []
1398
2785
 
1399
- # Add user input to memory
1400
- self.update_memory(input_message, OpenAIBackendRole.USER)
2786
+ indices_to_remove = (
2787
+ self._find_indices_to_remove_for_last_tool_pair(
2788
+ recent_records
2789
+ )
2790
+ )
2791
+ self.memory.remove_records_by_indices(indices_to_remove)
1401
2792
 
1402
- tool_call_records: List[ToolCallingRecord] = []
1403
- external_tool_call_requests: Optional[List[ToolCallRequest]] = None
2793
+ summary = self.summarize(include_summaries=False)
2794
+ tool_notice = self._format_tool_limit_notice()
2795
+ summary_messages = summary.get("summary", "")
1404
2796
 
1405
- accumulated_context_tokens = (
1406
- 0 # This tracks cumulative context tokens, not API usage tokens
1407
- )
2797
+ if tool_notice:
2798
+ summary_messages += "\n\n" + tool_notice
1408
2799
 
1409
- # Initialize token usage tracker
1410
- step_token_usage = self._create_token_usage_tracker()
1411
- iteration_count: int = 0
1412
- prev_num_openai_messages: int = 0
2800
+ self._update_memory_with_summary(
2801
+ summary_messages, include_summaries=False
2802
+ )
2803
+ self._last_token_limit_tool_signature = tool_signature
2804
+ return self._step_impl(input_message, response_format)
1413
2805
 
1414
- while True:
1415
- if self.pause_event is not None and not self.pause_event.is_set():
1416
- while not self.pause_event.is_set():
1417
- time.sleep(0.001)
2806
+ raise
1418
2807
 
1419
- try:
1420
- openai_messages, num_tokens = self.memory.get_context()
1421
- accumulated_context_tokens += num_tokens
1422
- except RuntimeError as e:
1423
- return self._step_terminate(
1424
- e.args[1], tool_call_records, "max_tokens_exceeded"
1425
- )
1426
- # Get response from model backend
1427
- response = self._get_model_response(
1428
- openai_messages,
1429
- num_tokens=num_tokens,
1430
- current_iteration=iteration_count,
1431
- response_format=response_format,
1432
- tool_schemas=self._get_full_tool_schemas(),
1433
- prev_num_openai_messages=prev_num_openai_messages,
1434
- )
1435
2808
  prev_num_openai_messages = len(openai_messages)
1436
2809
  iteration_count += 1
1437
2810
 
@@ -1444,7 +2817,7 @@ class ChatAgent(BaseAgent):
1444
2817
  if self.stop_event and self.stop_event.is_set():
1445
2818
  # Use the _step_terminate to terminate the agent with reason
1446
2819
  logger.info(
1447
- f"Termination triggered at iteration " f"{iteration_count}"
2820
+ f"Termination triggered at iteration {iteration_count}"
1448
2821
  )
1449
2822
  return self._step_terminate(
1450
2823
  accumulated_context_tokens,
@@ -1467,8 +2840,11 @@ class ChatAgent(BaseAgent):
1467
2840
  self.pause_event is not None
1468
2841
  and not self.pause_event.is_set()
1469
2842
  ):
1470
- while not self.pause_event.is_set():
1471
- time.sleep(0.001)
2843
+ if isinstance(self.pause_event, threading.Event):
2844
+ self.pause_event.wait()
2845
+ else:
2846
+ while not self.pause_event.is_set():
2847
+ time.sleep(0.001)
1472
2848
  result = self._execute_tool(tool_call_request)
1473
2849
  tool_call_records.append(result)
1474
2850
 
@@ -1544,6 +2920,10 @@ class ChatAgent(BaseAgent):
1544
2920
  True, returns an AsyncStreamingChatAgentResponse that can be
1545
2921
  awaited for the final result or async iterated for streaming
1546
2922
  updates.
2923
+
2924
+ Raises:
2925
+ asyncio.TimeoutError: If the step operation exceeds the configured
2926
+ timeout.
1547
2927
  """
1548
2928
 
1549
2929
  try:
@@ -1559,9 +2939,22 @@ class ChatAgent(BaseAgent):
1559
2939
  async_generator = self._astream(input_message, response_format)
1560
2940
  return AsyncStreamingChatAgentResponse(async_generator)
1561
2941
  else:
1562
- return await self._astep_non_streaming_task(
1563
- input_message, response_format
1564
- )
2942
+ if self.step_timeout is not None:
2943
+ try:
2944
+ return await asyncio.wait_for(
2945
+ self._astep_non_streaming_task(
2946
+ input_message, response_format
2947
+ ),
2948
+ timeout=self.step_timeout,
2949
+ )
2950
+ except asyncio.TimeoutError:
2951
+ raise asyncio.TimeoutError(
2952
+ f"Async step timed out after {self.step_timeout}s"
2953
+ )
2954
+ else:
2955
+ return await self._astep_non_streaming_task(
2956
+ input_message, response_format
2957
+ )
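
The async path relies on asyncio.wait_for instead; a standalone sketch of the same timeout behavior:

```python
import asyncio


async def slow_astep() -> str:
    await asyncio.sleep(5)
    return "done"


async def main() -> None:
    try:
        print(await asyncio.wait_for(slow_astep(), timeout=1.0))
    except asyncio.TimeoutError:
        # Unlike the thread-based variant, the awaited task is cancelled.
        print("async step timed out")


asyncio.run(main())
```
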
1565
2958
 
1566
2959
  async def _astep_non_streaming_task(
1567
2960
  self,
@@ -1577,6 +2970,10 @@ class ChatAgent(BaseAgent):
1577
2970
  except ImportError:
1578
2971
  pass # Langfuse not available
1579
2972
 
2973
+ # Check if this call is from a RegisteredAgentToolkit to prevent tool
2974
+ # use
2975
+ disable_tools = self._is_called_from_registered_toolkit()
2976
+
1580
2977
  # Handle response format compatibility with non-strict tools
1581
2978
  original_response_format = response_format
1582
2979
  input_message, response_format, used_prompt_formatting = (
@@ -1602,25 +2999,139 @@ class ChatAgent(BaseAgent):
1602
2999
  step_token_usage = self._create_token_usage_tracker()
1603
3000
  iteration_count: int = 0
1604
3001
  prev_num_openai_messages: int = 0
3002
+
1605
3003
  while True:
1606
3004
  if self.pause_event is not None and not self.pause_event.is_set():
1607
- await self.pause_event.wait()
3005
+ if isinstance(self.pause_event, asyncio.Event):
3006
+ await self.pause_event.wait()
3007
+ elif isinstance(self.pause_event, threading.Event):
3008
+ # For threading.Event in async context, run in executor
3009
+ loop = asyncio.get_event_loop()
3010
+ await loop.run_in_executor(None, self.pause_event.wait)
1608
3011
  try:
1609
3012
  openai_messages, num_tokens = self.memory.get_context()
3013
+ if self.summarize_threshold is not None:
3014
+ threshold = self._calculate_next_summary_threshold()
3015
+ summary_token_count = self._summary_token_count
3016
+ token_limit = self.model_backend.token_limit
3017
+
3018
+ if num_tokens <= token_limit:
3019
+ if (
3020
+ summary_token_count
3021
+ > token_limit * self.summary_window_ratio
3022
+ ):
3023
+ logger.info(
3024
+ f"Summary tokens ({summary_token_count}) "
3025
+ f"exceed limit, full compression."
3026
+ )
3027
+ # Summarize everything (including summaries)
3028
+ summary = await self.asummarize(
3029
+ include_summaries=True
3030
+ )
3031
+ self._update_memory_with_summary(
3032
+ summary.get("summary", ""),
3033
+ include_summaries=True,
3034
+ )
3035
+ elif num_tokens > threshold:
3036
+ logger.info(
3037
+ f"Token count ({num_tokens}) exceed threshold "
3038
+ "({threshold}). Triggering summarization."
3039
+ )
3040
+ # Only summarize non-summary content
3041
+ summary = await self.asummarize(
3042
+ include_summaries=False
3043
+ )
3044
+ self._update_memory_with_summary(
3045
+ summary.get("summary", ""),
3046
+ include_summaries=False,
3047
+ )
1610
3048
  accumulated_context_tokens += num_tokens
1611
3049
  except RuntimeError as e:
1612
3050
  return self._step_terminate(
1613
3051
  e.args[1], tool_call_records, "max_tokens_exceeded"
1614
3052
  )
3053
+ # Get response from model backend with token limit error handling
3054
+ try:
3055
+ response = await self._aget_model_response(
3056
+ openai_messages,
3057
+ num_tokens=num_tokens,
3058
+ current_iteration=iteration_count,
3059
+ response_format=response_format,
3060
+ tool_schemas=[]
3061
+ if disable_tools
3062
+ else self._get_full_tool_schemas(),
3063
+ prev_num_openai_messages=prev_num_openai_messages,
3064
+ )
3065
+ except Exception as exc:
3066
+ logger.exception("Model error: %s", exc)
3067
+
3068
+ if self._is_token_limit_error(exc):
3069
+ tool_signature = self._last_tool_call_signature
3070
+ if (
3071
+ tool_signature is not None
3072
+ and tool_signature
3073
+ == self._last_token_limit_tool_signature
3074
+ ):
3075
+ description = self._describe_tool_call(
3076
+ self._last_tool_call_record
3077
+ )
3078
+ repeated_msg = (
3079
+ "Context exceeded again by the same tool call."
3080
+ )
3081
+ if description:
3082
+ repeated_msg += f" {description}"
3083
+ raise RuntimeError(repeated_msg) from exc
3084
+
3085
+ user_message_count = sum(
3086
+ 1
3087
+ for msg in openai_messages
3088
+ if getattr(msg, "role", None) == "user"
3089
+ )
3090
+ if (
3091
+ user_message_count == 1
3092
+ and getattr(openai_messages[-1], "role", None)
3093
+ == "user"
3094
+ ):
3095
+ raise RuntimeError(
3096
+ "The provided user input alone exceeds the"
3097
+ "context window. Please shorten the input."
3098
+ ) from exc
3099
+
3100
+ logger.warning(
3101
+ "Token limit exceeded error detected. "
3102
+ "Summarizing context."
3103
+ )
3104
+
3105
+ recent_records: List[ContextRecord]
3106
+ try:
3107
+ recent_records = self.memory.retrieve()
3108
+ except Exception: # pragma: no cover - defensive guard
3109
+ recent_records = []
3110
+
3111
+ indices_to_remove = (
3112
+ self._find_indices_to_remove_for_last_tool_pair(
3113
+ recent_records
3114
+ )
3115
+ )
3116
+ self.memory.remove_records_by_indices(indices_to_remove)
3117
+
3118
+ summary = await self.asummarize()
3119
+
3120
+ tool_notice = self._format_tool_limit_notice()
3121
+ summary_messages = summary.get("summary", "")
3122
+
3123
+ if tool_notice:
3124
+ summary_messages += "\n\n" + tool_notice
3125
+ self._update_memory_with_summary(
3126
+ summary_messages, include_summaries=False
3127
+ )
3128
+ self._last_token_limit_tool_signature = tool_signature
3129
+ return await self._astep_non_streaming_task(
3130
+ input_message, response_format
3131
+ )
3132
+
3133
+ raise
1615
3134
 
1616
- response = await self._aget_model_response(
1617
- openai_messages,
1618
- num_tokens=num_tokens,
1619
- current_iteration=iteration_count,
1620
- response_format=response_format,
1621
- tool_schemas=self._get_full_tool_schemas(),
1622
- prev_num_openai_messages=prev_num_openai_messages,
1623
- )
1624
3135
  prev_num_openai_messages = len(openai_messages)
1625
3136
  iteration_count += 1
1626
3137
 
@@ -1633,7 +3144,7 @@ class ChatAgent(BaseAgent):
1633
3144
  if self.stop_event and self.stop_event.is_set():
1634
3145
  # Use the _step_terminate to terminate the agent with reason
1635
3146
  logger.info(
1636
- f"Termination triggered at iteration " f"{iteration_count}"
3147
+ f"Termination triggered at iteration {iteration_count}"
1637
3148
  )
1638
3149
  return self._step_terminate(
1639
3150
  accumulated_context_tokens,
@@ -1656,7 +3167,13 @@ class ChatAgent(BaseAgent):
1656
3167
  self.pause_event is not None
1657
3168
  and not self.pause_event.is_set()
1658
3169
  ):
1659
- await self.pause_event.wait()
3170
+ if isinstance(self.pause_event, asyncio.Event):
3171
+ await self.pause_event.wait()
3172
+ elif isinstance(self.pause_event, threading.Event):
3173
+ loop = asyncio.get_event_loop()
3174
+ await loop.run_in_executor(
3175
+ None, self.pause_event.wait
3176
+ )
1660
3177
  tool_call_record = await self._aexecute_tool(
1661
3178
  tool_call_request
1662
3179
  )
@@ -1691,6 +3208,8 @@ class ChatAgent(BaseAgent):
1691
3208
  if self.prune_tool_calls_from_memory and tool_call_records:
1692
3209
  self.memory.clean_tool_calls()
1693
3210
 
3211
+ self._last_token_limit_user_signature = None
3212
+
1694
3213
  return self._convert_to_chatagent_response(
1695
3214
  response,
1696
3215
  tool_call_records,
@@ -1776,64 +3295,62 @@ class ChatAgent(BaseAgent):
1776
3295
  tool_schemas: Optional[List[Dict[str, Any]]] = None,
1777
3296
  prev_num_openai_messages: int = 0,
1778
3297
  ) -> ModelResponse:
1779
- r"""Internal function for agent step model response.
1780
- Args:
1781
- openai_messages (List[OpenAIMessage]): The OpenAI
1782
- messages to process.
1783
- num_tokens (int): The number of tokens in the context.
1784
- current_iteration (int): The current iteration of the step.
1785
- response_format (Optional[Type[BaseModel]]): The response
1786
- format to use.
1787
- tool_schemas (Optional[List[Dict[str, Any]]]): The tool
1788
- schemas to use.
1789
- prev_num_openai_messages (int): The number of openai messages
1790
- logged in the previous iteration.
1791
-
1792
- Returns:
1793
- ModelResponse: The model response.
1794
- """
3298
+ r"""Internal function for agent step model response."""
3299
+ last_error = None
1795
3300
 
1796
- response = None
1797
- try:
1798
- response = self.model_backend.run(
1799
- openai_messages, response_format, tool_schemas or None
1800
- )
1801
- except Exception as exc:
1802
- logger.error(
1803
- f"An error occurred while running model "
1804
- f"{self.model_backend.model_type}, "
1805
- f"index: {self.model_backend.current_model_index}",
1806
- exc_info=exc,
1807
- )
1808
- error_info = str(exc)
1809
-
1810
- if not response and self.model_backend.num_models > 1:
1811
- raise ModelProcessingError(
1812
- "Unable to process messages: none of the provided models "
1813
- "run successfully."
1814
- )
1815
- elif not response:
3301
+ for attempt in range(self.retry_attempts):
3302
+ try:
3303
+ response = self.model_backend.run(
3304
+ openai_messages, response_format, tool_schemas or None
3305
+ )
3306
+ if response:
3307
+ break
3308
+ except RateLimitError as e:
3309
+ if self._is_token_limit_error(e):
3310
+ raise
3311
+ last_error = e
3312
+ if attempt < self.retry_attempts - 1:
3313
+ delay = min(self.retry_delay * (2**attempt), 60.0)
3314
+ delay = random.uniform(0, delay) # Add jitter
3315
+ logger.warning(
3316
+ f"Rate limit hit (attempt {attempt + 1}"
3317
+ f"/{self.retry_attempts}). Retrying in {delay:.1f}s"
3318
+ )
3319
+ time.sleep(delay)
3320
+ else:
3321
+ logger.error(
3322
+ f"Rate limit exhausted after "
3323
+ f"{self.retry_attempts} attempts"
3324
+ )
3325
+ except Exception:
3326
+ logger.error(
3327
+ f"Model error: {self.model_backend.model_type}",
3328
+ )
3329
+ raise
3330
+ else:
3331
+ # Loop completed without success
1816
3332
  raise ModelProcessingError(
1817
- f"Unable to process messages: the only provided model "
1818
- f"did not run successfully. Error: {error_info}"
3333
+ f"Unable to process messages: "
3334
+ f"{str(last_error) if last_error else 'Unknown error'}"
1819
3335
  )
1820
3336
 
1821
- sanitized_messages = self._sanitize_messages_for_logging(
3337
+ # Log success
3338
+ sanitized = self._sanitize_messages_for_logging(
1822
3339
  openai_messages, prev_num_openai_messages
1823
3340
  )
1824
3341
  logger.info(
1825
- f"Model {self.model_backend.model_type}, "
1826
- f"index {self.model_backend.current_model_index}, "
1827
- f"iteration {current_iteration}, "
1828
- f"processed these messages: {sanitized_messages}"
3342
+ f"Model {self.model_backend.model_type} "
3343
+ f"[{current_iteration}]: {sanitized}"
1829
3344
  )
3345
+
1830
3346
  if not isinstance(response, ChatCompletion):
1831
3347
  raise TypeError(
1832
- f"Expected response to be a `ChatCompletion` object, but "
1833
- f"got {type(response).__name__} instead."
3348
+ f"Expected ChatCompletion, got {type(response).__name__}"
1834
3349
  )
3350
+
1835
3351
  return self._handle_batch_response(response)
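
The retry loop above is exponential backoff with full jitter: the sleep is drawn uniformly from [0, min(retry_delay * 2**attempt, 60)]. A standalone sketch of that schedule (RuntimeError stands in for the provider's rate-limit error; retry_attempts and retry_delay mirror the agent attributes of the same name):

```python
import random
import time


def run_with_backoff(call, retry_attempts: int = 3, retry_delay: float = 1.0):
    last_error = None
    for attempt in range(retry_attempts):
        try:
            return call()
        except RuntimeError as e:  # stand-in for a rate-limit error
            last_error = e
            if attempt < retry_attempts - 1:
                delay = min(retry_delay * (2 ** attempt), 60.0)
                delay = random.uniform(0, delay)  # full jitter
                time.sleep(delay)
    raise RuntimeError(f"all retries failed: {last_error}")
```
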
1836
3352
 
3353
+ @observe()
1837
3354
  async def _aget_model_response(
1838
3355
  self,
1839
3356
  openai_messages: List[OpenAIMessage],
@@ -1843,62 +3360,61 @@ class ChatAgent(BaseAgent):
1843
3360
  tool_schemas: Optional[List[Dict[str, Any]]] = None,
1844
3361
  prev_num_openai_messages: int = 0,
1845
3362
  ) -> ModelResponse:
1846
- r"""Internal function for agent async step model response.
1847
- Args:
1848
- openai_messages (List[OpenAIMessage]): The OpenAI messages
1849
- to process.
1850
- num_tokens (int): The number of tokens in the context.
1851
- current_iteration (int): The current iteration of the step.
1852
- response_format (Optional[Type[BaseModel]]): The response
1853
- format to use.
1854
- tool_schemas (Optional[List[Dict[str, Any]]]): The tool schemas
1855
- to use.
1856
- prev_num_openai_messages (int): The number of openai messages
1857
- logged in the previous iteration.
1858
-
1859
- Returns:
1860
- ModelResponse: The model response.
1861
- """
1862
-
1863
- response = None
1864
- try:
1865
- response = await self.model_backend.arun(
1866
- openai_messages, response_format, tool_schemas or None
1867
- )
1868
- except Exception as exc:
1869
- logger.error(
1870
- f"An error occurred while running model "
1871
- f"{self.model_backend.model_type}, "
1872
- f"index: {self.model_backend.current_model_index}",
1873
- exc_info=exc,
1874
- )
1875
- error_info = str(exc)
3363
+ r"""Internal function for agent async step model response."""
3364
+ last_error = None
1876
3365
 
1877
- if not response and self.model_backend.num_models > 1:
1878
- raise ModelProcessingError(
1879
- "Unable to process messages: none of the provided models "
1880
- "run successfully."
1881
- )
1882
- elif not response:
3366
+ for attempt in range(self.retry_attempts):
3367
+ try:
3368
+ response = await self.model_backend.arun(
3369
+ openai_messages, response_format, tool_schemas or None
3370
+ )
3371
+ if response:
3372
+ break
3373
+ except RateLimitError as e:
3374
+ if self._is_token_limit_error(e):
3375
+ raise
3376
+ last_error = e
3377
+ if attempt < self.retry_attempts - 1:
3378
+ delay = min(self.retry_delay * (2**attempt), 60.0)
3379
+ delay = random.uniform(0, delay) # Add jitter
3380
+ logger.warning(
3381
+ f"Rate limit hit (attempt {attempt + 1}"
3382
+ f"/{self.retry_attempts}). "
3383
+ f"Retrying in {delay:.1f}s"
3384
+ )
3385
+ await asyncio.sleep(delay)
3386
+ else:
3387
+ logger.error(
3388
+ f"Rate limit exhausted after "
3389
+ f"{self.retry_attempts} attempts"
3390
+ )
3391
+ except Exception:
3392
+ logger.error(
3393
+ f"Model error: {self.model_backend.model_type}",
3394
+ exc_info=True,
3395
+ )
3396
+ raise
3397
+ else:
3398
+ # Loop completed without success
1883
3399
  raise ModelProcessingError(
1884
- f"Unable to process messages: the only provided model "
1885
- f"did not run successfully. Error: {error_info}"
3400
+ f"Unable to process messages: "
3401
+ f"{str(last_error) if last_error else 'Unknown error'}"
1886
3402
  )
1887
3403
 
1888
- sanitized_messages = self._sanitize_messages_for_logging(
3404
+ # Log success
3405
+ sanitized = self._sanitize_messages_for_logging(
1889
3406
  openai_messages, prev_num_openai_messages
1890
3407
  )
1891
3408
  logger.info(
1892
- f"Model {self.model_backend.model_type}, "
1893
- f"index {self.model_backend.current_model_index}, "
1894
- f"iteration {current_iteration}, "
1895
- f"processed these messages: {sanitized_messages}"
3409
+ f"Model {self.model_backend.model_type} "
3410
+ f"[{current_iteration}]: {sanitized}"
1896
3411
  )
3412
+
1897
3413
  if not isinstance(response, ChatCompletion):
1898
3414
  raise TypeError(
1899
- f"Expected response to be a `ChatCompletion` object, but "
1900
- f"got {type(response).__name__} instead."
3415
+ f"Expected ChatCompletion, got {type(response).__name__}"
1901
3416
  )
3417
+
1902
3418
  return self._handle_batch_response(response)
1903
3419
 
1904
3420
  def _sanitize_messages_for_logging(
@@ -1915,11 +3431,6 @@ class ChatAgent(BaseAgent):
1915
3431
  Returns:
1916
3432
  List[OpenAIMessage]: The sanitized OpenAI messages.
1917
3433
  """
1918
- import hashlib
1919
- import os
1920
- import re
1921
- import tempfile
1922
-
1923
3434
  # Create a copy of messages for logging to avoid modifying the
1924
3435
  # original messages
1925
3436
  sanitized_messages = []
@@ -1960,7 +3471,14 @@ class ChatAgent(BaseAgent):
1960
3471
 
1961
3472
  # Save image to temp directory for viewing
1962
3473
  try:
1963
- import base64
3474
+ # Sanitize img_format to prevent path
3475
+ # traversal
3476
+ safe_format = re.sub(
3477
+ r'[^a-zA-Z0-9]', '', img_format
3478
+ )[:10]
3479
+ img_filename = (
3480
+ f"image_{img_hash}.{safe_format}"
3481
+ )
1964
3482
 
1965
3483
  temp_dir = tempfile.gettempdir()
1966
3484
  img_path = os.path.join(
@@ -1975,6 +3493,9 @@ class ChatAgent(BaseAgent):
1975
3493
  base64_data
1976
3494
  )
1977
3495
  )
3496
+ # Register for cleanup
3497
+ with _temp_files_lock:
3498
+ _temp_files.add(img_path)
1978
3499
 
1979
3500
  # Create a file:// URL that can be
1980
3501
  # opened
@@ -2148,9 +3669,9 @@ class ChatAgent(BaseAgent):
2148
3669
  if tool_calls := response.choices[0].message.tool_calls:
2149
3670
  tool_call_requests = []
2150
3671
  for tool_call in tool_calls:
2151
- tool_name = tool_call.function.name
3672
+ tool_name = tool_call.function.name # type: ignore[union-attr]
2152
3673
  tool_call_id = tool_call.id
2153
- args = json.loads(tool_call.function.arguments)
3674
+ args = json.loads(tool_call.function.arguments) # type: ignore[union-attr]
2154
3675
  tool_call_request = ToolCallRequest(
2155
3676
  tool_name=tool_name, args=args, tool_call_id=tool_call_id
2156
3677
  )
@@ -2227,7 +3748,8 @@ class ChatAgent(BaseAgent):
2227
3748
  try:
2228
3749
  raw_result = tool(**args)
2229
3750
  if self.mask_tool_output:
2230
- self._secure_result_store[tool_call_id] = raw_result
3751
+ with self._secure_result_store_lock:
3752
+ self._secure_result_store[tool_call_id] = raw_result
2231
3753
  result = (
2232
3754
  "[The tool has been executed successfully, but the output"
2233
3755
  " from the tool is masked. You can move forward]"
@@ -2285,7 +3807,7 @@ class ChatAgent(BaseAgent):
2285
3807
  # Capture the error message to prevent framework crash
2286
3808
  error_msg = f"Error executing async tool '{func_name}': {e!s}"
2287
3809
  result = f"Tool execution failed: {error_msg}"
2288
- logging.warning(error_msg)
3810
+ logger.warning(error_msg)
2289
3811
  return self._record_tool_calling(func_name, args, result, tool_call_id)
2290
3812
 
2291
3813
  def _record_tool_calling(
@@ -2336,22 +3858,34 @@ class ChatAgent(BaseAgent):
2336
3858
  # This ensures the assistant message (tool call) always appears before
2337
3859
  # the function message (tool result) in the conversation context
2338
3860
  # Use time.time_ns() for nanosecond precision to avoid collisions
2339
- import time
2340
-
2341
3861
  current_time_ns = time.time_ns()
2342
3862
  base_timestamp = current_time_ns / 1_000_000_000 # Convert to seconds
2343
3863
 
2344
3864
  self.update_memory(
2345
- assist_msg, OpenAIBackendRole.ASSISTANT, timestamp=base_timestamp
3865
+ assist_msg,
3866
+ OpenAIBackendRole.ASSISTANT,
3867
+ timestamp=base_timestamp,
3868
+ return_records=self._enable_snapshot_clean,
2346
3869
  )
2347
3870
 
2348
3871
  # Add minimal increment to ensure function message comes after
2349
- self.update_memory(
3872
+ func_records = self.update_memory(
2350
3873
  func_msg,
2351
3874
  OpenAIBackendRole.FUNCTION,
2352
3875
  timestamp=base_timestamp + 1e-6,
3876
+ return_records=self._enable_snapshot_clean,
2353
3877
  )
2354
3878
 
3879
+ # Register tool output for snapshot cleaning if enabled
3880
+ if self._enable_snapshot_clean and not mask_output and func_records:
3881
+ serialized_result = self._serialize_tool_result(result)
3882
+ self._register_tool_output_for_cache(
3883
+ func_name,
3884
+ tool_call_id,
3885
+ serialized_result,
3886
+ cast(List[MemoryRecord], func_records),
3887
+ )
3888
+
2355
3889
  # Record information about this tool call
2356
3890
  tool_record = ToolCallingRecord(
2357
3891
  tool_name=func_name,
@@ -2360,6 +3894,7 @@ class ChatAgent(BaseAgent):
2360
3894
  tool_call_id=tool_call_id,
2361
3895
  )
2362
3896
 
3897
+ self._update_last_tool_call_state(tool_record)
2363
3898
  return tool_record
2364
3899
 
2365
3900
  def _stream(
@@ -2428,7 +3963,7 @@ class ChatAgent(BaseAgent):
2428
3963
  # Check termination condition
2429
3964
  if self.stop_event and self.stop_event.is_set():
2430
3965
  logger.info(
2431
- f"Termination triggered at iteration " f"{iteration_count}"
3966
+ f"Termination triggered at iteration {iteration_count}"
2432
3967
  )
2433
3968
  yield self._step_terminate(
2434
3969
  num_tokens, tool_call_records, "termination_triggered"
@@ -2611,12 +4146,6 @@ class ChatAgent(BaseAgent):
2611
4146
  stream_completed = False
2612
4147
 
2613
4148
  for chunk in stream:
2614
- # Update token usage if available
2615
- if chunk.usage:
2616
- self._update_token_usage_tracker(
2617
- step_token_usage, safe_model_dump(chunk.usage)
2618
- )
2619
-
2620
4149
  # Process chunk delta
2621
4150
  if chunk.choices and len(chunk.choices) > 0:
2622
4151
  choice = chunk.choices[0]
@@ -2649,12 +4178,6 @@ class ChatAgent(BaseAgent):
2649
4178
  # If we have complete tool calls, execute them with
2650
4179
  # sync status updates
2651
4180
  if accumulated_tool_calls:
2652
- # Record assistant message with tool calls first
2653
- self._record_assistant_tool_calls_message(
2654
- accumulated_tool_calls,
2655
- content_accumulator.get_full_content(),
2656
- )
2657
-
2658
4181
  # Execute tools synchronously with
2659
4182
  # optimized status updates
2660
4183
  for (
@@ -2687,7 +4210,49 @@ class ChatAgent(BaseAgent):
2687
4210
  )
2688
4211
 
2689
4212
  self.record_message(final_message)
2690
- break
4213
+ elif chunk.usage and not chunk.choices:
4214
+ # Handle final chunk with usage but empty choices
4215
+ # This happens when stream_options={"include_usage": True}
4216
+ # Update the final usage from this chunk
4217
+ self._update_token_usage_tracker(
4218
+ step_token_usage, safe_model_dump(chunk.usage)
4219
+ )
4220
+
4221
+ # Create final response with final usage
4222
+ final_content = content_accumulator.get_full_content()
4223
+ if final_content.strip():
4224
+ final_message = BaseMessage(
4225
+ role_name=self.role_name,
4226
+ role_type=self.role_type,
4227
+ meta_dict={},
4228
+ content=final_content,
4229
+ )
4230
+
4231
+ if response_format:
4232
+ self._try_format_message(
4233
+ final_message, response_format
4234
+ )
4235
+
4236
+ # Create final response with final usage (not partial)
4237
+ final_response = ChatAgentResponse(
4238
+ msgs=[final_message],
4239
+ terminated=False,
4240
+ info={
4241
+ "id": getattr(chunk, 'id', ''),
4242
+ "usage": step_token_usage.copy(),
4243
+ "finish_reasons": ["stop"],
4244
+ "num_tokens": self._get_token_count(final_content),
4245
+ "tool_calls": tool_call_records or [],
4246
+ "external_tool_requests": None,
4247
+ "streaming": False,
4248
+ "partial": False,
4249
+ },
4250
+ )
4251
+ yield final_response
4252
+ break
4253
+ elif stream_completed:
4254
+ # If we've already seen finish_reason but no usage chunk, exit
4255
+ break
2691
4256
 
2692
4257
  return stream_completed, tool_calls_complete
2693
4258
 
@@ -2767,77 +4332,70 @@ class ChatAgent(BaseAgent):
  accumulated_tool_calls: Dict[str, Any],
  tool_call_records: List[ToolCallingRecord],
  ) -> Generator[ChatAgentResponse, None, None]:
- r"""Execute multiple tools synchronously with
- proper content accumulation, using threads+queue for
- non-blocking status streaming."""
-
- def tool_worker(result_queue, tool_call_data):
- try:
- tool_call_record = self._execute_tool_from_stream_data(
- tool_call_data
- )
- result_queue.put(tool_call_record)
- except Exception as e:
- logger.error(f"Error in threaded tool execution: {e}")
- result_queue.put(None)
+ r"""Execute multiple tools synchronously with proper content
+ accumulation, using ThreadPoolExecutor for better timeout handling."""

  tool_calls_to_execute = []
  for _tool_call_index, tool_call_data in accumulated_tool_calls.items():
  if tool_call_data.get('complete', False):
  tool_calls_to_execute.append(tool_call_data)

- # Phase 2: Execute tools in threads and yield status while waiting
- for tool_call_data in tool_calls_to_execute:
- function_name = tool_call_data['function']['name']
- try:
- args = json.loads(tool_call_data['function']['arguments'])
- except json.JSONDecodeError:
- args = tool_call_data['function']['arguments']
- result_queue: queue.Queue[Optional[ToolCallingRecord]] = (
- queue.Queue()
- )
- thread = threading.Thread(
- target=tool_worker,
- args=(
- self._internal_tools[function_name],
- args,
- result_queue,
- tool_call_data,
- ),
- )
- thread.start()
-
- # Log debug info instead of adding to content
- logger.info(
- f"Calling function: {function_name} with arguments: {args}"
- )
-
- # wait for tool thread to finish with optional timeout
- thread.join(self.tool_execution_timeout)
+ if not tool_calls_to_execute:
+ # No tools to execute, return immediately
+ return
+ yield # Make this a generator
+
+ # Execute tools using ThreadPoolExecutor for proper timeout handling
+ # Use max_workers=len() for parallel execution, with min of 1
+ with concurrent.futures.ThreadPoolExecutor(
+ max_workers=max(1, len(tool_calls_to_execute))
+ ) as executor:
+ # Submit all tools first (parallel execution)
+ futures_map = {}
+ for tool_call_data in tool_calls_to_execute:
+ function_name = tool_call_data['function']['name']
+ try:
+ args = json.loads(tool_call_data['function']['arguments'])
+ except json.JSONDecodeError:
+ args = tool_call_data['function']['arguments']

- # If timeout occurred, mark as error and continue
- if thread.is_alive():
- # Log timeout info instead of adding to content
- logger.warning(
- f"Function '{function_name}' timed out after "
- f"{self.tool_execution_timeout} seconds"
+ # Log debug info
+ logger.info(
+ f"Calling function: {function_name} with arguments: {args}"
  )

- # Detach thread (it may still finish later). Skip recording.
- continue
-
- # Tool finished, get result
- tool_call_record = result_queue.get()
- if tool_call_record:
- tool_call_records.append(tool_call_record)
- raw_result = tool_call_record.result
- result_str = str(raw_result)
+ # Submit tool execution (non-blocking)
+ future = executor.submit(
+ self._execute_tool_from_stream_data, tool_call_data
+ )
+ futures_map[future] = (function_name, tool_call_data)
+
+ # Wait for all futures to complete (or timeout)
+ for future in concurrent.futures.as_completed(
+ futures_map.keys(),
+ timeout=self.tool_execution_timeout
+ if self.tool_execution_timeout
+ else None,
+ ):
+ function_name, tool_call_data = futures_map[future]

- # Log debug info instead of adding to content
- logger.info(f"Function output: {result_str}")
- else:
- # Error already logged
- continue
+ try:
+ tool_call_record = future.result()
+ if tool_call_record:
+ tool_call_records.append(tool_call_record)
+ logger.info(
+ f"Function output: {tool_call_record.result}"
+ )
+ except concurrent.futures.TimeoutError:
+ logger.warning(
+ f"Function '{function_name}' timed out after "
+ f"{self.tool_execution_timeout} seconds"
+ )
+ future.cancel()
+ except Exception as e:
+ logger.error(
+ f"Error executing tool '{function_name}': {e}"
+ )

  # Ensure this function remains a generator (required by type signature)
  return
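The rewritten body above swaps the hand-rolled thread-plus-queue worker for concurrent.futures: all tool calls are submitted up front, then collected with as_completed under a single timeout. A minimal standalone sketch of that stdlib pattern (run_tool, the call list, and the 2-second timeout are illustrative, not taken from the package):

    import concurrent.futures
    import time


    def run_tool(name: str, delay: float) -> str:
        # Stand-in for a real tool invocation.
        time.sleep(delay)
        return f"{name} done"


    calls = [("search", 0.1), ("calculator", 0.2)]
    results = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=len(calls)) as executor:
        futures = {executor.submit(run_tool, n, d): n for n, d in calls}
        try:
            for future in concurrent.futures.as_completed(futures, timeout=2.0):
                name = futures[future]
                try:
                    results.append(future.result())
                except Exception as exc:
                    print(f"tool {name} failed: {exc}")
        except concurrent.futures.TimeoutError:
            # as_completed raises when unfinished futures remain at the deadline.
            print("some tools did not finish before the timeout")

    print(results)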
@@ -2857,10 +4415,19 @@ class ChatAgent(BaseAgent):
  tool = self._internal_tools[function_name]
  try:
  result = tool(**args)
+ # First, create and record the assistant message with tool
+ # call
+ assist_msg = FunctionCallingMessage(
+ role_name=self.role_name,
+ role_type=self.role_type,
+ meta_dict=None,
+ content="",
+ func_name=function_name,
+ args=args,
+ tool_call_id=tool_call_id,
+ )

- # Only record the tool response message, not the assistant
- # message assistant message with tool_calls was already
- # recorded in _record_assistant_tool_calls_message
+ # Then create the tool response message
  func_msg = FunctionCallingMessage(
  role_name=self.role_name,
  role_type=self.role_type,
@@ -2871,21 +4438,39 @@ class ChatAgent(BaseAgent):
  tool_call_id=tool_call_id,
  )

- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
+ # Record both messages with precise timestamps to ensure
+ # correct ordering
+ current_time_ns = time.time_ns()
+ base_timestamp = (
+ current_time_ns / 1_000_000_000
+ ) # Convert to seconds
+
+ self.update_memory(
+ assist_msg,
+ OpenAIBackendRole.ASSISTANT,
+ timestamp=base_timestamp,
+ )
+ self.update_memory(
+ func_msg,
+ OpenAIBackendRole.FUNCTION,
+ timestamp=base_timestamp + 1e-6,
+ )

- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
  tool_name=function_name,
  args=args,
  result=result,
  tool_call_id=tool_call_id,
  )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record

  except Exception as e:
  error_msg = (
  f"Error executing tool '{function_name}': {e!s}"
  )
  result = {"error": error_msg}
- logging.warning(error_msg)
+ logger.warning(error_msg)

  # Record error response
  func_msg = FunctionCallingMessage(
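The hunk above records the assistant tool-call message and the tool result with explicit timestamps one microsecond apart, so a chronological sort of memory records can never interleave them with entries written in the same clock tick. A small illustrative sketch of the idea (the dict-based record and the record() helper are simplified stand-ins, not the package's memory classes):

    import time

    records = []


    def record(role: str, content: str, timestamp: float) -> None:
        records.append({"role": role, "content": content, "timestamp": timestamp})


    base = time.time_ns() / 1_000_000_000  # nanosecond clock, expressed in seconds

    record("assistant", "<tool call: search(query='camel')>", base)
    record("function", "<tool result: ...>", base + 1e-6)  # strictly later

    # A timestamp sort now always keeps the call before its result.
    ordered = sorted(records, key=lambda r: r["timestamp"])
    assert [r["role"] for r in ordered] == ["assistant", "function"]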
@@ -2900,12 +4485,14 @@ class ChatAgent(BaseAgent):

  self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)

- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
  tool_name=function_name,
  args=args,
  result=result,
  tool_call_id=tool_call_id,
  )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record
  else:
  logger.warning(
  f"Tool '{function_name}' not found in internal tools"
@@ -2927,6 +4514,23 @@ class ChatAgent(BaseAgent):
  tool_call_id = tool_call_data['id']

  if function_name in self._internal_tools:
+ # Create the tool call message
+ assist_msg = FunctionCallingMessage(
+ role_name=self.role_name,
+ role_type=self.role_type,
+ meta_dict=None,
+ content="",
+ func_name=function_name,
+ args=args,
+ tool_call_id=tool_call_id,
+ )
+ assist_ts = time.time_ns() / 1_000_000_000
+ self.update_memory(
+ assist_msg,
+ OpenAIBackendRole.ASSISTANT,
+ timestamp=assist_ts,
+ )
+
  tool = self._internal_tools[function_name]
  try:
  # Try different invocation paths in order of preference
@@ -2956,9 +4560,7 @@ class ChatAgent(BaseAgent):
  # Fallback: synchronous call
  result = tool(**args)

- # Only record the tool response message, not the assistant
- # message assistant message with tool_calls was already
- # recorded in _record_assistant_tool_calls_message
+ # Create the tool response message
  func_msg = FunctionCallingMessage(
  role_name=self.role_name,
  role_type=self.role_type,
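The async path above tries the tool's preferred invocation route first and only falls back to a plain synchronous call as the last resort. A generic sketch of that kind of dispatch, assuming nothing about the package's FunctionTool internals (the async_call attribute check and the invoke_tool helper are illustrative, not the library's API):

    import asyncio
    import inspect
    from typing import Any, Callable


    async def invoke_tool(tool: Callable[..., Any], **kwargs: Any) -> Any:
        # Prefer an explicit async entry point if the object exposes one.
        async_call = getattr(tool, "async_call", None)
        if async_call is not None and inspect.iscoroutinefunction(async_call):
            return await async_call(**kwargs)
        # Next, await the callable itself if it is a coroutine function.
        if inspect.iscoroutinefunction(tool):
            return await tool(**kwargs)
        # Fallback: run the synchronous callable without blocking the loop.
        return await asyncio.to_thread(tool, **kwargs)


    async def main() -> None:
        def add(a: int, b: int) -> int:
            return a + b

        print(await invoke_tool(add, a=1, b=2))


    asyncio.run(main())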
@@ -2968,22 +4570,28 @@ class ChatAgent(BaseAgent):
  result=result,
  tool_call_id=tool_call_id,
  )
+ func_ts = time.time_ns() / 1_000_000_000
+ self.update_memory(
+ func_msg,
+ OpenAIBackendRole.FUNCTION,
+ timestamp=func_ts,
+ )

- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
-
- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
  tool_name=function_name,
  args=args,
  result=result,
  tool_call_id=tool_call_id,
  )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record

  except Exception as e:
  error_msg = (
  f"Error executing async tool '{function_name}': {e!s}"
  )
  result = {"error": error_msg}
- logging.warning(error_msg)
+ logger.warning(error_msg)

  # Record error response
  func_msg = FunctionCallingMessage(
@@ -2995,15 +4603,21 @@ class ChatAgent(BaseAgent):
  result=result,
  tool_call_id=tool_call_id,
  )
+ func_ts = time.time_ns() / 1_000_000_000
+ self.update_memory(
+ func_msg,
+ OpenAIBackendRole.FUNCTION,
+ timestamp=func_ts,
+ )

- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
-
- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
  tool_name=function_name,
  args=args,
  result=result,
  tool_call_id=tool_call_id,
  )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record
  else:
  logger.warning(
  f"Tool '{function_name}' not found in internal tools"
@@ -3093,7 +4707,7 @@ class ChatAgent(BaseAgent):
  # Check termination condition
  if self.stop_event and self.stop_event.is_set():
  logger.info(
- f"Termination triggered at iteration " f"{iteration_count}"
+ f"Termination triggered at iteration {iteration_count}"
  )
  yield self._step_terminate(
  num_tokens, tool_call_records, "termination_triggered"
@@ -3320,18 +4934,13 @@ class ChatAgent(BaseAgent):
  response_format: Optional[Type[BaseModel]] = None,
  ) -> AsyncGenerator[Union[ChatAgentResponse, Tuple[bool, bool]], None]:
  r"""Async version of process streaming chunks with
- content accumulator."""
+ content accumulator.
+ """

  tool_calls_complete = False
  stream_completed = False

  async for chunk in stream:
- # Update token usage if available
- if chunk.usage:
- self._update_token_usage_tracker(
- step_token_usage, safe_model_dump(chunk.usage)
- )
-
  # Process chunk delta
  if chunk.choices and len(chunk.choices) > 0:
  choice = chunk.choices[0]
@@ -3364,13 +4973,6 @@ class ChatAgent(BaseAgent):
  # If we have complete tool calls, execute them with
  # async status updates
  if accumulated_tool_calls:
- # Record assistant message with
- # tool calls first
- self._record_assistant_tool_calls_message(
- accumulated_tool_calls,
- content_accumulator.get_full_content(),
- )
-
  # Execute tools asynchronously with real-time
  # status updates
  async for (
@@ -3405,7 +5007,49 @@ class ChatAgent(BaseAgent):
  )

  self.record_message(final_message)
- break
+ elif chunk.usage and not chunk.choices:
+ # Handle final chunk with usage but empty choices
+ # This happens when stream_options={"include_usage": True}
+ # Update the final usage from this chunk
+ self._update_token_usage_tracker(
+ step_token_usage, safe_model_dump(chunk.usage)
+ )
+
+ # Create final response with final usage
+ final_content = content_accumulator.get_full_content()
+ if final_content.strip():
+ final_message = BaseMessage(
+ role_name=self.role_name,
+ role_type=self.role_type,
+ meta_dict={},
+ content=final_content,
+ )
+
+ if response_format:
+ self._try_format_message(
+ final_message, response_format
+ )
+
+ # Create final response with final usage (not partial)
+ final_response = ChatAgentResponse(
+ msgs=[final_message],
+ terminated=False,
+ info={
+ "id": getattr(chunk, 'id', ''),
+ "usage": step_token_usage.copy(),
+ "finish_reasons": ["stop"],
+ "num_tokens": self._get_token_count(final_content),
+ "tool_calls": tool_call_records or [],
+ "external_tool_requests": None,
+ "streaming": False,
+ "partial": False,
+ },
+ )
+ yield final_response
+ break
+ elif stream_completed:
+ # If we've already seen finish_reason but no usage chunk, exit
+ break

  # Yield the final status as a tuple
  yield (stream_completed, tool_calls_complete)
@@ -3498,15 +5142,18 @@ class ChatAgent(BaseAgent):
  ) -> ChatAgentResponse:
  r"""Create a streaming response using content accumulator."""

- # Add new content to accumulator and get full content
+ # Add new content; only build full content when needed
  accumulator.add_streaming_content(new_content)
- full_content = accumulator.get_full_content()
+ if self.stream_accumulate:
+ message_content = accumulator.get_full_content()
+ else:
+ message_content = new_content

  message = BaseMessage(
  role_name=self.role_name,
  role_type=self.role_type,
  meta_dict={},
- content=full_content,
+ content=message_content,
  )

  return ChatAgentResponse(
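With the new stream_accumulate flag, each streamed ChatAgentResponse carries either the full text accumulated so far (the previous behaviour) or only the latest delta. A toy accumulator showing the difference (the class and the emit() helper are illustrative, not the package's implementation):

    class StreamingAccumulator:
        """Collects streamed text deltas and can report the running total."""

        def __init__(self) -> None:
            self._parts: list = []

        def add_streaming_content(self, delta: str) -> None:
            self._parts.append(delta)

        def get_full_content(self) -> str:
            return "".join(self._parts)


    def emit(acc: StreamingAccumulator, delta: str, accumulate: bool) -> str:
        acc.add_streaming_content(delta)
        # accumulate=True -> cumulative snapshots; False -> raw deltas only.
        return acc.get_full_content() if accumulate else delta


    acc = StreamingAccumulator()
    print(emit(acc, "Hel", accumulate=True))    # "Hel"
    print(emit(acc, "lo!", accumulate=True))    # "Hello!"

    acc2 = StreamingAccumulator()
    print(emit(acc2, "Hel", accumulate=False))  # "Hel"
    print(emit(acc2, "lo!", accumulate=False))  # "lo!"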
@@ -3516,7 +5163,7 @@ class ChatAgent(BaseAgent):
  "id": response_id,
  "usage": step_token_usage.copy(),
  "finish_reasons": ["streaming"],
- "num_tokens": self._get_token_count(full_content),
+ "num_tokens": self._get_token_count(message_content),
  "tool_calls": tool_call_records or [],
  "external_tool_requests": None,
  "streaming": True,
@@ -3572,10 +5219,12 @@ class ChatAgent(BaseAgent):
  configuration.
  """
  # Create a new instance with the same configuration
- # If with_memory is True, set system_message to None
- # If with_memory is False, use the original system message
+ # If with_memory is True, set system_message to None (it will be
+ # copied from memory below, including any workflow context)
+ # If with_memory is False, use the current system message
+ # (which may include appended workflow context)
  # To avoid duplicated system memory.
- system_message = None if with_memory else self._original_system_message
+ system_message = None if with_memory else self._system_message

  # Clone tools and collect toolkits that need registration
  cloned_tools, toolkits_to_register = self._clone_tools()
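For context, a short usage sketch of the cloning behaviour the comments above describe; it assumes a ChatAgent with a default or preconfigured model backend, and the key point is only how with_memory selects the system-message source:

    from camel.agents import ChatAgent

    # Assumes a usable default model backend is configured (e.g. via env vars).
    base_agent = ChatAgent(system_message="You are a helpful assistant.")

    # with_memory=True: the clone starts with system_message=None and then
    # copies the original agent's memory, so the system record (including any
    # appended workflow context) comes over exactly once.
    stateful_clone = base_agent.clone(with_memory=True)

    # with_memory=False: the clone reuses the current system message only,
    # without the conversation history.
    fresh_clone = base_agent.clone(with_memory=False)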
@@ -3589,7 +5238,7 @@ class ChatAgent(BaseAgent):
  self.memory.get_context_creator(), "token_limit", None
  ),
  output_language=self._output_language,
- tools=cloned_tools,
+ tools=cast(List[Union[FunctionTool, Callable]], cloned_tools),
  toolkits_to_register_agent=toolkits_to_register,
  external_tools=[
  schema for schema in self._external_tool_schemas.values()
@@ -3603,6 +5252,7 @@ class ChatAgent(BaseAgent):
  tool_execution_timeout=self.tool_execution_timeout,
  pause_event=self.pause_event,
  prune_tool_calls_from_memory=self.prune_tool_calls_from_memory,
+ stream_accumulate=self.stream_accumulate,
  )

  # Copy memory if requested
@@ -3617,9 +5267,7 @@ class ChatAgent(BaseAgent):

  def _clone_tools(
  self,
- ) -> Tuple[
- List[Union[FunctionTool, Callable]], List[RegisteredAgentToolkit]
- ]:
+ ) -> Tuple[List[FunctionTool], List[RegisteredAgentToolkit]]:
  r"""Clone tools and return toolkits that need agent registration.

  This method handles stateful toolkits by cloning them if they have
@@ -3674,15 +5322,65 @@ class ChatAgent(BaseAgent):
  # Get the method from the cloned (or original) toolkit
  toolkit = cloned_toolkits[toolkit_id]
  method_name = tool.func.__name__
+
+ # Check if toolkit was actually cloned or just reused
+ toolkit_was_cloned = toolkit is not toolkit_instance
+
  if hasattr(toolkit, method_name):
  new_method = getattr(toolkit, method_name)
- cloned_tools.append(new_method)
+
+ # If toolkit wasn't cloned (stateless), preserve the
+ # original function to maintain any enhancements/wrappers
+ if not toolkit_was_cloned:
+ # Toolkit is stateless, safe to reuse original function
+ cloned_tools.append(
+ FunctionTool(
+ func=tool.func,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ )
+ continue
+
+ # Toolkit was cloned, use the new method
+ # Wrap cloned method into a new FunctionTool,
+ # preserving schema
+ try:
+ new_tool = FunctionTool(
+ func=new_method,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ cloned_tools.append(new_tool)
+ except Exception as e:
+ # If wrapping fails, fallback to wrapping the original
+ # function with its schema to maintain consistency
+ logger.warning(
+ f"Failed to wrap cloned toolkit "
+ f"method '{method_name}' "
+ f"with FunctionTool: {e}. Using original "
+ f"function with preserved schema instead."
+ )
+ cloned_tools.append(
+ FunctionTool(
+ func=tool.func,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ )
  else:
- # Fallback to original function
- cloned_tools.append(tool.func)
+ # Fallback to original function wrapped in FunctionTool
+ cloned_tools.append(
+ FunctionTool(
+ func=tool.func,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ )
  else:
- # Not a toolkit method, just use the original function
- cloned_tools.append(tool.func)
+ # Not a toolkit method, preserve FunctionTool schema directly
+ cloned_tools.append(
+ FunctionTool(
+ func=tool.func,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ )

  return cloned_tools, toolkits_to_register
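The _clone_tools changes above stop handing raw callables back and instead wrap every function in a FunctionTool while reusing the already-generated OpenAI schema, so cloned agents keep identical tool descriptions. A minimal sketch of that wrapping step, assuming only the FunctionTool constructor arguments visible in the diff (the get_weather tool is illustrative):

    from camel.toolkits import FunctionTool


    def get_weather(city: str) -> str:
        r"""Return a canned weather report for a city.

        Args:
            city (str): Name of the city to report on.
        """
        return f"It is sunny in {city}."


    # Build a tool once; the OpenAI tool schema is derived from the docstring.
    original_tool = FunctionTool(func=get_weather)
    schema = original_tool.get_openai_tool_schema()

    # When cloning, wrap the (possibly rebound) callable in a fresh FunctionTool
    # and pass the existing schema through so descriptions stay identical.
    cloned_tool = FunctionTool(func=get_weather, openai_tool_schema=schema)

    assert cloned_tool.get_openai_tool_schema() == schema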