camel-ai 0.2.78__py3-none-any.whl → 0.2.79a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)
  1. camel/__init__.py +1 -1
  2. camel/agents/_utils.py +38 -0
  3. camel/agents/chat_agent.py +1112 -287
  4. camel/datasets/base_generator.py +39 -10
  5. camel/environments/single_step.py +28 -3
  6. camel/memories/__init__.py +1 -2
  7. camel/memories/agent_memories.py +34 -0
  8. camel/memories/base.py +26 -0
  9. camel/memories/blocks/chat_history_block.py +117 -17
  10. camel/memories/context_creators/score_based.py +25 -384
  11. camel/messages/base.py +26 -0
  12. camel/models/aws_bedrock_model.py +1 -17
  13. camel/models/azure_openai_model.py +113 -67
  14. camel/models/model_factory.py +17 -1
  15. camel/models/moonshot_model.py +102 -5
  16. camel/models/openai_compatible_model.py +62 -32
  17. camel/models/openai_model.py +61 -35
  18. camel/models/samba_model.py +34 -15
  19. camel/models/sglang_model.py +41 -11
  20. camel/societies/workforce/__init__.py +2 -0
  21. camel/societies/workforce/events.py +122 -0
  22. camel/societies/workforce/role_playing_worker.py +15 -11
  23. camel/societies/workforce/single_agent_worker.py +143 -291
  24. camel/societies/workforce/utils.py +2 -1
  25. camel/societies/workforce/workflow_memory_manager.py +772 -0
  26. camel/societies/workforce/workforce.py +513 -188
  27. camel/societies/workforce/workforce_callback.py +74 -0
  28. camel/societies/workforce/workforce_logger.py +144 -140
  29. camel/societies/workforce/workforce_metrics.py +33 -0
  30. camel/storages/vectordb_storages/oceanbase.py +5 -4
  31. camel/toolkits/file_toolkit.py +166 -0
  32. camel/toolkits/message_integration.py +15 -13
  33. camel/toolkits/terminal_toolkit/terminal_toolkit.py +112 -79
  34. camel/types/enums.py +1 -0
  35. camel/utils/context_utils.py +201 -2
  36. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/METADATA +14 -13
  37. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/RECORD +39 -35
  38. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/WHEEL +0 -0
  39. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/licenses/LICENSE +0 -0
@@ -20,7 +20,6 @@ import concurrent.futures
  import hashlib
  import inspect
  import json
- import math
  import os
  import random
  import re
@@ -57,6 +56,7 @@ from pydantic import BaseModel, ValidationError

  from camel.agents._types import ModelResponse, ToolCallRequest
  from camel.agents._utils import (
+ build_default_summary_prompt,
  convert_to_function_tool,
  convert_to_schema,
  get_info_dict,
@@ -68,10 +68,10 @@ from camel.logger import get_logger
  from camel.memories import (
  AgentMemory,
  ChatHistoryMemory,
+ ContextRecord,
  MemoryRecord,
  ScoreBasedContextCreator,
  )
- from camel.memories.blocks.chat_history_block import EmptyMemoryWarning
  from camel.messages import (
  BaseMessage,
  FunctionCallingMessage,
@@ -103,6 +103,16 @@ from camel.utils import (
  from camel.utils.commons import dependencies_required
  from camel.utils.context_utils import ContextUtility

+ TOKEN_LIMIT_ERROR_MARKERS = (
+ "context_length_exceeded",
+ "prompt is too long",
+ "exceeded your current quota",
+ "tokens must be reduced",
+ "context length",
+ "token count",
+ "context limit",
+ )
+
  if TYPE_CHECKING:
  from camel.terminators import ResponseTerminator

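Note: the new module-level TOKEN_LIMIT_ERROR_MARKERS tuple feeds the `_is_token_limit_error` helper added further down in this diff, which lower-cases the exception text and substring-matches it against these markers. A minimal sketch of that check (the error message below is illustrative, not taken from the package):

    error = RuntimeError("Error code: 400 - context_length_exceeded")
    hit = any(marker in str(error).lower() for marker in TOKEN_LIMIT_ERROR_MARKERS)
    # hit is True here, so ChatAgent would prune the offending records,
    # summarize the remaining context, and retry the step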
@@ -355,9 +365,9 @@ class ChatAgent(BaseAgent):
  message_window_size (int, optional): The maximum number of previous
  messages to include in the context window. If `None`, no windowing
  is performed. (default: :obj:`None`)
- token_limit (int, optional): The maximum number of tokens in a context.
- The context will be automatically pruned to fulfill the limitation.
- If `None`, it will be set according to the backend model.
+ summarize_threshold (int, optional): The percentage of the context
+ window that triggers summarization. If `None`, will trigger
+ summarization when the context window is full.
  (default: :obj:`None`)
  output_language (str, optional): The language to be output by the
  agent. (default: :obj:`None`)
@@ -415,6 +425,10 @@ class ChatAgent(BaseAgent):
  updates return accumulated content (current behavior). When False,
  partial updates return only the incremental delta. (default:
  :obj:`True`)
+ summary_window_ratio (float, optional): Maximum fraction of the total
+ context window that can be occupied by summary information. Used
+ to limit how much of the model's context is reserved for
+ summarization results. (default: :obj:`0.6`)
  """

  def __init__(
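A minimal usage sketch for the two constructor options documented above. Only `summarize_threshold` and `summary_window_ratio` come from this diff; the system message and the default model backend are assumptions for illustration:

    from camel.agents import ChatAgent

    agent = ChatAgent(
        system_message="You are a helpful assistant.",
        summarize_threshold=50,    # summarize once context passes 50% of the token limit
        summary_window_ratio=0.6,  # at most ~60% of the window may be held by summaries
    )

With summarize_threshold=None, automatic compression is only triggered once the context window is actually full.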
@@ -437,6 +451,7 @@ class ChatAgent(BaseAgent):
  ] = None,
  memory: Optional[AgentMemory] = None,
  message_window_size: Optional[int] = None,
+ summarize_threshold: Optional[int] = 50,
  token_limit: Optional[int] = None,
  output_language: Optional[str] = None,
  tools: Optional[List[Union[FunctionTool, Callable]]] = None,
@@ -459,6 +474,7 @@ class ChatAgent(BaseAgent):
  retry_delay: float = 1.0,
  step_timeout: Optional[float] = None,
  stream_accumulate: bool = True,
+ summary_window_ratio: float = 0.6,
  ) -> None:
  if isinstance(model, ModelManager):
  self.model_backend = model
@@ -477,7 +493,7 @@ class ChatAgent(BaseAgent):
  # Set up memory
  context_creator = ScoreBasedContextCreator(
  self.model_backend.token_counter,
- token_limit or self.model_backend.token_limit,
+ self.model_backend.token_limit,
  )

  self._memory: AgentMemory = memory or ChatHistoryMemory(
@@ -492,9 +508,7 @@ class ChatAgent(BaseAgent):

  # Set up system message and initialize messages
  self._original_system_message = (
- BaseMessage.make_assistant_message(
- role_name="Assistant", content=system_message
- )
+ BaseMessage.make_system_message(system_message)
  if isinstance(system_message, str)
  else system_message
  )
@@ -504,6 +518,21 @@ class ChatAgent(BaseAgent):
  )
  self.init_messages()

+ # Set up summarize threshold with validation
+ if summarize_threshold is not None:
+ if not (0 < summarize_threshold <= 100):
+ raise ValueError(
+ f"summarize_threshold must be between 0 and 100, "
+ f"got {summarize_threshold}"
+ )
+ logger.info(
+ f"Automatic context compression is enabled. Will trigger "
+ f"summarization when context window exceeds "
+ f"{summarize_threshold}% of the total token limit."
+ )
+ self.summarize_threshold = summarize_threshold
+ self._reset_summary_state()
+
  # Set up role name and role type
  self.role_name: str = (
  getattr(self.system_message, "role_name", None) or "assistant"
@@ -551,11 +580,16 @@ class ChatAgent(BaseAgent):
  self._context_utility: Optional[ContextUtility] = None
  self._context_summary_agent: Optional["ChatAgent"] = None
  self.stream_accumulate = stream_accumulate
+ self._last_tool_call_record: Optional[ToolCallingRecord] = None
+ self._last_tool_call_signature: Optional[str] = None
+ self._last_token_limit_tool_signature: Optional[str] = None
+ self.summary_window_ratio = summary_window_ratio

  def reset(self):
  r"""Resets the :obj:`ChatAgent` to its initial state."""
  self.terminated = False
  self.init_messages()
+ self._reset_summary_state()
  for terminator in self.response_terminators:
  terminator.reset()

@@ -762,6 +796,329 @@ class ChatAgent(BaseAgent):
762
796
  for func_tool in self._internal_tools.values()
763
797
  ]
764
798
 
799
+ @staticmethod
800
+ def _is_token_limit_error(error: Exception) -> bool:
801
+ r"""Return True when the exception message indicates a token limit."""
802
+ error_message = str(error).lower()
803
+ return any(
804
+ marker in error_message for marker in TOKEN_LIMIT_ERROR_MARKERS
805
+ )
806
+
807
+ @staticmethod
808
+ def _is_tool_related_record(record: MemoryRecord) -> bool:
809
+ r"""Determine whether the given memory record
810
+ belongs to a tool call."""
811
+ if record.role_at_backend in {
812
+ OpenAIBackendRole.TOOL,
813
+ OpenAIBackendRole.FUNCTION,
814
+ }:
815
+ return True
816
+
817
+ if (
818
+ record.role_at_backend == OpenAIBackendRole.ASSISTANT
819
+ and isinstance(record.message, FunctionCallingMessage)
820
+ ):
821
+ return True
822
+
823
+ return False
824
+
825
+ def _find_indices_to_remove_for_last_tool_pair(
826
+ self, recent_records: List[ContextRecord]
827
+ ) -> List[int]:
828
+ """Find indices of records that should be removed to clean up the most
829
+ recent incomplete tool interaction pair.
830
+
831
+ This method identifies tool call/result pairs by tool_call_id and
832
+ returns the exact indices to remove, allowing non-contiguous deletions.
833
+
834
+ Logic:
835
+ - If the last record is a tool result (TOOL/FUNCTION) with a
836
+ tool_call_id, find the matching assistant call anywhere in history
837
+ and return both indices.
838
+ - If the last record is an assistant tool call without a result yet,
839
+ return just that index.
840
+ - For normal messages (non tool-related): remove just the last one.
841
+ - Fallback: If no tool_call_id is available, use heuristic (last 2 if
842
+ tool-related, otherwise last 1).
843
+
844
+ Returns:
845
+ List[int]: Indices to remove (may be non-contiguous).
846
+ """
847
+ if not recent_records:
848
+ return []
849
+
850
+ last_idx = len(recent_records) - 1
851
+ last_record = recent_records[last_idx].memory_record
852
+
853
+ # Case A: Last is an ASSISTANT tool call with no result yet
854
+ if (
855
+ last_record.role_at_backend == OpenAIBackendRole.ASSISTANT
856
+ and isinstance(last_record.message, FunctionCallingMessage)
857
+ and last_record.message.result is None
858
+ ):
859
+ return [last_idx]
860
+
861
+ # Case B: Last is TOOL/FUNCTION result, try id-based pairing
862
+ if last_record.role_at_backend in {
863
+ OpenAIBackendRole.TOOL,
864
+ OpenAIBackendRole.FUNCTION,
865
+ }:
866
+ tool_id = None
867
+ if isinstance(last_record.message, FunctionCallingMessage):
868
+ tool_id = last_record.message.tool_call_id
869
+
870
+ if tool_id:
871
+ for idx in range(len(recent_records) - 2, -1, -1):
872
+ rec = recent_records[idx].memory_record
873
+ if rec.role_at_backend != OpenAIBackendRole.ASSISTANT:
874
+ continue
875
+
876
+ # Check if this assistant message contains the tool_call_id
877
+ matched = False
878
+
879
+ # Case 1: FunctionCallingMessage (single tool call)
880
+ if isinstance(rec.message, FunctionCallingMessage):
881
+ if rec.message.tool_call_id == tool_id:
882
+ matched = True
883
+
884
+ # Case 2: BaseMessage with multiple tool_calls in meta_dict
885
+ elif (
886
+ hasattr(rec.message, "meta_dict")
887
+ and rec.message.meta_dict
888
+ ):
889
+ tool_calls_list = rec.message.meta_dict.get(
890
+ "tool_calls", []
891
+ )
892
+ if isinstance(tool_calls_list, list):
893
+ for tc in tool_calls_list:
894
+ if (
895
+ isinstance(tc, dict)
896
+ and tc.get("id") == tool_id
897
+ ):
898
+ matched = True
899
+ break
900
+
901
+ if matched:
902
+ # Return both assistant call and tool result indices
903
+ return [idx, last_idx]
904
+
905
+ # Fallback: no tool_call_id, use heuristic
906
+ if self._is_tool_related_record(last_record):
907
+ # Remove last 2 (assume they are paired)
908
+ return [last_idx - 1, last_idx] if last_idx > 0 else [last_idx]
909
+ else:
910
+ return [last_idx]
911
+
912
+ # Default: non tool-related tail => remove last one
913
+ return [last_idx]
914
+
915
+ @staticmethod
916
+ def _serialize_tool_args(args: Dict[str, Any]) -> str:
917
+ try:
918
+ return json.dumps(args, ensure_ascii=False, sort_keys=True)
919
+ except TypeError:
920
+ return str(args)
921
+
922
+ @classmethod
923
+ def _build_tool_signature(
924
+ cls, func_name: str, args: Dict[str, Any]
925
+ ) -> str:
926
+ args_repr = cls._serialize_tool_args(args)
927
+ return f"{func_name}:{args_repr}"
928
+
929
+ def _describe_tool_call(
930
+ self, record: Optional[ToolCallingRecord]
931
+ ) -> Optional[str]:
932
+ if record is None:
933
+ return None
934
+ args_repr = self._serialize_tool_args(record.args)
935
+ return f"Tool `{record.tool_name}` invoked with arguments {args_repr}."
936
+
937
+ def _update_last_tool_call_state(
938
+ self, record: Optional[ToolCallingRecord]
939
+ ) -> None:
940
+ """Track the most recent tool call and its identifying signature."""
941
+ self._last_tool_call_record = record
942
+ if record is None:
943
+ self._last_tool_call_signature = None
944
+ return
945
+
946
+ args = (
947
+ record.args
948
+ if isinstance(record.args, dict)
949
+ else {"_raw": record.args}
950
+ )
951
+ try:
952
+ signature = self._build_tool_signature(record.tool_name, args)
953
+ except Exception: # pragma: no cover - defensive guard
954
+ signature = None
955
+ self._last_tool_call_signature = signature
956
+
957
+ def _format_tool_limit_notice(self) -> Optional[str]:
958
+ record = self._last_tool_call_record
959
+ description = self._describe_tool_call(record)
960
+ if description is None:
961
+ return None
962
+ notice_lines = [
963
+ "[Tool Call Causing Token Limit]",
964
+ description,
965
+ ]
966
+
967
+ if record is not None:
968
+ result = record.result
969
+ if isinstance(result, bytes):
970
+ result_repr = result.decode(errors="replace")
971
+ elif isinstance(result, str):
972
+ result_repr = result
973
+ else:
974
+ try:
975
+ result_repr = json.dumps(
976
+ result, ensure_ascii=False, sort_keys=True
977
+ )
978
+ except (TypeError, ValueError):
979
+ result_repr = str(result)
980
+
981
+ result_length = len(result_repr)
982
+ notice_lines.append(f"Tool result length: {result_length}")
983
+ if self.model_backend.token_limit != 999999999:
984
+ notice_lines.append(
985
+ f"Token limit: {self.model_backend.token_limit}"
986
+ )
987
+
988
+ return "\n".join(notice_lines)
989
+
990
+ @staticmethod
991
+ def _append_user_messages_section(
992
+ summary_content: str, user_messages: List[str]
993
+ ) -> str:
994
+ section_title = "- **All User Messages**:"
995
+ sanitized_messages: List[str] = []
996
+ for msg in user_messages:
997
+ if not isinstance(msg, str):
998
+ msg = str(msg)
999
+ cleaned = " ".join(msg.strip().splitlines())
1000
+ if cleaned:
1001
+ sanitized_messages.append(cleaned)
1002
+
1003
+ bullet_block = (
1004
+ "\n".join(f"- {m}" for m in sanitized_messages)
1005
+ if sanitized_messages
1006
+ else "- None noted"
1007
+ )
1008
+ user_section = f"{section_title}\n{bullet_block}"
1009
+
1010
+ summary_clean = summary_content.rstrip()
1011
+ separator = "\n\n" if summary_clean else ""
1012
+ return f"{summary_clean}{separator}{user_section}"
1013
+
1014
+ def _reset_summary_state(self) -> None:
1015
+ self._summary_token_count = 0 # Total tokens in summary messages
1016
+
1017
+ def _calculate_next_summary_threshold(self) -> int:
1018
+ r"""Calculate the next token threshold that should trigger
1019
+ summarization.
1020
+
1021
+ The threshold calculation follows a progressive strategy:
1022
+ - First time: token_limit * (summarize_threshold / 100)
1023
+ - Subsequent times: (limit - summary_tokens) * (summarize_threshold / 100) + summary_tokens
1024
+
1025
+ This ensures that as summaries accumulate, the threshold adapts
1026
+ to maintain a reasonable balance between context and summaries.
1027
+
1028
+ Returns:
1029
+ int: The token count threshold for next summarization.
1030
+ """
1031
+ token_limit = self.model_backend.token_limit
1032
+ summary_token_count = self._summary_token_count
1033
+
1034
+ # First summarization: use the percentage threshold
1035
+ if summary_token_count == 0:
1036
+ threshold = int(token_limit * self.summarize_threshold / 100)
1037
+ else:
1038
+ # Subsequent summarizations: adaptive threshold
1039
+ threshold = int(
1040
+ (token_limit - summary_token_count)
1041
+ * self.summarize_threshold
1042
+ / 100
1043
+ + summary_token_count
1044
+ )
1045
+
1046
+ return threshold
1047
+
1048
+ def _update_memory_with_summary(
1049
+ self, summary: str, include_summaries: bool = False
1050
+ ) -> None:
1051
+ r"""Update memory with summary result.
1052
+
1053
+ This method handles memory clearing and restoration of summaries based
1054
+ on whether it's a progressive or full compression.
1055
+ """
1056
+
1057
+ summary_content: str = summary
1058
+
1059
+ existing_summaries = []
1060
+ if not include_summaries:
1061
+ messages, _ = self.memory.get_context()
1062
+ for msg in messages:
1063
+ content = msg.get('content', '')
1064
+ if isinstance(content, str) and content.startswith(
1065
+ '[CONTEXT_SUMMARY]'
1066
+ ):
1067
+ existing_summaries.append(msg)
1068
+
1069
+ # Clear memory
1070
+ self.clear_memory()
1071
+
1072
+ # Restore old summaries (for progressive compression)
1073
+ for old_summary in existing_summaries:
1074
+ content = old_summary.get('content', '')
1075
+ if not isinstance(content, str):
1076
+ content = str(content)
1077
+ summary_msg = BaseMessage.make_assistant_message(
1078
+ role_name="assistant", content=content
1079
+ )
1080
+ self.update_memory(summary_msg, OpenAIBackendRole.ASSISTANT)
1081
+
1082
+ # Add new summary
1083
+ new_summary_msg = BaseMessage.make_assistant_message(
1084
+ role_name="assistant", content=summary_content
1085
+ )
1086
+ self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT)
1087
+ input_message = BaseMessage.make_assistant_message(
1088
+ role_name="assistant",
1089
+ content=(
1090
+ "Please continue the conversation from "
1091
+ "where we left it off without asking the user any further "
1092
+ "questions. Continue with the last task that you were "
1093
+ "asked to work on."
1094
+ ),
1095
+ )
1096
+ self.update_memory(input_message, OpenAIBackendRole.ASSISTANT)
1097
+ # Update token count
1098
+ try:
1099
+ summary_tokens = (
1100
+ self.model_backend.token_counter.count_tokens_from_messages(
1101
+ [{"role": "assistant", "content": summary_content}]
1102
+ )
1103
+ )
1104
+
1105
+ if include_summaries: # Full compression - reset count
1106
+ self._summary_token_count = summary_tokens
1107
+ logger.info(
1108
+ f"Full compression: Summary with {summary_tokens} tokens. "
1109
+ f"Total summary tokens reset to: {summary_tokens}"
1110
+ )
1111
+ else: # Progressive compression - accumulate
1112
+ self._summary_token_count += summary_tokens
1113
+ logger.info(
1114
+ f"Progressive compression: New summary "
1115
+ f"with {summary_tokens} tokens. "
1116
+ f"Total summary tokens: "
1117
+ f"{self._summary_token_count}"
1118
+ )
1119
+ except Exception as e:
1120
+ logger.warning(f"Failed to count summary tokens: {e}")
1121
+
765
1122
  def _get_external_tool_names(self) -> Set[str]:
766
1123
  r"""Returns a set of external tool names."""
767
1124
  return set(self._external_tool_schemas.keys())
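To make the progressive strategy of `_calculate_next_summary_threshold` concrete, here is a worked example with illustrative numbers (a 100,000-token limit and the default summarize_threshold of 50; these values are assumptions, not package defaults beyond the threshold itself):

    token_limit = 100_000
    summarize_threshold = 50

    # First summarization: no summary tokens recorded yet
    first_trigger = int(token_limit * summarize_threshold / 100)        # 50_000

    # Later, with 20_000 tokens already occupied by earlier summaries
    summary_tokens = 20_000
    next_trigger = int(
        (token_limit - summary_tokens) * summarize_threshold / 100
        + summary_tokens
    )                                                                   # 60_000

The trigger point therefore drifts upward as summaries accumulate, which is why `summary_window_ratio` exists: once summary messages alone exceed that fraction of the window, the step loop falls back to a full compression pass that re-summarizes everything, including prior summaries.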
@@ -823,16 +1180,6 @@ class ChatAgent(BaseAgent):
823
1180
  ) -> None:
824
1181
  r"""Updates the agent memory with a new message.
825
1182
 
826
- If the single *message* exceeds the model's context window, it will
827
- be **automatically split into multiple smaller chunks** before being
828
- written into memory. This prevents later failures in
829
- `ScoreBasedContextCreator` where an over-sized message cannot fit
830
- into the available token budget at all.
831
-
832
- This slicing logic handles both regular text messages (in the
833
- `content` field) and long tool call results (in the `result` field of
834
- a `FunctionCallingMessage`).
835
-
836
1183
  Args:
837
1184
  message (BaseMessage): The new message to add to the stored
838
1185
  messages.
@@ -842,153 +1189,15 @@ class ChatAgent(BaseAgent):
842
1189
  (default: :obj:`None`)
843
1190
  (default: obj:`None`)
844
1191
  """
845
-
846
- # 1. Helper to write a record to memory
847
- def _write_single_record(
848
- message: BaseMessage, role: OpenAIBackendRole, timestamp: float
849
- ):
850
- self.memory.write_record(
851
- MemoryRecord(
852
- message=message,
853
- role_at_backend=role,
854
- timestamp=timestamp,
855
- agent_id=self.agent_id,
856
- )
857
- )
858
-
859
- base_ts = (
860
- timestamp
1192
+ record = MemoryRecord(
1193
+ message=message,
1194
+ role_at_backend=role,
1195
+ timestamp=timestamp
861
1196
  if timestamp is not None
862
- else time.time_ns() / 1_000_000_000
863
- )
864
-
865
- # 2. Get token handling utilities, fallback if unavailable
866
- try:
867
- context_creator = self.memory.get_context_creator()
868
- token_counter = context_creator.token_counter
869
- token_limit = context_creator.token_limit
870
- except AttributeError:
871
- _write_single_record(message, role, base_ts)
872
- return
873
-
874
- # 3. Check if slicing is necessary
875
- try:
876
- current_tokens = token_counter.count_tokens_from_messages(
877
- [message.to_openai_message(role)]
878
- )
879
-
880
- with warnings.catch_warnings():
881
- warnings.filterwarnings("ignore", category=EmptyMemoryWarning)
882
- _, ctx_tokens = self.memory.get_context()
883
-
884
- remaining_budget = max(0, token_limit - ctx_tokens)
885
-
886
- if current_tokens <= remaining_budget:
887
- _write_single_record(message, role, base_ts)
888
- return
889
- except Exception as e:
890
- logger.warning(
891
- f"Token calculation failed before chunking, "
892
- f"writing message as-is. Error: {e}"
893
- )
894
- _write_single_record(message, role, base_ts)
895
- return
896
-
897
- # 4. Perform slicing
898
- logger.warning(
899
- f"Message with {current_tokens} tokens exceeds remaining budget "
900
- f"of {remaining_budget}. Slicing into smaller chunks."
1197
+ else time.time_ns() / 1_000_000_000, # Nanosecond precision
1198
+ agent_id=self.agent_id,
901
1199
  )
902
-
903
- text_to_chunk: Optional[str] = None
904
- is_function_result = False
905
-
906
- if isinstance(message, FunctionCallingMessage) and isinstance(
907
- message.result, str
908
- ):
909
- text_to_chunk = message.result
910
- is_function_result = True
911
- elif isinstance(message.content, str):
912
- text_to_chunk = message.content
913
-
914
- if not text_to_chunk or not text_to_chunk.strip():
915
- _write_single_record(message, role, base_ts)
916
- return
917
- # Encode the entire text to get a list of all token IDs
918
- try:
919
- all_token_ids = token_counter.encode(text_to_chunk)
920
- except Exception as e:
921
- logger.error(f"Failed to encode text for chunking: {e}")
922
- _write_single_record(message, role, base_ts) # Fallback
923
- return
924
-
925
- if not all_token_ids:
926
- _write_single_record(message, role, base_ts) # Nothing to chunk
927
- return
928
-
929
- # 1. Base chunk size: one-tenth of the smaller of (a) total token
930
- # limit and (b) current remaining budget. This prevents us from
931
- # creating chunks that are guaranteed to overflow the
932
- # immediate context window.
933
- base_chunk_size = max(1, remaining_budget) // 10
934
-
935
- # 2. Each chunk gets a textual prefix such as:
936
- # "[chunk 3/12 of a long message]\n"
937
- # The prefix itself consumes tokens, so if we do not subtract its
938
- # length the *total* tokens of the outgoing message (prefix + body)
939
- # can exceed the intended bound. We estimate the prefix length
940
- # with a representative example that is safely long enough for the
941
- # vast majority of cases (three-digit indices).
942
- sample_prefix = "[chunk 1/1000 of a long message]\n"
943
- prefix_token_len = len(token_counter.encode(sample_prefix))
944
-
945
- # 3. The real capacity for the message body is therefore the base
946
- # chunk size minus the prefix length. Fallback to at least one
947
- # token to avoid zero or negative sizes.
948
- chunk_body_limit = max(1, base_chunk_size - prefix_token_len)
949
-
950
- # 4. Calculate how many chunks we will need with this body size.
951
- num_chunks = math.ceil(len(all_token_ids) / chunk_body_limit)
952
- group_id = str(uuid.uuid4())
953
-
954
- for i in range(num_chunks):
955
- start_idx = i * chunk_body_limit
956
- end_idx = start_idx + chunk_body_limit
957
- chunk_token_ids = all_token_ids[start_idx:end_idx]
958
-
959
- chunk_body = token_counter.decode(chunk_token_ids)
960
-
961
- prefix = f"[chunk {i + 1}/{num_chunks} of a long message]\n"
962
- new_body = prefix + chunk_body
963
-
964
- if is_function_result and isinstance(
965
- message, FunctionCallingMessage
966
- ):
967
- new_msg: BaseMessage = FunctionCallingMessage(
968
- role_name=message.role_name,
969
- role_type=message.role_type,
970
- meta_dict=message.meta_dict,
971
- content=message.content,
972
- func_name=message.func_name,
973
- args=message.args,
974
- result=new_body,
975
- tool_call_id=message.tool_call_id,
976
- )
977
- else:
978
- new_msg = message.create_new_instance(new_body)
979
-
980
- meta = (new_msg.meta_dict or {}).copy()
981
- meta.update(
982
- {
983
- "chunk_idx": i + 1,
984
- "chunk_total": num_chunks,
985
- "chunk_group_id": group_id,
986
- }
987
- )
988
- new_msg.meta_dict = meta
989
-
990
- # Increment timestamp slightly to maintain order
991
- _write_single_record(new_msg, role, base_ts + i * 1e-6)
1200
+ self.memory.write_record(record)
992
1201
 
993
1202
  def load_memory(self, memory: AgentMemory) -> None:
994
1203
  r"""Load the provided memory into the agent.
@@ -1042,40 +1251,333 @@ class ChatAgent(BaseAgent):
1042
1251
  f"Skipping invalid record: malformed message "
1043
1252
  f"structure in {record_dict}"
1044
1253
  )
1045
- continue
1254
+ continue
1255
+
1256
+ try:
1257
+ record = MemoryRecord.from_dict(record_dict)
1258
+ self.memory.write_records([record])
1259
+ except Exception as e:
1260
+ logger.warning(
1261
+ f"Error converting record to MemoryRecord: {e}. "
1262
+ f"Record: {record_dict}"
1263
+ )
1264
+ logger.info(f"Memory loaded from {path}")
1265
+
1266
+ def save_memory(self, path: str) -> None:
1267
+ r"""Retrieves the current conversation data from memory and writes it
1268
+ into a JSON file using JsonStorage.
1269
+
1270
+ Args:
1271
+ path (str): Target file path to store JSON data.
1272
+ """
1273
+ json_store = JsonStorage(Path(path))
1274
+ context_records = self.memory.retrieve()
1275
+ to_save = [cr.memory_record.to_dict() for cr in context_records]
1276
+ json_store.save(to_save)
1277
+ logger.info(f"Memory saved to {path}")
1278
+
1279
+ def summarize(
1280
+ self,
1281
+ filename: Optional[str] = None,
1282
+ summary_prompt: Optional[str] = None,
1283
+ response_format: Optional[Type[BaseModel]] = None,
1284
+ working_directory: Optional[Union[str, Path]] = None,
1285
+ include_summaries: bool = False,
1286
+ add_user_messages: bool = True,
1287
+ ) -> Dict[str, Any]:
1288
+ r"""Summarize the agent's current conversation context and persist it
1289
+ to a markdown file.
1290
+
1291
+ .. deprecated:: 0.2.80
1292
+ Use :meth:`asummarize` for async/await support and better
1293
+ performance in parallel summarization workflows.
1294
+
1295
+ Args:
1296
+ filename (Optional[str]): The base filename (without extension) to
1297
+ use for the markdown file. Defaults to a timestamped name when
1298
+ not provided.
1299
+ summary_prompt (Optional[str]): Custom prompt for the summarizer.
1300
+ When omitted, a default prompt highlighting key decisions,
1301
+ action items, and open questions is used.
1302
+ response_format (Optional[Type[BaseModel]]): A Pydantic model
1303
+ defining the expected structure of the response. If provided,
1304
+ the summary will be generated as structured output and included
1305
+ in the result.
1306
+ include_summaries (bool): Whether to include previously generated
1307
+ summaries in the content to be summarized. If False (default),
1308
+ only non-summary messages will be summarized. If True, all
1309
+ messages including previous summaries will be summarized
1310
+ (full compression). (default: :obj:`False`)
1311
+ working_directory (Optional[str|Path]): Optional directory to save
1312
+ the markdown summary file. If provided, overrides the default
1313
+ directory used by ContextUtility.
1314
+ add_user_messages (bool): Whether to add user messages to the summary.
1315
+ (default: :obj:`True`)
1316
+ Returns:
1317
+ Dict[str, Any]: A dictionary containing the summary text, file
1318
+ path, status message, and optionally structured_summary if
1319
+ response_format was provided.
1320
+
1321
+ See Also:
1322
+ :meth:`asummarize`: Async version for non-blocking LLM calls.
1323
+ """
1324
+
1325
+ warnings.warn(
1326
+ "summarize() is synchronous. Consider using asummarize() "
1327
+ "for async/await support and better performance.",
1328
+ DeprecationWarning,
1329
+ stacklevel=2,
1330
+ )
1331
+
1332
+ result: Dict[str, Any] = {
1333
+ "summary": "",
1334
+ "file_path": None,
1335
+ "status": "",
1336
+ }
1337
+
1338
+ try:
1339
+ # Use external context if set, otherwise create local one
1340
+ if self._context_utility is None:
1341
+ if working_directory is not None:
1342
+ self._context_utility = ContextUtility(
1343
+ working_directory=str(working_directory)
1344
+ )
1345
+ else:
1346
+ self._context_utility = ContextUtility()
1347
+ context_util = self._context_utility
1348
+
1349
+ # Get conversation directly from agent's memory
1350
+ messages, _ = self.memory.get_context()
1351
+
1352
+ if not messages:
1353
+ status_message = (
1354
+ "No conversation context available to summarize."
1355
+ )
1356
+ result["status"] = status_message
1357
+ return result
1358
+
1359
+ # Convert messages to conversation text
1360
+ conversation_lines = []
1361
+ user_messages: List[str] = []
1362
+ for message in messages:
1363
+ role = message.get('role', 'unknown')
1364
+ content = message.get('content', '')
1365
+
1366
+ # Skip summary messages if include_summaries is False
1367
+ if not include_summaries and isinstance(content, str):
1368
+ # Check if this is a summary message by looking for marker
1369
+ if content.startswith('[CONTEXT_SUMMARY]'):
1370
+ continue
1371
+
1372
+ # Handle tool call messages (assistant calling tools)
1373
+ tool_calls = message.get('tool_calls')
1374
+ if tool_calls and isinstance(tool_calls, (list, tuple)):
1375
+ for tool_call in tool_calls:
1376
+ # Handle both dict and object formats
1377
+ if isinstance(tool_call, dict):
1378
+ func_name = tool_call.get('function', {}).get(
1379
+ 'name', 'unknown_tool'
1380
+ )
1381
+ func_args_str = tool_call.get('function', {}).get(
1382
+ 'arguments', '{}'
1383
+ )
1384
+ else:
1385
+ # Handle object format (Pydantic or similar)
1386
+ func_name = getattr(
1387
+ getattr(tool_call, 'function', None),
1388
+ 'name',
1389
+ 'unknown_tool',
1390
+ )
1391
+ func_args_str = getattr(
1392
+ getattr(tool_call, 'function', None),
1393
+ 'arguments',
1394
+ '{}',
1395
+ )
1396
+
1397
+ # Parse and format arguments for readability
1398
+ try:
1399
+ import json
1400
+
1401
+ args_dict = json.loads(func_args_str)
1402
+ args_formatted = ', '.join(
1403
+ f"{k}={v}" for k, v in args_dict.items()
1404
+ )
1405
+ except (json.JSONDecodeError, ValueError, TypeError):
1406
+ args_formatted = func_args_str
1407
+
1408
+ conversation_lines.append(
1409
+ f"[TOOL CALL] {func_name}({args_formatted})"
1410
+ )
1411
+
1412
+ # Handle tool response messages
1413
+ elif role == 'tool':
1414
+ tool_name = message.get('name', 'unknown_tool')
1415
+ if not content:
1416
+ content = str(message.get('content', ''))
1417
+ conversation_lines.append(
1418
+ f"[TOOL RESULT] {tool_name} → {content}"
1419
+ )
1420
+
1421
+ # Handle regular content messages (user/assistant/system)
1422
+ elif content:
1423
+ content = str(content)
1424
+ if role == 'user':
1425
+ user_messages.append(content)
1426
+ conversation_lines.append(f"{role}: {content}")
1427
+
1428
+ conversation_text = "\n".join(conversation_lines).strip()
1429
+
1430
+ if not conversation_text:
1431
+ status_message = (
1432
+ "Conversation context is empty; skipping summary."
1433
+ )
1434
+ result["status"] = status_message
1435
+ return result
1436
+
1437
+ if self._context_summary_agent is None:
1438
+ self._context_summary_agent = ChatAgent(
1439
+ system_message=(
1440
+ "You are a helpful assistant that summarizes "
1441
+ "conversations"
1442
+ ),
1443
+ model=self.model_backend,
1444
+ agent_id=f"{self.agent_id}_context_summarizer",
1445
+ )
1446
+ else:
1447
+ self._context_summary_agent.reset()
1448
+
1449
+ if summary_prompt:
1450
+ prompt_text = (
1451
+ f"{summary_prompt.rstrip()}\n\n"
1452
+ f"AGENT CONVERSATION TO BE SUMMARIZED:\n"
1453
+ f"{conversation_text}"
1454
+ )
1455
+ else:
1456
+ prompt_text = build_default_summary_prompt(conversation_text)
1457
+
1458
+ try:
1459
+ # Use structured output if response_format is provided
1460
+ if response_format:
1461
+ response = self._context_summary_agent.step(
1462
+ prompt_text, response_format=response_format
1463
+ )
1464
+ else:
1465
+ response = self._context_summary_agent.step(prompt_text)
1466
+ except Exception as step_exc:
1467
+ error_message = (
1468
+ f"Failed to generate summary using model: {step_exc}"
1469
+ )
1470
+ logger.error(error_message)
1471
+ result["status"] = error_message
1472
+ return result
1473
+
1474
+ if not response.msgs:
1475
+ status_message = (
1476
+ "Failed to generate summary from model response."
1477
+ )
1478
+ result["status"] = status_message
1479
+ return result
1480
+
1481
+ summary_content = response.msgs[-1].content.strip()
1482
+ if not summary_content:
1483
+ status_message = "Generated summary is empty."
1484
+ result["status"] = status_message
1485
+ return result
1486
+
1487
+ # handle structured output if response_format was provided
1488
+ structured_output = None
1489
+ if response_format and response.msgs[-1].parsed:
1490
+ structured_output = response.msgs[-1].parsed
1491
+
1492
+ # determine filename: use provided filename, or extract from
1493
+ # structured output, or generate timestamp
1494
+ if filename:
1495
+ base_filename = filename
1496
+ elif structured_output and hasattr(
1497
+ structured_output, 'task_title'
1498
+ ):
1499
+ # use task_title from structured output for filename
1500
+ task_title = structured_output.task_title
1501
+ clean_title = ContextUtility.sanitize_workflow_filename(
1502
+ task_title
1503
+ )
1504
+ base_filename = (
1505
+ f"{clean_title}_workflow" if clean_title else "workflow"
1506
+ )
1507
+ else:
1508
+ base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
1509
+
1510
+ base_filename = Path(base_filename).with_suffix("").name
1511
+
1512
+ metadata = context_util.get_session_metadata()
1513
+ metadata.update(
1514
+ {
1515
+ "agent_id": self.agent_id,
1516
+ "message_count": len(messages),
1517
+ }
1518
+ )
1519
+
1520
+ # convert structured output to custom markdown if present
1521
+ if structured_output:
1522
+ # convert structured output to custom markdown
1523
+ summary_content = context_util.structured_output_to_markdown(
1524
+ structured_data=structured_output, metadata=metadata
1525
+ )
1526
+ if add_user_messages:
1527
+ summary_content = self._append_user_messages_section(
1528
+ summary_content, user_messages
1529
+ )
1046
1530
 
1047
- try:
1048
- record = MemoryRecord.from_dict(record_dict)
1049
- self.memory.write_records([record])
1050
- except Exception as e:
1051
- logger.warning(
1052
- f"Error converting record to MemoryRecord: {e}. "
1053
- f"Record: {record_dict}"
1054
- )
1055
- logger.info(f"Memory loaded from {path}")
1531
+ # Save the markdown (either custom structured or default)
1532
+ save_status = context_util.save_markdown_file(
1533
+ base_filename,
1534
+ summary_content,
1535
+ title="Conversation Summary"
1536
+ if not structured_output
1537
+ else None,
1538
+ metadata=metadata if not structured_output else None,
1539
+ )
1056
1540
 
1057
- def save_memory(self, path: str) -> None:
1058
- r"""Retrieves the current conversation data from memory and writes it
1059
- into a JSON file using JsonStorage.
1541
+ file_path = (
1542
+ context_util.get_working_directory() / f"{base_filename}.md"
1543
+ )
1544
+ summary_content = (
1545
+ f"[CONTEXT_SUMMARY] The following is a summary of our "
1546
+ f"conversation from a previous session: {summary_content}"
1547
+ )
1548
+ # Prepare result dictionary
1549
+ result_dict = {
1550
+ "summary": summary_content,
1551
+ "file_path": str(file_path),
1552
+ "status": save_status,
1553
+ "structured_summary": structured_output,
1554
+ }
1060
1555
 
1061
- Args:
1062
- path (str): Target file path to store JSON data.
1063
- """
1064
- json_store = JsonStorage(Path(path))
1065
- context_records = self.memory.retrieve()
1066
- to_save = [cr.memory_record.to_dict() for cr in context_records]
1067
- json_store.save(to_save)
1068
- logger.info(f"Memory saved to {path}")
1556
+ result.update(result_dict)
1557
+ logger.info("Conversation summary saved to %s", file_path)
1558
+ return result
1069
1559
 
1070
- def summarize(
1560
+ except Exception as exc:
1561
+ error_message = f"Failed to summarize conversation context: {exc}"
1562
+ logger.error(error_message)
1563
+ result["status"] = error_message
1564
+ return result
1565
+
1566
+ async def asummarize(
1071
1567
  self,
1072
1568
  filename: Optional[str] = None,
1073
1569
  summary_prompt: Optional[str] = None,
1074
1570
  response_format: Optional[Type[BaseModel]] = None,
1075
1571
  working_directory: Optional[Union[str, Path]] = None,
1572
+ include_summaries: bool = False,
1573
+ add_user_messages: bool = True,
1076
1574
  ) -> Dict[str, Any]:
1077
- r"""Summarize the agent's current conversation context and persist it
1078
- to a markdown file.
1575
+ r"""Asynchronously summarize the agent's current conversation context
1576
+ and persist it to a markdown file.
1577
+
1578
+ This is the async version of summarize() that uses astep() for
1579
+ non-blocking LLM calls, enabling parallel summarization of multiple
1580
+ agents.
1079
1581
 
1080
1582
  Args:
1081
1583
  filename (Optional[str]): The base filename (without extension) to
@@ -1091,7 +1593,13 @@ class ChatAgent(BaseAgent):
1091
1593
  working_directory (Optional[str|Path]): Optional directory to save
1092
1594
  the markdown summary file. If provided, overrides the default
1093
1595
  directory used by ContextUtility.
1094
-
1596
+ include_summaries (bool): Whether to include previously generated
1597
+ summaries in the content to be summarized. If False (default),
1598
+ only non-summary messages will be summarized. If True, all
1599
+ messages including previous summaries will be summarized
1600
+ (full compression). (default: :obj:`False`)
1601
+ add_user_messages (bool): Whether to add user messages to the summary.
1602
+ (default: :obj:`True`)
1095
1603
  Returns:
1096
1604
  Dict[str, Any]: A dictionary containing the summary text, file
1097
1605
  path, status message, and optionally structured_summary if
@@ -1127,10 +1635,17 @@ class ChatAgent(BaseAgent):
1127
1635
 
1128
1636
  # Convert messages to conversation text
1129
1637
  conversation_lines = []
1638
+ user_messages: List[str] = []
1130
1639
  for message in messages:
1131
1640
  role = message.get('role', 'unknown')
1132
1641
  content = message.get('content', '')
1133
1642
 
1643
+ # Skip summary messages if include_summaries is False
1644
+ if not include_summaries and isinstance(content, str):
1645
+ # Check if this is a summary message by looking for marker
1646
+ if content.startswith('[CONTEXT_SUMMARY]'):
1647
+ continue
1648
+
1134
1649
  # Handle tool call messages (assistant calling tools)
1135
1650
  tool_calls = message.get('tool_calls')
1136
1651
  if tool_calls and isinstance(tool_calls, (list, tuple)):
@@ -1182,6 +1697,9 @@ class ChatAgent(BaseAgent):
1182
1697
 
1183
1698
  # Handle regular content messages (user/assistant/system)
1184
1699
  elif content:
1700
+ content = str(content)
1701
+ if role == 'user':
1702
+ user_messages.append(content)
1185
1703
  conversation_lines.append(f"{role}: {content}")
1186
1704
 
1187
1705
  conversation_text = "\n".join(conversation_lines).strip()
@@ -1212,20 +1730,25 @@ class ChatAgent(BaseAgent):
1212
1730
  f"{conversation_text}"
1213
1731
  )
1214
1732
  else:
1215
- prompt_text = (
1216
- "Summarize the context information in concise markdown "
1217
- "bullet points highlighting key decisions, action items.\n"
1218
- f"Context information:\n{conversation_text}"
1219
- )
1733
+ prompt_text = build_default_summary_prompt(conversation_text)
1220
1734
 
1221
1735
  try:
1222
1736
  # Use structured output if response_format is provided
1223
1737
  if response_format:
1224
- response = self._context_summary_agent.step(
1738
+ response = await self._context_summary_agent.astep(
1225
1739
  prompt_text, response_format=response_format
1226
1740
  )
1227
1741
  else:
1228
- response = self._context_summary_agent.step(prompt_text)
1742
+ response = await self._context_summary_agent.astep(
1743
+ prompt_text
1744
+ )
1745
+
1746
+ # Handle streaming response
1747
+ if isinstance(response, AsyncStreamingChatAgentResponse):
1748
+ # Collect final response
1749
+ final_response = await response
1750
+ response = final_response
1751
+
1229
1752
  except Exception as step_exc:
1230
1753
  error_message = (
1231
1754
  f"Failed to generate summary using model: {step_exc}"
@@ -1247,11 +1770,29 @@ class ChatAgent(BaseAgent):
1247
1770
  result["status"] = status_message
1248
1771
  return result
1249
1772
 
1250
- base_filename = (
1251
- filename
1252
- if filename
1253
- else f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
1254
- )
1773
+ # handle structured output if response_format was provided
1774
+ structured_output = None
1775
+ if response_format and response.msgs[-1].parsed:
1776
+ structured_output = response.msgs[-1].parsed
1777
+
1778
+ # determine filename: use provided filename, or extract from
1779
+ # structured output, or generate timestamp
1780
+ if filename:
1781
+ base_filename = filename
1782
+ elif structured_output and hasattr(
1783
+ structured_output, 'task_title'
1784
+ ):
1785
+ # use task_title from structured output for filename
1786
+ task_title = structured_output.task_title
1787
+ clean_title = ContextUtility.sanitize_workflow_filename(
1788
+ task_title
1789
+ )
1790
+ base_filename = (
1791
+ f"{clean_title}_workflow" if clean_title else "workflow"
1792
+ )
1793
+ else:
1794
+ base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
1795
+
1255
1796
  base_filename = Path(base_filename).with_suffix("").name
1256
1797
 
1257
1798
  metadata = context_util.get_session_metadata()
@@ -1262,14 +1803,16 @@ class ChatAgent(BaseAgent):
1262
1803
  }
1263
1804
  )
1264
1805
 
1265
- # Handle structured output if response_format was provided
1266
- structured_output = None
1267
- if response_format and response.msgs[-1].parsed:
1268
- structured_output = response.msgs[-1].parsed
1269
- # Convert structured output to custom markdown
1806
+ # convert structured output to custom markdown if present
1807
+ if structured_output:
1808
+ # convert structured output to custom markdown
1270
1809
  summary_content = context_util.structured_output_to_markdown(
1271
1810
  structured_data=structured_output, metadata=metadata
1272
1811
  )
1812
+ if add_user_messages:
1813
+ summary_content = self._append_user_messages_section(
1814
+ summary_content, user_messages
1815
+ )
1273
1816
 
1274
1817
  # Save the markdown (either custom structured or default)
1275
1818
  save_status = context_util.save_markdown_file(
@@ -1285,6 +1828,11 @@ class ChatAgent(BaseAgent):
1285
1828
  context_util.get_working_directory() / f"{base_filename}.md"
1286
1829
  )
1287
1830
 
1831
+ summary_content = (
1832
+ f"[CONTEXT_SUMMARY] The following is a summary of our "
1833
+ f"conversation from a previous session: {summary_content}"
1834
+ )
1835
+
1288
1836
  # Prepare result dictionary
1289
1837
  result_dict = {
1290
1838
  "summary": summary_content,
@@ -1312,7 +1860,14 @@ class ChatAgent(BaseAgent):
1312
1860
  self.memory.clear()
1313
1861
 
1314
1862
  if self.system_message is not None:
1315
- self.update_memory(self.system_message, OpenAIBackendRole.SYSTEM)
1863
+ self.memory.write_record(
1864
+ MemoryRecord(
1865
+ message=self.system_message,
1866
+ role_at_backend=OpenAIBackendRole.SYSTEM,
1867
+ timestamp=time.time_ns() / 1_000_000_000,
1868
+ agent_id=self.agent_id,
1869
+ )
1870
+ )
1316
1871
 
1317
1872
  def _generate_system_message_for_output_language(
1318
1873
  self,
@@ -1337,26 +1892,70 @@ class ChatAgent(BaseAgent):
1337
1892
  content = self._original_system_message.content + language_prompt
1338
1893
  return self._original_system_message.create_new_instance(content)
1339
1894
  else:
1340
- return BaseMessage.make_assistant_message(
1341
- role_name="Assistant",
1342
- content=language_prompt,
1343
- )
1895
+ return BaseMessage.make_system_message(language_prompt)
1344
1896
 
1345
1897
  def init_messages(self) -> None:
1346
1898
  r"""Initializes the stored messages list with the current system
1347
1899
  message.
1348
1900
  """
1349
- self.memory.clear()
1350
- # avoid UserWarning: The `ChatHistoryMemory` is empty.
1351
- if self.system_message is not None:
1352
- self.memory.write_record(
1353
- MemoryRecord(
1354
- message=self.system_message,
1355
- role_at_backend=OpenAIBackendRole.SYSTEM,
1356
- timestamp=time.time_ns() / 1_000_000_000,
1357
- agent_id=self.agent_id,
1358
- )
1359
- )
1901
+ self._reset_summary_state()
1902
+ self.clear_memory()
1903
+
1904
+ def update_system_message(
1905
+ self,
1906
+ system_message: Union[BaseMessage, str],
1907
+ reset_memory: bool = True,
1908
+ ) -> None:
1909
+ r"""Update the system message.
1910
+ Updating it resets the conversation with the new system message.
1911
+
1912
+ Args:
1913
+ system_message (Union[BaseMessage, str]): The new system message.
1914
+ Can be either a BaseMessage object or a string.
1915
+ If a string is provided, it will be converted
1916
+ into a BaseMessage object.
1917
+ reset_memory (bool):
1918
+ Whether to reinitialize conversation messages after updating
1919
+ the system message. Defaults to True.
1920
+ """
1921
+ if system_message is None:
1922
+ raise ValueError("system_message is required and cannot be None. ")
1923
+ self._original_system_message = (
1924
+ BaseMessage.make_system_message(system_message)
1925
+ if isinstance(system_message, str)
1926
+ else system_message
1927
+ )
1928
+ self._system_message = (
1929
+ self._generate_system_message_for_output_language()
1930
+ )
1931
+ if reset_memory:
1932
+ self.init_messages()
1933
+
1934
+ def append_to_system_message(
1935
+ self, content: str, reset_memory: bool = True
1936
+ ) -> None:
1937
+ """Append additional context to existing system message.
1938
+
1939
+ Args:
1940
+ content (str): The additional system message.
1941
+ reset_memory (bool):
1942
+ Whether to reinitialize conversation messages after appending
1943
+ additional context. Defaults to True.
1944
+ """
1945
+ original_content = (
1946
+ self._original_system_message.content
1947
+ if self._original_system_message
1948
+ else ""
1949
+ )
1950
+ new_system_message = original_content + '\n' + content
1951
+ self._original_system_message = BaseMessage.make_system_message(
1952
+ new_system_message
1953
+ )
1954
+ self._system_message = (
1955
+ self._generate_system_message_for_output_language()
1956
+ )
1957
+ if reset_memory:
1958
+ self.init_messages()
1360
1959
 
1361
1960
  def reset_to_original_system_message(self) -> None:
1362
1961
  r"""Reset system message to original, removing any appended context.
@@ -1780,22 +2379,122 @@ class ChatAgent(BaseAgent):
1780
2379
 
1781
2380
  try:
1782
2381
  openai_messages, num_tokens = self.memory.get_context()
2382
+ if self.summarize_threshold is not None:
2383
+ threshold = self._calculate_next_summary_threshold()
2384
+ summary_token_count = self._summary_token_count
2385
+ token_limit = self.model_backend.token_limit
2386
+
2387
+ if num_tokens <= token_limit:
2388
+ if (
2389
+ summary_token_count
2390
+ > token_limit * self.summary_window_ratio
2391
+ ):
2392
+ logger.info(
2393
+ f"Summary tokens ({summary_token_count}) "
2394
+ f"exceed limit, full compression."
2395
+ )
2396
+ # Summarize everything (including summaries)
2397
+ summary = self.summarize(include_summaries=True)
2398
+ self._update_memory_with_summary(
2399
+ summary.get("summary", ""),
2400
+ include_summaries=True,
2401
+ )
2402
+ elif num_tokens > threshold:
2403
+ logger.info(
2404
+ f"Token count ({num_tokens}) exceed threshold "
2405
+ f"({threshold}). Triggering summarization."
2406
+ )
2407
+ # Only summarize non-summary content
2408
+ summary = self.summarize(include_summaries=False)
2409
+ self._update_memory_with_summary(
2410
+ summary.get("summary", ""),
2411
+ include_summaries=False,
2412
+ )
1783
2413
  accumulated_context_tokens += num_tokens
1784
2414
  except RuntimeError as e:
1785
2415
  return self._step_terminate(
1786
2416
  e.args[1], tool_call_records, "max_tokens_exceeded"
1787
2417
  )
1788
- # Get response from model backend
1789
- response = self._get_model_response(
1790
- openai_messages,
1791
- num_tokens=num_tokens,
1792
- current_iteration=iteration_count,
1793
- response_format=response_format,
1794
- tool_schemas=[]
1795
- if disable_tools
1796
- else self._get_full_tool_schemas(),
1797
- prev_num_openai_messages=prev_num_openai_messages,
1798
- )
2418
+ # Get response from model backend with token limit error handling
2419
+ try:
2420
+ response = self._get_model_response(
2421
+ openai_messages,
2422
+ num_tokens=num_tokens,
2423
+ current_iteration=iteration_count,
2424
+ response_format=response_format,
2425
+ tool_schemas=[]
2426
+ if disable_tools
2427
+ else self._get_full_tool_schemas(),
2428
+ prev_num_openai_messages=prev_num_openai_messages,
2429
+ )
2430
+ except Exception as exc:
2431
+ logger.exception("Model error: %s", exc)
2432
+
2433
+ if self._is_token_limit_error(exc):
2434
+ tool_signature = self._last_tool_call_signature
2435
+ if (
2436
+ tool_signature is not None
2437
+ and tool_signature
2438
+ == self._last_token_limit_tool_signature
2439
+ ):
2440
+ description = self._describe_tool_call(
2441
+ self._last_tool_call_record
2442
+ )
2443
+ repeated_msg = (
2444
+ "Context exceeded again by the same tool call."
2445
+ )
2446
+ if description:
2447
+ repeated_msg += f" {description}"
2448
+ raise RuntimeError(repeated_msg) from exc
2449
+
2450
+ user_message_count = sum(
2451
+ 1
2452
+ for msg in openai_messages
2453
+ if getattr(msg, "role", None) == "user"
2454
+ )
2455
+ if (
2456
+ user_message_count == 1
2457
+ and getattr(openai_messages[-1], "role", None)
2458
+ == "user"
2459
+ ):
2460
+ raise RuntimeError(
2461
+ "The provided user input alone exceeds the "
2462
+ "context window. Please shorten the input."
2463
+ ) from exc
2464
+
2465
+ logger.warning(
2466
+ "Token limit exceeded error detected. "
2467
+ "Summarizing context."
2468
+ )
2469
+
2470
+ recent_records: List[ContextRecord]
2471
+ try:
2472
+ recent_records = self.memory.retrieve()
2473
+ except Exception: # pragma: no cover - defensive guard
2474
+ recent_records = []
2475
+
2476
+ indices_to_remove = (
2477
+ self._find_indices_to_remove_for_last_tool_pair(
2478
+ recent_records
2479
+ )
2480
+ )
2481
+ self.memory.remove_records_by_indices(indices_to_remove)
2482
+
2483
+ summary = self.summarize(include_summaries=False)
2484
+ tool_notice = self._format_tool_limit_notice()
2485
+ summary_messages = summary.get("summary", "")
2486
+
2487
+ if tool_notice:
2488
+ summary_messages += "\n\n" + tool_notice
2489
+
2490
+ self._update_memory_with_summary(
2491
+ summary_messages, include_summaries=False
2492
+ )
2493
+ self._last_token_limit_tool_signature = tool_signature
2494
+ return self._step_impl(input_message, response_format)
2495
+
2496
+ raise
2497
+
1799
2498
  prev_num_openai_messages = len(openai_messages)
1800
2499
  iteration_count += 1
1801
2500
 
@@ -1990,6 +2689,7 @@ class ChatAgent(BaseAgent):
1990
2689
  step_token_usage = self._create_token_usage_tracker()
1991
2690
  iteration_count: int = 0
1992
2691
  prev_num_openai_messages: int = 0
2692
+
1993
2693
  while True:
1994
2694
  if self.pause_event is not None and not self.pause_event.is_set():
1995
2695
  if isinstance(self.pause_event, asyncio.Event):
@@ -2000,21 +2700,128 @@ class ChatAgent(BaseAgent):
2000
2700
  await loop.run_in_executor(None, self.pause_event.wait)
2001
2701
  try:
2002
2702
  openai_messages, num_tokens = self.memory.get_context()
2703
+ if self.summarize_threshold is not None:
2704
+ threshold = self._calculate_next_summary_threshold()
2705
+ summary_token_count = self._summary_token_count
2706
+ token_limit = self.model_backend.token_limit
2707
+
2708
+ if num_tokens <= token_limit:
2709
+ if (
2710
+ summary_token_count
2711
+ > token_limit * self.summary_window_ratio
2712
+ ):
2713
+ logger.info(
2714
+ f"Summary tokens ({summary_token_count}) "
2715
+ f"exceed limit, full compression."
2716
+ )
2717
+ # Summarize everything (including summaries)
2718
+ summary = await self.asummarize(
2719
+ include_summaries=True
2720
+ )
2721
+ self._update_memory_with_summary(
2722
+ summary.get("summary", ""),
2723
+ include_summaries=True,
2724
+ )
2725
+ elif num_tokens > threshold:
2726
+ logger.info(
2727
+ f"Token count ({num_tokens}) exceed threshold "
2728
+ "({threshold}). Triggering summarization."
2729
+ )
2730
+ # Only summarize non-summary content
2731
+ summary = await self.asummarize(
2732
+ include_summaries=False
2733
+ )
2734
+ self._update_memory_with_summary(
2735
+ summary.get("summary", ""),
2736
+ include_summaries=False,
2737
+ )
2003
2738
  accumulated_context_tokens += num_tokens
2004
2739
  except RuntimeError as e:
2005
2740
  return self._step_terminate(
2006
2741
  e.args[1], tool_call_records, "max_tokens_exceeded"
2007
2742
  )
2008
- response = await self._aget_model_response(
2009
- openai_messages,
2010
- num_tokens=num_tokens,
2011
- current_iteration=iteration_count,
2012
- response_format=response_format,
2013
- tool_schemas=[]
2014
- if disable_tools
2015
- else self._get_full_tool_schemas(),
2016
- prev_num_openai_messages=prev_num_openai_messages,
2017
- )
2743
+ # Get response from model backend with token limit error handling
2744
+ try:
2745
+ response = await self._aget_model_response(
2746
+ openai_messages,
2747
+ num_tokens=num_tokens,
2748
+ current_iteration=iteration_count,
2749
+ response_format=response_format,
2750
+ tool_schemas=[]
2751
+ if disable_tools
2752
+ else self._get_full_tool_schemas(),
2753
+ prev_num_openai_messages=prev_num_openai_messages,
2754
+ )
2755
+ except Exception as exc:
2756
+ logger.exception("Model error: %s", exc)
2757
+
2758
+ if self._is_token_limit_error(exc):
2759
+ tool_signature = self._last_tool_call_signature
2760
+ if (
2761
+ tool_signature is not None
2762
+ and tool_signature
2763
+ == self._last_token_limit_tool_signature
2764
+ ):
2765
+ description = self._describe_tool_call(
2766
+ self._last_tool_call_record
2767
+ )
2768
+ repeated_msg = (
2769
+ "Context exceeded again by the same tool call."
2770
+ )
2771
+ if description:
2772
+ repeated_msg += f" {description}"
2773
+ raise RuntimeError(repeated_msg) from exc
2774
+
2775
+ user_message_count = sum(
2776
+ 1
2777
+ for msg in openai_messages
2778
+ if getattr(msg, "role", None) == "user"
2779
+ )
2780
+ if (
2781
+ user_message_count == 1
2782
+ and getattr(openai_messages[-1], "role", None)
2783
+ == "user"
2784
+ ):
2785
+ raise RuntimeError(
2786
+ "The provided user input alone exceeds the"
2787
+ "context window. Please shorten the input."
2788
+ ) from exc
+
+ logger.warning(
+ "Token limit exceeded error detected. "
+ "Summarizing context."
+ )
+
+ recent_records: List[ContextRecord]
+ try:
+ recent_records = self.memory.retrieve()
+ except Exception:  # pragma: no cover - defensive guard
+ recent_records = []
+
+ indices_to_remove = (
+ self._find_indices_to_remove_for_last_tool_pair(
+ recent_records
+ )
+ )
+ self.memory.remove_records_by_indices(indices_to_remove)
+
+ summary = await self.asummarize()
+
+ tool_notice = self._format_tool_limit_notice()
+ summary_messages = summary.get("summary", "")
+
+ if tool_notice:
+ summary_messages += "\n\n" + tool_notice
+ self._update_memory_with_summary(
+ summary_messages, include_summaries=False
+ )
+ self._last_token_limit_tool_signature = tool_signature
+ return await self._astep_non_streaming_task(
+ input_message, response_format
+ )
+
+ raise
+
 prev_num_openai_messages = len(openai_messages)
 iteration_count += 1
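Note: the added block above encodes a two-tier compression policy. As a minimal, illustrative sketch (standalone names, not the ChatAgent API; `threshold` stands in for whatever `_calculate_next_summary_threshold()` returns):

```python
from typing import Optional

def choose_summarization_mode(
    num_tokens: int,
    summary_tokens: int,
    token_limit: int,
    threshold: int,
    summary_window_ratio: float,
) -> Optional[str]:
    # Mirrors the added logic: only act while still under the hard limit.
    if num_tokens > token_limit:
        return None  # handled by the token-limit error path below instead
    if summary_tokens > token_limit * summary_window_ratio:
        # Accumulated summaries are themselves too large: compress everything,
        # including earlier summaries (include_summaries=True).
        return "full"
    if num_tokens > threshold:
        # Context grew past the rolling threshold: summarize only the
        # non-summary messages (include_summaries=False).
        return "partial"
    return None
```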
@@ -2091,6 +2898,8 @@ class ChatAgent(BaseAgent):
 if self.prune_tool_calls_from_memory and tool_call_records:
 self.memory.clean_tool_calls()

+ self._last_token_limit_user_signature = None
+
 return self._convert_to_chatagent_response(
 response,
 tool_call_records,
@@ -2187,6 +2996,8 @@ class ChatAgent(BaseAgent):
 if response:
 break
 except RateLimitError as e:
+ if self._is_token_limit_error(e):
+ raise
 last_error = e
 if attempt < self.retry_attempts - 1:
 delay = min(self.retry_delay * (2**attempt), 60.0)
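Note: both retry loops now re-raise immediately when a `RateLimitError` is actually a context-overflow error, so the summarization path above handles it rather than burning retries. `_is_token_limit_error` itself is not shown in this diff; a purely illustrative check (an assumption, not the library's implementation) could match provider error text:

```python
def looks_like_token_limit_error(exc: Exception) -> bool:
    # Hypothetical heuristic: several providers report context overflow
    # through generic rate-limit/4xx exceptions, distinguishable only by
    # their message text.
    text = str(exc).lower()
    markers = (
        "context length",
        "context_length_exceeded",
        "maximum context",
        "too many tokens",
        "prompt is too long",
    )
    return any(marker in text for marker in markers)
```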
@@ -2204,7 +3015,6 @@ class ChatAgent(BaseAgent):
 except Exception:
 logger.error(
 f"Model error: {self.model_backend.model_type}",
- exc_info=True,
 )
 raise
 else:
@@ -2251,6 +3061,8 @@ class ChatAgent(BaseAgent):
 if response:
 break
 except RateLimitError as e:
+ if self._is_token_limit_error(e):
+ raise
 last_error = e
 if attempt < self.retry_attempts - 1:
 delay = min(self.retry_delay * (2**attempt), 60.0)
@@ -2758,6 +3570,7 @@ class ChatAgent(BaseAgent):
 tool_call_id=tool_call_id,
 )

+ self._update_last_tool_call_state(tool_record)
 return tool_record

 def _stream(
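Note: `_update_last_tool_call_state` (called here and after each `ToolCallingRecord` in the hunks below) apparently remembers which tool call ran last, so the token-limit handler can detect the same call overflowing the context twice. One way such a signature could be derived, shown only as an assumption with hypothetical names:

```python
import hashlib
import json

def tool_call_signature(tool_name: str, args: dict) -> str:
    # Hypothetical sketch: a stable fingerprint of the last tool call,
    # comparable across retries. The real state tracked by
    # _update_last_tool_call_state may store different fields.
    payload = json.dumps(
        {"tool": tool_name, "args": args}, sort_keys=True, default=str
    )
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()
```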
@@ -3319,12 +4132,14 @@ class ChatAgent(BaseAgent):
 timestamp=base_timestamp + 1e-6,
 )

- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
 tool_name=function_name,
 args=args,
 result=result,
 tool_call_id=tool_call_id,
 )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record

 except Exception as e:
 error_msg = (
@@ -3346,12 +4161,14 @@ class ChatAgent(BaseAgent):

 self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)

- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
 tool_name=function_name,
 args=args,
 result=result,
 tool_call_id=tool_call_id,
 )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record
 else:
 logger.warning(
 f"Tool '{function_name}' not found in internal tools"
@@ -3373,6 +4190,23 @@ class ChatAgent(BaseAgent):
 tool_call_id = tool_call_data['id']

 if function_name in self._internal_tools:
+ # Create the tool call message
+ assist_msg = FunctionCallingMessage(
+ role_name=self.role_name,
+ role_type=self.role_type,
+ meta_dict=None,
+ content="",
+ func_name=function_name,
+ args=args,
+ tool_call_id=tool_call_id,
+ )
+ assist_ts = time.time_ns() / 1_000_000_000
+ self.update_memory(
+ assist_msg,
+ OpenAIBackendRole.ASSISTANT,
+ timestamp=assist_ts,
+ )
+
 tool = self._internal_tools[function_name]
 try:
 # Try different invocation paths in order of preference
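Note: this hunk moves the assistant's tool-call message into memory before the tool is invoked (the post-invocation recording is removed further down). A simplified sketch of the resulting write order, with illustrative names rather than the ChatAgent API:

```python
import time

def invoke_and_record(memory: list, tool, name: str, args: dict, call_id: str):
    # 1. Record the assistant's tool call first, with its own timestamp,
    #    so it is already in memory even if the tool output later overflows
    #    the context window.
    memory.append(("assistant", name, args, call_id, time.time_ns() / 1e9))
    # 2. Run the tool.
    result = tool(**args)
    # 3. Record the tool result as a separate, later-stamped entry.
    memory.append(("function", name, result, call_id, time.time_ns() / 1e9))
    return result
```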
@@ -3401,19 +4235,8 @@ class ChatAgent(BaseAgent):
 else:
 # Fallback: synchronous call
 result = tool(**args)
- # First, create and record the assistant message with tool
- # call
- assist_msg = FunctionCallingMessage(
- role_name=self.role_name,
- role_type=self.role_type,
- meta_dict=None,
- content="",
- func_name=function_name,
- args=args,
- tool_call_id=tool_call_id,
- )

- # Then create the tool response message
+ # Create the tool response message
 func_msg = FunctionCallingMessage(
 role_name=self.role_name,
 role_type=self.role_type,
@@ -3423,31 +4246,21 @@ class ChatAgent(BaseAgent):
 result=result,
 tool_call_id=tool_call_id,
 )
-
- # Record both messages with precise timestamps to ensure
- # correct ordering
- current_time_ns = time.time_ns()
- base_timestamp = (
- current_time_ns / 1_000_000_000
- ) # Convert to seconds
-
- self.update_memory(
- assist_msg,
- OpenAIBackendRole.ASSISTANT,
- timestamp=base_timestamp,
- )
+ func_ts = time.time_ns() / 1_000_000_000
 self.update_memory(
 func_msg,
 OpenAIBackendRole.FUNCTION,
- timestamp=base_timestamp + 1e-6,
+ timestamp=func_ts,
 )

- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
 tool_name=function_name,
 args=args,
 result=result,
 tool_call_id=tool_call_id,
 )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record

 except Exception as e:
 error_msg = (
@@ -3466,15 +4279,21 @@ class ChatAgent(BaseAgent):
 result=result,
 tool_call_id=tool_call_id,
 )
+ func_ts = time.time_ns() / 1_000_000_000
+ self.update_memory(
+ func_msg,
+ OpenAIBackendRole.FUNCTION,
+ timestamp=func_ts,
+ )

- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
-
- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
 tool_name=function_name,
 args=args,
 result=result,
 tool_call_id=tool_call_id,
 )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record
 else:
 logger.warning(
 f"Tool '{function_name}' not found in internal tools"
@@ -4176,23 +4995,29 @@ class ChatAgent(BaseAgent):
 # Toolkit doesn't support cloning, use original
 cloned_toolkits[toolkit_id] = toolkit_instance

- if getattr(
- tool.func, "__message_integration_enhanced__", False
- ):
- cloned_tools.append(
- FunctionTool(
- func=tool.func,
- openai_tool_schema=tool.get_openai_tool_schema(),
- )
- )
- continue
-
 # Get the method from the cloned (or original) toolkit
 toolkit = cloned_toolkits[toolkit_id]
 method_name = tool.func.__name__

+ # Check if toolkit was actually cloned or just reused
+ toolkit_was_cloned = toolkit is not toolkit_instance
+
 if hasattr(toolkit, method_name):
 new_method = getattr(toolkit, method_name)
+
+ # If toolkit wasn't cloned (stateless), preserve the
+ # original function to maintain any enhancements/wrappers
+ if not toolkit_was_cloned:
+ # Toolkit is stateless, safe to reuse original function
+ cloned_tools.append(
+ FunctionTool(
+ func=tool.func,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ )
+ continue
+
+ # Toolkit was cloned, use the new method
 # Wrap cloned method into a new FunctionTool,
 # preserving schema
 try: