camel-ai 0.2.79a0__py3-none-any.whl → 0.2.79a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of camel-ai has been flagged as potentially problematic; see the registry advisory for details.

@@ -20,7 +20,6 @@ import concurrent.futures
  import hashlib
  import inspect
  import json
- import math
  import os
  import random
  import re
@@ -57,6 +56,7 @@ from pydantic import BaseModel, ValidationError
 
  from camel.agents._types import ModelResponse, ToolCallRequest
  from camel.agents._utils import (
+ build_default_summary_prompt,
  convert_to_function_tool,
  convert_to_schema,
  get_info_dict,
@@ -68,6 +68,7 @@ from camel.logger import get_logger
  from camel.memories import (
  AgentMemory,
  ChatHistoryMemory,
+ ContextRecord,
  MemoryRecord,
  ScoreBasedContextCreator,
  )
@@ -102,6 +103,16 @@ from camel.utils import (
  from camel.utils.commons import dependencies_required
  from camel.utils.context_utils import ContextUtility
 
+ TOKEN_LIMIT_ERROR_MARKERS = (
+ "context_length_exceeded",
+ "prompt is too long",
+ "exceeded your current quota",
+ "tokens must be reduced",
+ "context length",
+ "token count",
+ "context limit",
+ )
+
  if TYPE_CHECKING:
  from camel.terminators import ResponseTerminator
 
@@ -354,9 +365,9 @@ class ChatAgent(BaseAgent):
  message_window_size (int, optional): The maximum number of previous
  messages to include in the context window. If `None`, no windowing
  is performed. (default: :obj:`None`)
- token_limit (int, optional): The maximum number of tokens in a context.
- The context will be automatically pruned to fulfill the limitation.
- If `None`, it will be set according to the backend model.
+ summarize_threshold (int, optional): The percentage of the context
+ window that triggers summarization. If `None`, summarization is
+ only triggered when the context window is full.
  (default: :obj:`None`)
  output_language (str, optional): The language to be output by the
  agent. (default: :obj:`None`)
@@ -414,6 +425,10 @@ class ChatAgent(BaseAgent):
  updates return accumulated content (current behavior). When False,
  partial updates return only the incremental delta. (default:
  :obj:`True`)
+ summary_window_ratio (float, optional): Maximum fraction of the total
+ context window that can be occupied by summary information. Used
+ to limit how much of the model's context is reserved for
+ summarization results. (default: :obj:`0.6`)
  """
 
  def __init__(
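Read together, the two new knobs control when compression starts and how much of the window summaries may occupy. A rough illustration, assuming a hypothetical 128k-token backend limit (the number is not taken from this diff):

    # Assumed values for illustration only.
    token_limit = 128_000            # reported by the model backend
    summarize_threshold = 50         # percent of the context window
    summary_window_ratio = 0.6       # cap on accumulated summary tokens

    first_trigger = token_limit * summarize_threshold / 100   # 64_000 tokens
    summary_cap = token_limit * summary_window_ratio          # 76_800 tokens
    # Beyond summary_cap the agent re-compresses everything (full compression)
    # instead of adding another progressive summary.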
@@ -436,6 +451,7 @@ class ChatAgent(BaseAgent):
  ] = None,
  memory: Optional[AgentMemory] = None,
  message_window_size: Optional[int] = None,
+ summarize_threshold: Optional[int] = 50,
  token_limit: Optional[int] = None,
  output_language: Optional[str] = None,
  tools: Optional[List[Union[FunctionTool, Callable]]] = None,
@@ -458,6 +474,7 @@ class ChatAgent(BaseAgent):
  retry_delay: float = 1.0,
  step_timeout: Optional[float] = None,
  stream_accumulate: bool = True,
+ summary_window_ratio: float = 0.6,
  ) -> None:
  if isinstance(model, ModelManager):
  self.model_backend = model
@@ -476,7 +493,7 @@ class ChatAgent(BaseAgent):
  # Set up memory
  context_creator = ScoreBasedContextCreator(
  self.model_backend.token_counter,
- token_limit or self.model_backend.token_limit,
+ self.model_backend.token_limit,
  )
 
  self._memory: AgentMemory = memory or ChatHistoryMemory(
@@ -491,9 +508,7 @@ class ChatAgent(BaseAgent):
 
  # Set up system message and initialize messages
  self._original_system_message = (
- BaseMessage.make_assistant_message(
- role_name="Assistant", content=system_message
- )
+ BaseMessage.make_system_message(system_message)
  if isinstance(system_message, str)
  else system_message
  )
@@ -503,6 +518,21 @@ class ChatAgent(BaseAgent):
  )
  self.init_messages()
 
+ # Set up summarize threshold with validation
+ if summarize_threshold is not None:
+ if not (0 < summarize_threshold <= 100):
+ raise ValueError(
+ f"summarize_threshold must be between 0 and 100, "
+ f"got {summarize_threshold}"
+ )
+ logger.info(
+ f"Automatic context compression is enabled. Will trigger "
+ f"summarization when context window exceeds "
+ f"{summarize_threshold}% of the total token limit."
+ )
+ self.summarize_threshold = summarize_threshold
+ self._reset_summary_state()
+
  # Set up role name and role type
  self.role_name: str = (
  getattr(self.system_message, "role_name", None) or "assistant"
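For orientation, wiring the new parameters into an agent would look roughly like the following; the values are hypothetical, and only `summarize_threshold` and `summary_window_ratio` are new in this release:

    from camel.agents import ChatAgent

    agent = ChatAgent(
        system_message="You are a helpful assistant.",
        summarize_threshold=50,     # compress once 50% of the window is used
        summary_window_ratio=0.6,   # at most 60% of the window may hold summaries
    )
    agent.reset()                   # also resets the summary-token counter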
@@ -550,11 +580,16 @@ class ChatAgent(BaseAgent):
  self._context_utility: Optional[ContextUtility] = None
  self._context_summary_agent: Optional["ChatAgent"] = None
  self.stream_accumulate = stream_accumulate
+ self._last_tool_call_record: Optional[ToolCallingRecord] = None
+ self._last_tool_call_signature: Optional[str] = None
+ self._last_token_limit_tool_signature: Optional[str] = None
+ self.summary_window_ratio = summary_window_ratio
 
  def reset(self):
  r"""Resets the :obj:`ChatAgent` to its initial state."""
  self.terminated = False
  self.init_messages()
+ self._reset_summary_state()
  for terminator in self.response_terminators:
  terminator.reset()
 
@@ -761,6 +796,329 @@ class ChatAgent(BaseAgent):
761
796
  for func_tool in self._internal_tools.values()
762
797
  ]
763
798
 
799
+ @staticmethod
800
+ def _is_token_limit_error(error: Exception) -> bool:
801
+ r"""Return True when the exception message indicates a token limit."""
802
+ error_message = str(error).lower()
803
+ return any(
804
+ marker in error_message for marker in TOKEN_LIMIT_ERROR_MARKERS
805
+ )
806
+
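Detection is a plain substring scan over the lowered exception text, so any provider error message containing one of the markers is treated as a token-limit failure. A minimal standalone sketch of the same check (with a trimmed marker list):

    TOKEN_LIMIT_ERROR_MARKERS = ("context_length_exceeded", "prompt is too long", "context limit")

    def is_token_limit_error(error: Exception) -> bool:
        message = str(error).lower()
        return any(marker in message for marker in TOKEN_LIMIT_ERROR_MARKERS)

    print(is_token_limit_error(RuntimeError("Error code: 400 - context_length_exceeded")))  # True
    print(is_token_limit_error(RuntimeError("rate limit reached")))                         # False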
807
+ @staticmethod
808
+ def _is_tool_related_record(record: MemoryRecord) -> bool:
809
+ r"""Determine whether the given memory record
810
+ belongs to a tool call."""
811
+ if record.role_at_backend in {
812
+ OpenAIBackendRole.TOOL,
813
+ OpenAIBackendRole.FUNCTION,
814
+ }:
815
+ return True
816
+
817
+ if (
818
+ record.role_at_backend == OpenAIBackendRole.ASSISTANT
819
+ and isinstance(record.message, FunctionCallingMessage)
820
+ ):
821
+ return True
822
+
823
+ return False
824
+
825
+ def _find_indices_to_remove_for_last_tool_pair(
826
+ self, recent_records: List[ContextRecord]
827
+ ) -> List[int]:
828
+ """Find indices of records that should be removed to clean up the most
829
+ recent incomplete tool interaction pair.
830
+
831
+ This method identifies tool call/result pairs by tool_call_id and
832
+ returns the exact indices to remove, allowing non-contiguous deletions.
833
+
834
+ Logic:
835
+ - If the last record is a tool result (TOOL/FUNCTION) with a
836
+ tool_call_id, find the matching assistant call anywhere in history
837
+ and return both indices.
838
+ - If the last record is an assistant tool call without a result yet,
839
+ return just that index.
840
+ - For normal messages (non tool-related): remove just the last one.
841
+ - Fallback: If no tool_call_id is available, use heuristic (last 2 if
842
+ tool-related, otherwise last 1).
843
+
844
+ Returns:
845
+ List[int]: Indices to remove (may be non-contiguous).
846
+ """
847
+ if not recent_records:
848
+ return []
849
+
850
+ last_idx = len(recent_records) - 1
851
+ last_record = recent_records[last_idx].memory_record
852
+
853
+ # Case A: Last is an ASSISTANT tool call with no result yet
854
+ if (
855
+ last_record.role_at_backend == OpenAIBackendRole.ASSISTANT
856
+ and isinstance(last_record.message, FunctionCallingMessage)
857
+ and last_record.message.result is None
858
+ ):
859
+ return [last_idx]
860
+
861
+ # Case B: Last is TOOL/FUNCTION result, try id-based pairing
862
+ if last_record.role_at_backend in {
863
+ OpenAIBackendRole.TOOL,
864
+ OpenAIBackendRole.FUNCTION,
865
+ }:
866
+ tool_id = None
867
+ if isinstance(last_record.message, FunctionCallingMessage):
868
+ tool_id = last_record.message.tool_call_id
869
+
870
+ if tool_id:
871
+ for idx in range(len(recent_records) - 2, -1, -1):
872
+ rec = recent_records[idx].memory_record
873
+ if rec.role_at_backend != OpenAIBackendRole.ASSISTANT:
874
+ continue
875
+
876
+ # Check if this assistant message contains the tool_call_id
877
+ matched = False
878
+
879
+ # Case 1: FunctionCallingMessage (single tool call)
880
+ if isinstance(rec.message, FunctionCallingMessage):
881
+ if rec.message.tool_call_id == tool_id:
882
+ matched = True
883
+
884
+ # Case 2: BaseMessage with multiple tool_calls in meta_dict
885
+ elif (
886
+ hasattr(rec.message, "meta_dict")
887
+ and rec.message.meta_dict
888
+ ):
889
+ tool_calls_list = rec.message.meta_dict.get(
890
+ "tool_calls", []
891
+ )
892
+ if isinstance(tool_calls_list, list):
893
+ for tc in tool_calls_list:
894
+ if (
895
+ isinstance(tc, dict)
896
+ and tc.get("id") == tool_id
897
+ ):
898
+ matched = True
899
+ break
900
+
901
+ if matched:
902
+ # Return both assistant call and tool result indices
903
+ return [idx, last_idx]
904
+
905
+ # Fallback: no tool_call_id, use heuristic
906
+ if self._is_tool_related_record(last_record):
907
+ # Remove last 2 (assume they are paired)
908
+ return [last_idx - 1, last_idx] if last_idx > 0 else [last_idx]
909
+ else:
910
+ return [last_idx]
911
+
912
+ # Default: non tool-related tail => remove last one
913
+ return [last_idx]
914
+
915
+ @staticmethod
916
+ def _serialize_tool_args(args: Dict[str, Any]) -> str:
917
+ try:
918
+ return json.dumps(args, ensure_ascii=False, sort_keys=True)
919
+ except TypeError:
920
+ return str(args)
921
+
922
+ @classmethod
923
+ def _build_tool_signature(
924
+ cls, func_name: str, args: Dict[str, Any]
925
+ ) -> str:
926
+ args_repr = cls._serialize_tool_args(args)
927
+ return f"{func_name}:{args_repr}"
928
+
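Because arguments are serialized with sorted keys, two calls to the same tool with the same arguments always yield the same signature, which is how the retry path later recognizes a repeated offending call. Illustrative values:

    import json

    def build_tool_signature(func_name: str, args: dict) -> str:
        return f"{func_name}:{json.dumps(args, ensure_ascii=False, sort_keys=True)}"

    build_tool_signature("search", {"query": "camel", "limit": 5})
    # -> 'search:{"limit": 5, "query": "camel"}'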
929
+ def _describe_tool_call(
930
+ self, record: Optional[ToolCallingRecord]
931
+ ) -> Optional[str]:
932
+ if record is None:
933
+ return None
934
+ args_repr = self._serialize_tool_args(record.args)
935
+ return f"Tool `{record.tool_name}` invoked with arguments {args_repr}."
936
+
937
+ def _update_last_tool_call_state(
938
+ self, record: Optional[ToolCallingRecord]
939
+ ) -> None:
940
+ """Track the most recent tool call and its identifying signature."""
941
+ self._last_tool_call_record = record
942
+ if record is None:
943
+ self._last_tool_call_signature = None
944
+ return
945
+
946
+ args = (
947
+ record.args
948
+ if isinstance(record.args, dict)
949
+ else {"_raw": record.args}
950
+ )
951
+ try:
952
+ signature = self._build_tool_signature(record.tool_name, args)
953
+ except Exception: # pragma: no cover - defensive guard
954
+ signature = None
955
+ self._last_tool_call_signature = signature
956
+
957
+ def _format_tool_limit_notice(self) -> Optional[str]:
958
+ record = self._last_tool_call_record
959
+ description = self._describe_tool_call(record)
960
+ if description is None:
961
+ return None
962
+ notice_lines = [
963
+ "[Tool Call Causing Token Limit]",
964
+ description,
965
+ ]
966
+
967
+ if record is not None:
968
+ result = record.result
969
+ if isinstance(result, bytes):
970
+ result_repr = result.decode(errors="replace")
971
+ elif isinstance(result, str):
972
+ result_repr = result
973
+ else:
974
+ try:
975
+ result_repr = json.dumps(
976
+ result, ensure_ascii=False, sort_keys=True
977
+ )
978
+ except (TypeError, ValueError):
979
+ result_repr = str(result)
980
+
981
+ result_length = len(result_repr)
982
+ notice_lines.append(f"Tool result length: {result_length}")
983
+ if self.model_backend.token_limit != 999999999:
984
+ notice_lines.append(
985
+ f"Token limit: {self.model_backend.token_limit}"
986
+ )
987
+
988
+ return "\n".join(notice_lines)
989
+
990
+ @staticmethod
991
+ def _append_user_messages_section(
992
+ summary_content: str, user_messages: List[str]
993
+ ) -> str:
994
+ section_title = "- **All User Messages**:"
995
+ sanitized_messages: List[str] = []
996
+ for msg in user_messages:
997
+ if not isinstance(msg, str):
998
+ msg = str(msg)
999
+ cleaned = " ".join(msg.strip().splitlines())
1000
+ if cleaned:
1001
+ sanitized_messages.append(cleaned)
1002
+
1003
+ bullet_block = (
1004
+ "\n".join(f"- {m}" for m in sanitized_messages)
1005
+ if sanitized_messages
1006
+ else "- None noted"
1007
+ )
1008
+ user_section = f"{section_title}\n{bullet_block}"
1009
+
1010
+ summary_clean = summary_content.rstrip()
1011
+ separator = "\n\n" if summary_clean else ""
1012
+ return f"{summary_clean}{separator}{user_section}"
1013
+
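The helper flattens every user message onto one line and appends them as bullets under an "All User Messages" heading (an empty list becomes "- None noted"). A sketch of the resulting text, with made-up inputs:

    summary = "- **Key Decisions**:\n- Use SQLite for storage"
    user_messages = ["Please set up the DB.\nUse SQLite.", "   "]
    # _append_user_messages_section(summary, user_messages) would end with:
    #
    # - **All User Messages**:
    # - Please set up the DB. Use SQLite.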
1014
+ def _reset_summary_state(self) -> None:
1015
+ self._summary_token_count = 0 # Total tokens in summary messages
1016
+
1017
+ def _calculate_next_summary_threshold(self) -> int:
1018
+ r"""Calculate the next token threshold that should trigger
1019
+ summarization.
1020
+
1021
+ The threshold calculation follows a progressive strategy:
1022
+ - First time: token_limit * (summarize_threshold / 100)
1023
+ - Subsequent times: (limit - summary_tokens) * (summarize_threshold / 100) + summary_tokens
1024
+
1025
+ This ensures that as summaries accumulate, the threshold adapts
1026
+ to maintain a reasonable balance between context and summaries.
1027
+
1028
+ Returns:
1029
+ int: The token count threshold for next summarization.
1030
+ """
1031
+ token_limit = self.model_backend.token_limit
1032
+ summary_token_count = self._summary_token_count
1033
+
1034
+ # First summarization: use the percentage threshold
1035
+ if summary_token_count == 0:
1036
+ threshold = int(token_limit * self.summarize_threshold / 100)
1037
+ else:
1038
+ # Subsequent summarizations: adaptive threshold
1039
+ threshold = int(
1040
+ (token_limit - summary_token_count)
1041
+ * self.summarize_threshold
1042
+ / 100
1043
+ + summary_token_count
1044
+ )
1045
+
1046
+ return threshold
1047
+
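Continuing the illustrative numbers from earlier (not values from this diff), the progression works out as follows:

    token_limit = 128_000
    summarize_threshold = 50

    # No summaries yet: trigger at 50% of the full window.
    first = int(token_limit * summarize_threshold / 100)                    # 64_000

    # After a summary of ~6_000 tokens has been written back into memory:
    summary_tokens = 6_000
    second = int((token_limit - summary_tokens) * summarize_threshold / 100
                 + summary_tokens)                                          # 67_000

Each round therefore applies the configured percentage to the non-summary budget and shifts the result up by the tokens the summaries themselves occupy.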
1048
+ def _update_memory_with_summary(
1049
+ self, summary: str, include_summaries: bool = False
1050
+ ) -> None:
1051
+ r"""Update memory with summary result.
1052
+
1053
+ This method handles memory clearing and restoration of summaries based
1054
+ on whether it's a progressive or full compression.
1055
+ """
1056
+
1057
+ summary_content: str = summary
1058
+
1059
+ existing_summaries = []
1060
+ if not include_summaries:
1061
+ messages, _ = self.memory.get_context()
1062
+ for msg in messages:
1063
+ content = msg.get('content', '')
1064
+ if isinstance(content, str) and content.startswith(
1065
+ '[CONTEXT_SUMMARY]'
1066
+ ):
1067
+ existing_summaries.append(msg)
1068
+
1069
+ # Clear memory
1070
+ self.clear_memory()
1071
+
1072
+ # Restore old summaries (for progressive compression)
1073
+ for old_summary in existing_summaries:
1074
+ content = old_summary.get('content', '')
1075
+ if not isinstance(content, str):
1076
+ content = str(content)
1077
+ summary_msg = BaseMessage.make_assistant_message(
1078
+ role_name="assistant", content=content
1079
+ )
1080
+ self.update_memory(summary_msg, OpenAIBackendRole.ASSISTANT)
1081
+
1082
+ # Add new summary
1083
+ new_summary_msg = BaseMessage.make_assistant_message(
1084
+ role_name="assistant", content=summary_content
1085
+ )
1086
+ self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT)
1087
+ input_message = BaseMessage.make_assistant_message(
1088
+ role_name="assistant",
1089
+ content=(
1090
+ "Please continue the conversation from "
1091
+ "where we left it off without asking the user any further "
1092
+ "questions. Continue with the last task that you were "
1093
+ "asked to work on."
1094
+ ),
1095
+ )
1096
+ self.update_memory(input_message, OpenAIBackendRole.ASSISTANT)
1097
+ # Update token count
1098
+ try:
1099
+ summary_tokens = (
1100
+ self.model_backend.token_counter.count_tokens_from_messages(
1101
+ [{"role": "assistant", "content": summary_content}]
1102
+ )
1103
+ )
1104
+
1105
+ if include_summaries: # Full compression - reset count
1106
+ self._summary_token_count = summary_tokens
1107
+ logger.info(
1108
+ f"Full compression: Summary with {summary_tokens} tokens. "
1109
+ f"Total summary tokens reset to: {summary_tokens}"
1110
+ )
1111
+ else: # Progressive compression - accumulate
1112
+ self._summary_token_count += summary_tokens
1113
+ logger.info(
1114
+ f"Progressive compression: New summary "
1115
+ f"with {summary_tokens} tokens. "
1116
+ f"Total summary tokens: "
1117
+ f"{self._summary_token_count}"
1118
+ )
1119
+ except Exception as e:
1120
+ logger.warning(f"Failed to count summary tokens: {e}")
1121
+
764
1122
  def _get_external_tool_names(self) -> Set[str]:
765
1123
  r"""Returns a set of external tool names."""
766
1124
  return set(self._external_tool_schemas.keys())
@@ -822,16 +1180,6 @@ class ChatAgent(BaseAgent):
822
1180
  ) -> None:
823
1181
  r"""Updates the agent memory with a new message.
824
1182
 
825
- If the single *message* exceeds the model's context window, it will
826
- be **automatically split into multiple smaller chunks** before being
827
- written into memory. This prevents later failures in
828
- `ScoreBasedContextCreator` where an over-sized message cannot fit
829
- into the available token budget at all.
830
-
831
- This slicing logic handles both regular text messages (in the
832
- `content` field) and long tool call results (in the `result` field of
833
- a `FunctionCallingMessage`).
834
-
835
1183
  Args:
836
1184
  message (BaseMessage): The new message to add to the stored
837
1185
  messages.
@@ -841,151 +1189,15 @@ class ChatAgent(BaseAgent):
841
1189
  (default: :obj:`None`)
842
1190
  (default: obj:`None`)
843
1191
  """
844
-
845
- # 1. Helper to write a record to memory
846
- def _write_single_record(
847
- message: BaseMessage, role: OpenAIBackendRole, timestamp: float
848
- ):
849
- self.memory.write_record(
850
- MemoryRecord(
851
- message=message,
852
- role_at_backend=role,
853
- timestamp=timestamp,
854
- agent_id=self.agent_id,
855
- )
856
- )
857
-
858
- base_ts = (
859
- timestamp
1192
+ record = MemoryRecord(
1193
+ message=message,
1194
+ role_at_backend=role,
1195
+ timestamp=timestamp
860
1196
  if timestamp is not None
861
- else time.time_ns() / 1_000_000_000
862
- )
863
-
864
- # 2. Get token handling utilities, fallback if unavailable
865
- try:
866
- context_creator = self.memory.get_context_creator()
867
- token_counter = context_creator.token_counter
868
- token_limit = context_creator.token_limit
869
- except AttributeError:
870
- _write_single_record(message, role, base_ts)
871
- return
872
-
873
- # 3. Check if slicing is necessary
874
- try:
875
- current_tokens = token_counter.count_tokens_from_messages(
876
- [message.to_openai_message(role)]
877
- )
878
-
879
- _, ctx_tokens = self.memory.get_context()
880
-
881
- remaining_budget = max(0, token_limit - ctx_tokens)
882
-
883
- if current_tokens <= remaining_budget:
884
- _write_single_record(message, role, base_ts)
885
- return
886
- except Exception as e:
887
- logger.warning(
888
- f"Token calculation failed before chunking, "
889
- f"writing message as-is. Error: {e}"
890
- )
891
- _write_single_record(message, role, base_ts)
892
- return
893
-
894
- # 4. Perform slicing
895
- logger.warning(
896
- f"Message with {current_tokens} tokens exceeds remaining budget "
897
- f"of {remaining_budget}. Slicing into smaller chunks."
1197
+ else time.time_ns() / 1_000_000_000, # Nanosecond precision
1198
+ agent_id=self.agent_id,
898
1199
  )
899
-
900
- text_to_chunk: Optional[str] = None
901
- is_function_result = False
902
-
903
- if isinstance(message, FunctionCallingMessage) and isinstance(
904
- message.result, str
905
- ):
906
- text_to_chunk = message.result
907
- is_function_result = True
908
- elif isinstance(message.content, str):
909
- text_to_chunk = message.content
910
-
911
- if not text_to_chunk or not text_to_chunk.strip():
912
- _write_single_record(message, role, base_ts)
913
- return
914
- # Encode the entire text to get a list of all token IDs
915
- try:
916
- all_token_ids = token_counter.encode(text_to_chunk)
917
- except Exception as e:
918
- logger.error(f"Failed to encode text for chunking: {e}")
919
- _write_single_record(message, role, base_ts) # Fallback
920
- return
921
-
922
- if not all_token_ids:
923
- _write_single_record(message, role, base_ts) # Nothing to chunk
924
- return
925
-
926
- # 1. Base chunk size: one-tenth of the smaller of (a) total token
927
- # limit and (b) current remaining budget. This prevents us from
928
- # creating chunks that are guaranteed to overflow the
929
- # immediate context window.
930
- base_chunk_size = max(1, remaining_budget) // 10
931
-
932
- # 2. Each chunk gets a textual prefix such as:
933
- # "[chunk 3/12 of a long message]\n"
934
- # The prefix itself consumes tokens, so if we do not subtract its
935
- # length the *total* tokens of the outgoing message (prefix + body)
936
- # can exceed the intended bound. We estimate the prefix length
937
- # with a representative example that is safely long enough for the
938
- # vast majority of cases (three-digit indices).
939
- sample_prefix = "[chunk 1/1000 of a long message]\n"
940
- prefix_token_len = len(token_counter.encode(sample_prefix))
941
-
942
- # 3. The real capacity for the message body is therefore the base
943
- # chunk size minus the prefix length. Fallback to at least one
944
- # token to avoid zero or negative sizes.
945
- chunk_body_limit = max(1, base_chunk_size - prefix_token_len)
946
-
947
- # 4. Calculate how many chunks we will need with this body size.
948
- num_chunks = math.ceil(len(all_token_ids) / chunk_body_limit)
949
- group_id = str(uuid.uuid4())
950
-
951
- for i in range(num_chunks):
952
- start_idx = i * chunk_body_limit
953
- end_idx = start_idx + chunk_body_limit
954
- chunk_token_ids = all_token_ids[start_idx:end_idx]
955
-
956
- chunk_body = token_counter.decode(chunk_token_ids)
957
-
958
- prefix = f"[chunk {i + 1}/{num_chunks} of a long message]\n"
959
- new_body = prefix + chunk_body
960
-
961
- if is_function_result and isinstance(
962
- message, FunctionCallingMessage
963
- ):
964
- new_msg: BaseMessage = FunctionCallingMessage(
965
- role_name=message.role_name,
966
- role_type=message.role_type,
967
- meta_dict=message.meta_dict,
968
- content=message.content,
969
- func_name=message.func_name,
970
- args=message.args,
971
- result=new_body,
972
- tool_call_id=message.tool_call_id,
973
- )
974
- else:
975
- new_msg = message.create_new_instance(new_body)
976
-
977
- meta = (new_msg.meta_dict or {}).copy()
978
- meta.update(
979
- {
980
- "chunk_idx": i + 1,
981
- "chunk_total": num_chunks,
982
- "chunk_group_id": group_id,
983
- }
984
- )
985
- new_msg.meta_dict = meta
986
-
987
- # Increment timestamp slightly to maintain order
988
- _write_single_record(new_msg, role, base_ts + i * 1e-6)
1200
+ self.memory.write_record(record)
989
1201
 
990
1202
  def load_memory(self, memory: AgentMemory) -> None:
991
1203
  r"""Load the provided memory into the agent.
@@ -1070,6 +1282,8 @@ class ChatAgent(BaseAgent):
1070
1282
  summary_prompt: Optional[str] = None,
1071
1283
  response_format: Optional[Type[BaseModel]] = None,
1072
1284
  working_directory: Optional[Union[str, Path]] = None,
1285
+ include_summaries: bool = False,
1286
+ add_user_messages: bool = True,
1073
1287
  ) -> Dict[str, Any]:
1074
1288
  r"""Summarize the agent's current conversation context and persist it
1075
1289
  to a markdown file.
@@ -1089,10 +1303,16 @@ class ChatAgent(BaseAgent):
1089
1303
  defining the expected structure of the response. If provided,
1090
1304
  the summary will be generated as structured output and included
1091
1305
  in the result.
1306
+ include_summaries (bool): Whether to include previously generated
1307
+ summaries in the content to be summarized. If False (default),
1308
+ only non-summary messages will be summarized. If True, all
1309
+ messages including previous summaries will be summarized
1310
+ (full compression). (default: :obj:`False`)
1092
1311
  working_directory (Optional[str|Path]): Optional directory to save
1093
1312
  the markdown summary file. If provided, overrides the default
1094
1313
  directory used by ContextUtility.
1095
-
1314
+ add_user_messages (bool): Whether to add the user messages to the summary.
1315
+ (default: :obj:`True`)
1096
1316
  Returns:
1097
1317
  Dict[str, Any]: A dictionary containing the summary text, file
1098
1318
  path, status message, and optionally structured_summary if
@@ -1138,10 +1358,17 @@ class ChatAgent(BaseAgent):
1138
1358
 
1139
1359
  # Convert messages to conversation text
1140
1360
  conversation_lines = []
1361
+ user_messages: List[str] = []
1141
1362
  for message in messages:
1142
1363
  role = message.get('role', 'unknown')
1143
1364
  content = message.get('content', '')
1144
1365
 
1366
+ # Skip summary messages if include_summaries is False
1367
+ if not include_summaries and isinstance(content, str):
1368
+ # Check if this is a summary message by looking for marker
1369
+ if content.startswith('[CONTEXT_SUMMARY]'):
1370
+ continue
1371
+
1145
1372
  # Handle tool call messages (assistant calling tools)
1146
1373
  tool_calls = message.get('tool_calls')
1147
1374
  if tool_calls and isinstance(tool_calls, (list, tuple)):
@@ -1193,6 +1420,9 @@ class ChatAgent(BaseAgent):
1193
1420
 
1194
1421
  # Handle regular content messages (user/assistant/system)
1195
1422
  elif content:
1423
+ content = str(content)
1424
+ if role == 'user':
1425
+ user_messages.append(content)
1196
1426
  conversation_lines.append(f"{role}: {content}")
1197
1427
 
1198
1428
  conversation_text = "\n".join(conversation_lines).strip()
@@ -1223,11 +1453,7 @@ class ChatAgent(BaseAgent):
1223
1453
  f"{conversation_text}"
1224
1454
  )
1225
1455
  else:
1226
- prompt_text = (
1227
- "Summarize the context information in concise markdown "
1228
- "bullet points highlighting key decisions, action items.\n"
1229
- f"Context information:\n{conversation_text}"
1230
- )
1456
+ prompt_text = build_default_summary_prompt(conversation_text)
1231
1457
 
1232
1458
  try:
1233
1459
  # Use structured output if response_format is provided
@@ -1297,6 +1523,10 @@ class ChatAgent(BaseAgent):
1297
1523
  summary_content = context_util.structured_output_to_markdown(
1298
1524
  structured_data=structured_output, metadata=metadata
1299
1525
  )
1526
+ if add_user_messages:
1527
+ summary_content = self._append_user_messages_section(
1528
+ summary_content, user_messages
1529
+ )
1300
1530
 
1301
1531
  # Save the markdown (either custom structured or default)
1302
1532
  save_status = context_util.save_markdown_file(
@@ -1311,7 +1541,10 @@ class ChatAgent(BaseAgent):
1311
1541
  file_path = (
1312
1542
  context_util.get_working_directory() / f"{base_filename}.md"
1313
1543
  )
1314
-
1544
+ summary_content = (
1545
+ f"[CONTEXT_SUMMARY] The following is a summary of our "
1546
+ f"conversation from a previous session: {summary_content}"
1547
+ )
1315
1548
  # Prepare result dictionary
1316
1549
  result_dict = {
1317
1550
  "summary": summary_content,
@@ -1336,6 +1569,8 @@ class ChatAgent(BaseAgent):
1336
1569
  summary_prompt: Optional[str] = None,
1337
1570
  response_format: Optional[Type[BaseModel]] = None,
1338
1571
  working_directory: Optional[Union[str, Path]] = None,
1572
+ include_summaries: bool = False,
1573
+ add_user_messages: bool = True,
1339
1574
  ) -> Dict[str, Any]:
1340
1575
  r"""Asynchronously summarize the agent's current conversation context
1341
1576
  and persist it to a markdown file.
@@ -1358,7 +1593,13 @@ class ChatAgent(BaseAgent):
1358
1593
  working_directory (Optional[str|Path]): Optional directory to save
1359
1594
  the markdown summary file. If provided, overrides the default
1360
1595
  directory used by ContextUtility.
1361
-
1596
+ include_summaries (bool): Whether to include previously generated
1597
+ summaries in the content to be summarized. If False (default),
1598
+ only non-summary messages will be summarized. If True, all
1599
+ messages including previous summaries will be summarized
1600
+ (full compression). (default: :obj:`False`)
1601
+ add_user_messages (bool): Whether to add the user messages to the summary.
1602
+ (default: :obj:`True`)
1362
1603
  Returns:
1363
1604
  Dict[str, Any]: A dictionary containing the summary text, file
1364
1605
  path, status message, and optionally structured_summary if
@@ -1394,10 +1635,17 @@ class ChatAgent(BaseAgent):
1394
1635
 
1395
1636
  # Convert messages to conversation text
1396
1637
  conversation_lines = []
1638
+ user_messages: List[str] = []
1397
1639
  for message in messages:
1398
1640
  role = message.get('role', 'unknown')
1399
1641
  content = message.get('content', '')
1400
1642
 
1643
+ # Skip summary messages if include_summaries is False
1644
+ if not include_summaries and isinstance(content, str):
1645
+ # Check if this is a summary message by looking for marker
1646
+ if content.startswith('[CONTEXT_SUMMARY]'):
1647
+ continue
1648
+
1401
1649
  # Handle tool call messages (assistant calling tools)
1402
1650
  tool_calls = message.get('tool_calls')
1403
1651
  if tool_calls and isinstance(tool_calls, (list, tuple)):
@@ -1449,6 +1697,9 @@ class ChatAgent(BaseAgent):
1449
1697
 
1450
1698
  # Handle regular content messages (user/assistant/system)
1451
1699
  elif content:
1700
+ content = str(content)
1701
+ if role == 'user':
1702
+ user_messages.append(content)
1452
1703
  conversation_lines.append(f"{role}: {content}")
1453
1704
 
1454
1705
  conversation_text = "\n".join(conversation_lines).strip()
@@ -1479,11 +1730,7 @@ class ChatAgent(BaseAgent):
1479
1730
  f"{conversation_text}"
1480
1731
  )
1481
1732
  else:
1482
- prompt_text = (
1483
- "Summarize the context information in concise markdown "
1484
- "bullet points highlighting key decisions, action items.\n"
1485
- f"Context information:\n{conversation_text}"
1486
- )
1733
+ prompt_text = build_default_summary_prompt(conversation_text)
1487
1734
 
1488
1735
  try:
1489
1736
  # Use structured output if response_format is provided
@@ -1562,6 +1809,10 @@ class ChatAgent(BaseAgent):
1562
1809
  summary_content = context_util.structured_output_to_markdown(
1563
1810
  structured_data=structured_output, metadata=metadata
1564
1811
  )
1812
+ if add_user_messages:
1813
+ summary_content = self._append_user_messages_section(
1814
+ summary_content, user_messages
1815
+ )
1565
1816
 
1566
1817
  # Save the markdown (either custom structured or default)
1567
1818
  save_status = context_util.save_markdown_file(
@@ -1577,6 +1828,11 @@ class ChatAgent(BaseAgent):
1577
1828
  context_util.get_working_directory() / f"{base_filename}.md"
1578
1829
  )
1579
1830
 
1831
+ summary_content = (
1832
+ f"[CONTEXT_SUMMARY] The following is a summary of our "
1833
+ f"conversation from a previous session: {summary_content}"
1834
+ )
1835
+
1580
1836
  # Prepare result dictionary
1581
1837
  result_dict = {
1582
1838
  "summary": summary_content,
@@ -1604,7 +1860,14 @@ class ChatAgent(BaseAgent):
1604
1860
  self.memory.clear()
1605
1861
 
1606
1862
  if self.system_message is not None:
1607
- self.update_memory(self.system_message, OpenAIBackendRole.SYSTEM)
1863
+ self.memory.write_record(
1864
+ MemoryRecord(
1865
+ message=self.system_message,
1866
+ role_at_backend=OpenAIBackendRole.SYSTEM,
1867
+ timestamp=time.time_ns() / 1_000_000_000,
1868
+ agent_id=self.agent_id,
1869
+ )
1870
+ )
1608
1871
 
1609
1872
  def _generate_system_message_for_output_language(
1610
1873
  self,
@@ -1629,26 +1892,70 @@ class ChatAgent(BaseAgent):
1629
1892
  content = self._original_system_message.content + language_prompt
1630
1893
  return self._original_system_message.create_new_instance(content)
1631
1894
  else:
1632
- return BaseMessage.make_assistant_message(
1633
- role_name="Assistant",
1634
- content=language_prompt,
1635
- )
1895
+ return BaseMessage.make_system_message(language_prompt)
1636
1896
 
1637
1897
  def init_messages(self) -> None:
1638
1898
  r"""Initializes the stored messages list with the current system
1639
1899
  message.
1640
1900
  """
1641
- self.memory.clear()
1642
- # Write system message to memory if provided
1643
- if self.system_message is not None:
1644
- self.memory.write_record(
1645
- MemoryRecord(
1646
- message=self.system_message,
1647
- role_at_backend=OpenAIBackendRole.SYSTEM,
1648
- timestamp=time.time_ns() / 1_000_000_000,
1649
- agent_id=self.agent_id,
1650
- )
1651
- )
1901
+ self._reset_summary_state()
1902
+ self.clear_memory()
1903
+
1904
+ def update_system_message(
1905
+ self,
1906
+ system_message: Union[BaseMessage, str],
1907
+ reset_memory: bool = True,
1908
+ ) -> None:
1909
+ r"""Update the system message.
1910
+ By default, it resets the conversation with the new system message.
1911
+
1912
+ Args:
1913
+ system_message (Union[BaseMessage, str]): The new system message.
1914
+ Can be either a BaseMessage object or a string.
1915
+ If a string is provided, it will be converted
1916
+ into a BaseMessage object.
1917
+ reset_memory (bool):
1918
+ Whether to reinitialize conversation messages after updating
1919
+ the system message. Defaults to True.
1920
+ """
1921
+ if system_message is None:
1922
+ raise ValueError("system_message is required and cannot be None. ")
1923
+ self._original_system_message = (
1924
+ BaseMessage.make_system_message(system_message)
1925
+ if isinstance(system_message, str)
1926
+ else system_message
1927
+ )
1928
+ self._system_message = (
1929
+ self._generate_system_message_for_output_language()
1930
+ )
1931
+ if reset_memory:
1932
+ self.init_messages()
1933
+
1934
+ def append_to_system_message(
1935
+ self, content: str, reset_memory: bool = True
1936
+ ) -> None:
1937
+ """Append additional context to existing system message.
1938
+
1939
+ Args:
1940
+ content (str): The additional system message.
1941
+ reset_memory (bool):
1942
+ Whether to reinitialize conversation messages after appending
1943
+ additional context. Defaults to True.
1944
+ """
1945
+ original_content = (
1946
+ self._original_system_message.content
1947
+ if self._original_system_message
1948
+ else ""
1949
+ )
1950
+ new_system_message = original_content + '\n' + content
1951
+ self._original_system_message = BaseMessage.make_system_message(
1952
+ new_system_message
1953
+ )
1954
+ self._system_message = (
1955
+ self._generate_system_message_for_output_language()
1956
+ )
1957
+ if reset_memory:
1958
+ self.init_messages()
1652
1959
 
1653
1960
  def reset_to_original_system_message(self) -> None:
1654
1961
  r"""Reset system message to original, removing any appended context.
@@ -2072,22 +2379,122 @@ class ChatAgent(BaseAgent):
2072
2379
 
2073
2380
  try:
2074
2381
  openai_messages, num_tokens = self.memory.get_context()
2382
+ if self.summarize_threshold is not None:
2383
+ threshold = self._calculate_next_summary_threshold()
2384
+ summary_token_count = self._summary_token_count
2385
+ token_limit = self.model_backend.token_limit
2386
+
2387
+ if num_tokens <= token_limit:
2388
+ if (
2389
+ summary_token_count
2390
+ > token_limit * self.summary_window_ratio
2391
+ ):
2392
+ logger.info(
2393
+ f"Summary tokens ({summary_token_count}) "
2394
+ f"exceed limit, full compression."
2395
+ )
2396
+ # Summarize everything (including summaries)
2397
+ summary = self.summarize(include_summaries=True)
2398
+ self._update_memory_with_summary(
2399
+ summary.get("summary", ""),
2400
+ include_summaries=True,
2401
+ )
2402
+ elif num_tokens > threshold:
2403
+ logger.info(
2404
+ f"Token count ({num_tokens}) exceed threshold "
2405
+ f"({threshold}). Triggering summarization."
2406
+ )
2407
+ # Only summarize non-summary content
2408
+ summary = self.summarize(include_summaries=False)
2409
+ self._update_memory_with_summary(
2410
+ summary.get("summary", ""),
2411
+ include_summaries=False,
2412
+ )
2075
2413
  accumulated_context_tokens += num_tokens
2076
2414
  except RuntimeError as e:
2077
2415
  return self._step_terminate(
2078
2416
  e.args[1], tool_call_records, "max_tokens_exceeded"
2079
2417
  )
2080
- # Get response from model backend
2081
- response = self._get_model_response(
2082
- openai_messages,
2083
- num_tokens=num_tokens,
2084
- current_iteration=iteration_count,
2085
- response_format=response_format,
2086
- tool_schemas=[]
2087
- if disable_tools
2088
- else self._get_full_tool_schemas(),
2089
- prev_num_openai_messages=prev_num_openai_messages,
2090
- )
2418
+ # Get response from model backend with token limit error handling
2419
+ try:
2420
+ response = self._get_model_response(
2421
+ openai_messages,
2422
+ num_tokens=num_tokens,
2423
+ current_iteration=iteration_count,
2424
+ response_format=response_format,
2425
+ tool_schemas=[]
2426
+ if disable_tools
2427
+ else self._get_full_tool_schemas(),
2428
+ prev_num_openai_messages=prev_num_openai_messages,
2429
+ )
2430
+ except Exception as exc:
2431
+ logger.exception("Model error: %s", exc)
2432
+
2433
+ if self._is_token_limit_error(exc):
2434
+ tool_signature = self._last_tool_call_signature
2435
+ if (
2436
+ tool_signature is not None
2437
+ and tool_signature
2438
+ == self._last_token_limit_tool_signature
2439
+ ):
2440
+ description = self._describe_tool_call(
2441
+ self._last_tool_call_record
2442
+ )
2443
+ repeated_msg = (
2444
+ "Context exceeded again by the same tool call."
2445
+ )
2446
+ if description:
2447
+ repeated_msg += f" {description}"
2448
+ raise RuntimeError(repeated_msg) from exc
2449
+
2450
+ user_message_count = sum(
2451
+ 1
2452
+ for msg in openai_messages
2453
+ if getattr(msg, "role", None) == "user"
2454
+ )
2455
+ if (
2456
+ user_message_count == 1
2457
+ and getattr(openai_messages[-1], "role", None)
2458
+ == "user"
2459
+ ):
2460
+ raise RuntimeError(
2461
+ "The provided user input alone exceeds the "
2462
+ "context window. Please shorten the input."
2463
+ ) from exc
2464
+
2465
+ logger.warning(
2466
+ "Token limit exceeded error detected. "
2467
+ "Summarizing context."
2468
+ )
2469
+
2470
+ recent_records: List[ContextRecord]
2471
+ try:
2472
+ recent_records = self.memory.retrieve()
2473
+ except Exception: # pragma: no cover - defensive guard
2474
+ recent_records = []
2475
+
2476
+ indices_to_remove = (
2477
+ self._find_indices_to_remove_for_last_tool_pair(
2478
+ recent_records
2479
+ )
2480
+ )
2481
+ self.memory.remove_records_by_indices(indices_to_remove)
2482
+
2483
+ summary = self.summarize(include_summaries=False)
2484
+ tool_notice = self._format_tool_limit_notice()
2485
+ summary_messages = summary.get("summary", "")
2486
+
2487
+ if tool_notice:
2488
+ summary_messages += "\n\n" + tool_notice
2489
+
2490
+ self._update_memory_with_summary(
2491
+ summary_messages, include_summaries=False
2492
+ )
2493
+ self._last_token_limit_tool_signature = tool_signature
2494
+ return self._step_impl(input_message, response_format)
2495
+
2496
+ raise
2497
+
2091
2498
  prev_num_openai_messages = len(openai_messages)
2092
2499
  iteration_count += 1
2093
2500
 
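Taken together, the pre-request check above reduces to a small decision rule. A standalone sketch (function name and return labels are mine, not the library's):

    def compression_action(num_tokens, summary_tokens, token_limit,
                           threshold, summary_window_ratio=0.6):
        # Mirrors the branch order above: the over-limit case is left to the
        # token-limit error handler; full compression wins over progressive.
        if num_tokens > token_limit:
            return "none"
        if summary_tokens > token_limit * summary_window_ratio:
            return "full_compression"       # summarize(include_summaries=True)
        if num_tokens > threshold:
            return "progressive_summary"    # summarize(include_summaries=False)
        return "none"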
@@ -2282,6 +2689,7 @@ class ChatAgent(BaseAgent):
2282
2689
  step_token_usage = self._create_token_usage_tracker()
2283
2690
  iteration_count: int = 0
2284
2691
  prev_num_openai_messages: int = 0
2692
+
2285
2693
  while True:
2286
2694
  if self.pause_event is not None and not self.pause_event.is_set():
2287
2695
  if isinstance(self.pause_event, asyncio.Event):
@@ -2292,21 +2700,128 @@ class ChatAgent(BaseAgent):
2292
2700
  await loop.run_in_executor(None, self.pause_event.wait)
2293
2701
  try:
2294
2702
  openai_messages, num_tokens = self.memory.get_context()
2703
+ if self.summarize_threshold is not None:
2704
+ threshold = self._calculate_next_summary_threshold()
2705
+ summary_token_count = self._summary_token_count
2706
+ token_limit = self.model_backend.token_limit
2707
+
2708
+ if num_tokens <= token_limit:
2709
+ if (
2710
+ summary_token_count
2711
+ > token_limit * self.summary_window_ratio
2712
+ ):
2713
+ logger.info(
2714
+ f"Summary tokens ({summary_token_count}) "
2715
+ f"exceed limit, full compression."
2716
+ )
2717
+ # Summarize everything (including summaries)
2718
+ summary = await self.asummarize(
2719
+ include_summaries=True
2720
+ )
2721
+ self._update_memory_with_summary(
2722
+ summary.get("summary", ""),
2723
+ include_summaries=True,
2724
+ )
2725
+ elif num_tokens > threshold:
2726
+ logger.info(
2727
+ f"Token count ({num_tokens}) exceed threshold "
2728
+ "({threshold}). Triggering summarization."
2729
+ )
2730
+ # Only summarize non-summary content
2731
+ summary = await self.asummarize(
2732
+ include_summaries=False
2733
+ )
2734
+ self._update_memory_with_summary(
2735
+ summary.get("summary", ""),
2736
+ include_summaries=False,
2737
+ )
2295
2738
  accumulated_context_tokens += num_tokens
2296
2739
  except RuntimeError as e:
2297
2740
  return self._step_terminate(
2298
2741
  e.args[1], tool_call_records, "max_tokens_exceeded"
2299
2742
  )
2300
- response = await self._aget_model_response(
2301
- openai_messages,
2302
- num_tokens=num_tokens,
2303
- current_iteration=iteration_count,
2304
- response_format=response_format,
2305
- tool_schemas=[]
2306
- if disable_tools
2307
- else self._get_full_tool_schemas(),
2308
- prev_num_openai_messages=prev_num_openai_messages,
2309
- )
2743
+ # Get response from model backend with token limit error handling
2744
+ try:
2745
+ response = await self._aget_model_response(
2746
+ openai_messages,
2747
+ num_tokens=num_tokens,
2748
+ current_iteration=iteration_count,
2749
+ response_format=response_format,
2750
+ tool_schemas=[]
2751
+ if disable_tools
2752
+ else self._get_full_tool_schemas(),
2753
+ prev_num_openai_messages=prev_num_openai_messages,
2754
+ )
2755
+ except Exception as exc:
2756
+ logger.exception("Model error: %s", exc)
2757
+
2758
+ if self._is_token_limit_error(exc):
2759
+ tool_signature = self._last_tool_call_signature
2760
+ if (
2761
+ tool_signature is not None
2762
+ and tool_signature
2763
+ == self._last_token_limit_tool_signature
2764
+ ):
2765
+ description = self._describe_tool_call(
2766
+ self._last_tool_call_record
2767
+ )
2768
+ repeated_msg = (
2769
+ "Context exceeded again by the same tool call."
2770
+ )
2771
+ if description:
2772
+ repeated_msg += f" {description}"
2773
+ raise RuntimeError(repeated_msg) from exc
2774
+
2775
+ user_message_count = sum(
2776
+ 1
2777
+ for msg in openai_messages
2778
+ if getattr(msg, "role", None) == "user"
2779
+ )
2780
+ if (
2781
+ user_message_count == 1
2782
+ and getattr(openai_messages[-1], "role", None)
2783
+ == "user"
2784
+ ):
2785
+ raise RuntimeError(
2786
+ "The provided user input alone exceeds the"
2787
+ "context window. Please shorten the input."
2788
+ ) from exc
2789
+
2790
+ logger.warning(
2791
+ "Token limit exceeded error detected. "
2792
+ "Summarizing context."
2793
+ )
2794
+
2795
+ recent_records: List[ContextRecord]
2796
+ try:
2797
+ recent_records = self.memory.retrieve()
2798
+ except Exception: # pragma: no cover - defensive guard
2799
+ recent_records = []
2800
+
2801
+ indices_to_remove = (
2802
+ self._find_indices_to_remove_for_last_tool_pair(
2803
+ recent_records
2804
+ )
2805
+ )
2806
+ self.memory.remove_records_by_indices(indices_to_remove)
2807
+
2808
+ summary = await self.asummarize()
2809
+
2810
+ tool_notice = self._format_tool_limit_notice()
2811
+ summary_messages = summary.get("summary", "")
2812
+
2813
+ if tool_notice:
2814
+ summary_messages += "\n\n" + tool_notice
2815
+ self._update_memory_with_summary(
2816
+ summary_messages, include_summaries=False
2817
+ )
2818
+ self._last_token_limit_tool_signature = tool_signature
2819
+ return await self._astep_non_streaming_task(
2820
+ input_message, response_format
2821
+ )
2822
+
2823
+ raise
2824
+
2310
2825
  prev_num_openai_messages = len(openai_messages)
2311
2826
  iteration_count += 1
2312
2827
 
@@ -2383,6 +2898,8 @@ class ChatAgent(BaseAgent):
2383
2898
  if self.prune_tool_calls_from_memory and tool_call_records:
2384
2899
  self.memory.clean_tool_calls()
2385
2900
 
2901
+ self._last_token_limit_user_signature = None
2902
+
2386
2903
  return self._convert_to_chatagent_response(
2387
2904
  response,
2388
2905
  tool_call_records,
@@ -2479,6 +2996,8 @@ class ChatAgent(BaseAgent):
2479
2996
  if response:
2480
2997
  break
2481
2998
  except RateLimitError as e:
2999
+ if self._is_token_limit_error(e):
3000
+ raise
2482
3001
  last_error = e
2483
3002
  if attempt < self.retry_attempts - 1:
2484
3003
  delay = min(self.retry_delay * (2**attempt), 60.0)
@@ -2496,7 +3015,6 @@ class ChatAgent(BaseAgent):
2496
3015
  except Exception:
2497
3016
  logger.error(
2498
3017
  f"Model error: {self.model_backend.model_type}",
2499
- exc_info=True,
2500
3018
  )
2501
3019
  raise
2502
3020
  else:
@@ -2543,6 +3061,8 @@ class ChatAgent(BaseAgent):
2543
3061
  if response:
2544
3062
  break
2545
3063
  except RateLimitError as e:
3064
+ if self._is_token_limit_error(e):
3065
+ raise
2546
3066
  last_error = e
2547
3067
  if attempt < self.retry_attempts - 1:
2548
3068
  delay = min(self.retry_delay * (2**attempt), 60.0)
@@ -3050,6 +3570,7 @@ class ChatAgent(BaseAgent):
3050
3570
  tool_call_id=tool_call_id,
3051
3571
  )
3052
3572
 
3573
+ self._update_last_tool_call_state(tool_record)
3053
3574
  return tool_record
3054
3575
 
3055
3576
  def _stream(
@@ -3611,12 +4132,14 @@ class ChatAgent(BaseAgent):
3611
4132
  timestamp=base_timestamp + 1e-6,
3612
4133
  )
3613
4134
 
3614
- return ToolCallingRecord(
4135
+ tool_record = ToolCallingRecord(
3615
4136
  tool_name=function_name,
3616
4137
  args=args,
3617
4138
  result=result,
3618
4139
  tool_call_id=tool_call_id,
3619
4140
  )
4141
+ self._update_last_tool_call_state(tool_record)
4142
+ return tool_record
3620
4143
 
3621
4144
  except Exception as e:
3622
4145
  error_msg = (
@@ -3638,12 +4161,14 @@ class ChatAgent(BaseAgent):
3638
4161
 
3639
4162
  self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
3640
4163
 
3641
- return ToolCallingRecord(
4164
+ tool_record = ToolCallingRecord(
3642
4165
  tool_name=function_name,
3643
4166
  args=args,
3644
4167
  result=result,
3645
4168
  tool_call_id=tool_call_id,
3646
4169
  )
4170
+ self._update_last_tool_call_state(tool_record)
4171
+ return tool_record
3647
4172
  else:
3648
4173
  logger.warning(
3649
4174
  f"Tool '{function_name}' not found in internal tools"
@@ -3665,6 +4190,23 @@ class ChatAgent(BaseAgent):
3665
4190
  tool_call_id = tool_call_data['id']
3666
4191
 
3667
4192
  if function_name in self._internal_tools:
4193
+ # Create the tool call message
4194
+ assist_msg = FunctionCallingMessage(
4195
+ role_name=self.role_name,
4196
+ role_type=self.role_type,
4197
+ meta_dict=None,
4198
+ content="",
4199
+ func_name=function_name,
4200
+ args=args,
4201
+ tool_call_id=tool_call_id,
4202
+ )
4203
+ assist_ts = time.time_ns() / 1_000_000_000
4204
+ self.update_memory(
4205
+ assist_msg,
4206
+ OpenAIBackendRole.ASSISTANT,
4207
+ timestamp=assist_ts,
4208
+ )
4209
+
3668
4210
  tool = self._internal_tools[function_name]
3669
4211
  try:
3670
4212
  # Try different invocation paths in order of preference
@@ -3693,19 +4235,8 @@ class ChatAgent(BaseAgent):
3693
4235
  else:
3694
4236
  # Fallback: synchronous call
3695
4237
  result = tool(**args)
3696
- # First, create and record the assistant message with tool
3697
- # call
3698
- assist_msg = FunctionCallingMessage(
3699
- role_name=self.role_name,
3700
- role_type=self.role_type,
3701
- meta_dict=None,
3702
- content="",
3703
- func_name=function_name,
3704
- args=args,
3705
- tool_call_id=tool_call_id,
3706
- )
3707
4238
 
3708
- # Then create the tool response message
4239
+ # Create the tool response message
3709
4240
  func_msg = FunctionCallingMessage(
3710
4241
  role_name=self.role_name,
3711
4242
  role_type=self.role_type,
@@ -3715,31 +4246,21 @@ class ChatAgent(BaseAgent):
3715
4246
  result=result,
3716
4247
  tool_call_id=tool_call_id,
3717
4248
  )
3718
-
3719
- # Record both messages with precise timestamps to ensure
3720
- # correct ordering
3721
- current_time_ns = time.time_ns()
3722
- base_timestamp = (
3723
- current_time_ns / 1_000_000_000
3724
- ) # Convert to seconds
3725
-
3726
- self.update_memory(
3727
- assist_msg,
3728
- OpenAIBackendRole.ASSISTANT,
3729
- timestamp=base_timestamp,
3730
- )
4249
+ func_ts = time.time_ns() / 1_000_000_000
3731
4250
  self.update_memory(
3732
4251
  func_msg,
3733
4252
  OpenAIBackendRole.FUNCTION,
3734
- timestamp=base_timestamp + 1e-6,
4253
+ timestamp=func_ts,
3735
4254
  )
3736
4255
 
3737
- return ToolCallingRecord(
4256
+ tool_record = ToolCallingRecord(
3738
4257
  tool_name=function_name,
3739
4258
  args=args,
3740
4259
  result=result,
3741
4260
  tool_call_id=tool_call_id,
3742
4261
  )
4262
+ self._update_last_tool_call_state(tool_record)
4263
+ return tool_record
3743
4264
 
3744
4265
  except Exception as e:
3745
4266
  error_msg = (
@@ -3758,15 +4279,21 @@ class ChatAgent(BaseAgent):
3758
4279
  result=result,
3759
4280
  tool_call_id=tool_call_id,
3760
4281
  )
4282
+ func_ts = time.time_ns() / 1_000_000_000
4283
+ self.update_memory(
4284
+ func_msg,
4285
+ OpenAIBackendRole.FUNCTION,
4286
+ timestamp=func_ts,
4287
+ )
3761
4288
 
3762
- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
3763
-
3764
- return ToolCallingRecord(
4289
+ tool_record = ToolCallingRecord(
3765
4290
  tool_name=function_name,
3766
4291
  args=args,
3767
4292
  result=result,
3768
4293
  tool_call_id=tool_call_id,
3769
4294
  )
4295
+ self._update_last_tool_call_state(tool_record)
4296
+ return tool_record
3770
4297
  else:
3771
4298
  logger.warning(
3772
4299
  f"Tool '{function_name}' not found in internal tools"
@@ -4468,23 +4995,29 @@ class ChatAgent(BaseAgent):
4468
4995
  # Toolkit doesn't support cloning, use original
4469
4996
  cloned_toolkits[toolkit_id] = toolkit_instance
4470
4997
 
4471
- if getattr(
4472
- tool.func, "__message_integration_enhanced__", False
4473
- ):
4474
- cloned_tools.append(
4475
- FunctionTool(
4476
- func=tool.func,
4477
- openai_tool_schema=tool.get_openai_tool_schema(),
4478
- )
4479
- )
4480
- continue
4481
-
4482
4998
  # Get the method from the cloned (or original) toolkit
4483
4999
  toolkit = cloned_toolkits[toolkit_id]
4484
5000
  method_name = tool.func.__name__
4485
5001
 
5002
+ # Check if toolkit was actually cloned or just reused
5003
+ toolkit_was_cloned = toolkit is not toolkit_instance
5004
+
4486
5005
  if hasattr(toolkit, method_name):
4487
5006
  new_method = getattr(toolkit, method_name)
5007
+
5008
+ # If toolkit wasn't cloned (stateless), preserve the
5009
+ # original function to maintain any enhancements/wrappers
5010
+ if not toolkit_was_cloned:
5011
+ # Toolkit is stateless, safe to reuse original function
5012
+ cloned_tools.append(
5013
+ FunctionTool(
5014
+ func=tool.func,
5015
+ openai_tool_schema=tool.get_openai_tool_schema(),
5016
+ )
5017
+ )
5018
+ continue
5019
+
5020
+ # Toolkit was cloned, use the new method
4488
5021
  # Wrap cloned method into a new FunctionTool,
4489
5022
  # preserving schema
4490
5023
  try: