camel-ai 0.2.78__py3-none-any.whl → 0.2.79a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39)
  1. camel/__init__.py +1 -1
  2. camel/agents/_utils.py +38 -0
  3. camel/agents/chat_agent.py +1112 -287
  4. camel/datasets/base_generator.py +39 -10
  5. camel/environments/single_step.py +28 -3
  6. camel/memories/__init__.py +1 -2
  7. camel/memories/agent_memories.py +34 -0
  8. camel/memories/base.py +26 -0
  9. camel/memories/blocks/chat_history_block.py +117 -17
  10. camel/memories/context_creators/score_based.py +25 -384
  11. camel/messages/base.py +26 -0
  12. camel/models/aws_bedrock_model.py +1 -17
  13. camel/models/azure_openai_model.py +113 -67
  14. camel/models/model_factory.py +17 -1
  15. camel/models/moonshot_model.py +102 -5
  16. camel/models/openai_compatible_model.py +62 -32
  17. camel/models/openai_model.py +61 -35
  18. camel/models/samba_model.py +34 -15
  19. camel/models/sglang_model.py +41 -11
  20. camel/societies/workforce/__init__.py +2 -0
  21. camel/societies/workforce/events.py +122 -0
  22. camel/societies/workforce/role_playing_worker.py +15 -11
  23. camel/societies/workforce/single_agent_worker.py +143 -291
  24. camel/societies/workforce/utils.py +2 -1
  25. camel/societies/workforce/workflow_memory_manager.py +772 -0
  26. camel/societies/workforce/workforce.py +513 -188
  27. camel/societies/workforce/workforce_callback.py +74 -0
  28. camel/societies/workforce/workforce_logger.py +144 -140
  29. camel/societies/workforce/workforce_metrics.py +33 -0
  30. camel/storages/vectordb_storages/oceanbase.py +5 -4
  31. camel/toolkits/file_toolkit.py +166 -0
  32. camel/toolkits/message_integration.py +15 -13
  33. camel/toolkits/terminal_toolkit/terminal_toolkit.py +112 -79
  34. camel/types/enums.py +1 -0
  35. camel/utils/context_utils.py +201 -2
  36. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/METADATA +14 -13
  37. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/RECORD +39 -35
  38. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/WHEEL +0 -0
  39. {camel_ai-0.2.78.dist-info → camel_ai-0.2.79a1.dist-info}/licenses/LICENSE +0 -0
@@ -20,7 +20,6 @@ import concurrent.futures
  import hashlib
  import inspect
  import json
- import math
  import os
  import random
  import re
@@ -57,6 +56,7 @@ from pydantic import BaseModel, ValidationError

  from camel.agents._types import ModelResponse, ToolCallRequest
  from camel.agents._utils import (
+ build_default_summary_prompt,
  convert_to_function_tool,
  convert_to_schema,
  get_info_dict,
@@ -68,10 +68,10 @@ from camel.logger import get_logger
  from camel.memories import (
  AgentMemory,
  ChatHistoryMemory,
+ ContextRecord,
  MemoryRecord,
  ScoreBasedContextCreator,
  )
- from camel.memories.blocks.chat_history_block import EmptyMemoryWarning
  from camel.messages import (
  BaseMessage,
  FunctionCallingMessage,
@@ -103,6 +103,16 @@ from camel.utils import (
  from camel.utils.commons import dependencies_required
  from camel.utils.context_utils import ContextUtility

+ TOKEN_LIMIT_ERROR_MARKERS = (
+ "context_length_exceeded",
+ "prompt is too long",
+ "exceeded your current quota",
+ "tokens must be reduced",
+ "context length",
+ "token count",
+ "context limit",
+ )
+
  if TYPE_CHECKING:
  from camel.terminators import ResponseTerminator

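Note: the new module-level TOKEN_LIMIT_ERROR_MARKERS tuple feeds the `_is_token_limit_error` helper added further down in this diff, which lower-cases the exception text and substring-matches it against these markers. A minimal sketch of that check (the error message below is illustrative, not taken from the package):

    error = RuntimeError("Error code: 400 - context_length_exceeded")
    hit = any(marker in str(error).lower() for marker in TOKEN_LIMIT_ERROR_MARKERS)
    # hit is True here, so ChatAgent would prune the offending records,
    # summarize the remaining context, and retry the step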
@@ -355,9 +365,9 @@ class ChatAgent(BaseAgent):
  message_window_size (int, optional): The maximum number of previous
  messages to include in the context window. If `None`, no windowing
  is performed. (default: :obj:`None`)
- token_limit (int, optional): The maximum number of tokens in a context.
- The context will be automatically pruned to fulfill the limitation.
- If `None`, it will be set according to the backend model.
+ summarize_threshold (int, optional): The percentage of the context
+ window that triggers summarization. If `None`, will trigger
+ summarization when the context window is full.
  (default: :obj:`None`)
  output_language (str, optional): The language to be output by the
  agent. (default: :obj:`None`)
@@ -415,6 +425,10 @@ class ChatAgent(BaseAgent):
  updates return accumulated content (current behavior). When False,
  partial updates return only the incremental delta. (default:
  :obj:`True`)
+ summary_window_ratio (float, optional): Maximum fraction of the total
+ context window that can be occupied by summary information. Used
+ to limit how much of the model's context is reserved for
+ summarization results. (default: :obj:`0.6`)
  """

  def __init__(
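A minimal usage sketch for the two constructor options documented above. Only `summarize_threshold` and `summary_window_ratio` come from this diff; the system message and the default model backend are assumptions for illustration:

    from camel.agents import ChatAgent

    agent = ChatAgent(
        system_message="You are a helpful assistant.",
        summarize_threshold=50,    # summarize once context passes 50% of the token limit
        summary_window_ratio=0.6,  # at most ~60% of the window may be held by summaries
    )

With summarize_threshold=None, automatic compression is only triggered once the context window is actually full.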
@@ -437,6 +451,7 @@ class ChatAgent(BaseAgent):
  ] = None,
  memory: Optional[AgentMemory] = None,
  message_window_size: Optional[int] = None,
+ summarize_threshold: Optional[int] = 50,
  token_limit: Optional[int] = None,
  output_language: Optional[str] = None,
  tools: Optional[List[Union[FunctionTool, Callable]]] = None,
@@ -459,6 +474,7 @@ class ChatAgent(BaseAgent):
  retry_delay: float = 1.0,
  step_timeout: Optional[float] = None,
  stream_accumulate: bool = True,
+ summary_window_ratio: float = 0.6,
  ) -> None:
  if isinstance(model, ModelManager):
  self.model_backend = model
@@ -477,7 +493,7 @@ class ChatAgent(BaseAgent):
  # Set up memory
  context_creator = ScoreBasedContextCreator(
  self.model_backend.token_counter,
- token_limit or self.model_backend.token_limit,
+ self.model_backend.token_limit,
  )

  self._memory: AgentMemory = memory or ChatHistoryMemory(
@@ -492,9 +508,7 @@ class ChatAgent(BaseAgent):

  # Set up system message and initialize messages
  self._original_system_message = (
- BaseMessage.make_assistant_message(
- role_name="Assistant", content=system_message
- )
+ BaseMessage.make_system_message(system_message)
  if isinstance(system_message, str)
  else system_message
  )
@@ -504,6 +518,21 @@ class ChatAgent(BaseAgent):
  )
  self.init_messages()

+ # Set up summarize threshold with validation
+ if summarize_threshold is not None:
+ if not (0 < summarize_threshold <= 100):
+ raise ValueError(
+ f"summarize_threshold must be between 0 and 100, "
+ f"got {summarize_threshold}"
+ )
+ logger.info(
+ f"Automatic context compression is enabled. Will trigger "
+ f"summarization when context window exceeds "
+ f"{summarize_threshold}% of the total token limit."
+ )
+ self.summarize_threshold = summarize_threshold
+ self._reset_summary_state()
+
  # Set up role name and role type
  self.role_name: str = (
  getattr(self.system_message, "role_name", None) or "assistant"
@@ -551,11 +580,16 @@ class ChatAgent(BaseAgent):
  self._context_utility: Optional[ContextUtility] = None
  self._context_summary_agent: Optional["ChatAgent"] = None
  self.stream_accumulate = stream_accumulate
+ self._last_tool_call_record: Optional[ToolCallingRecord] = None
+ self._last_tool_call_signature: Optional[str] = None
+ self._last_token_limit_tool_signature: Optional[str] = None
+ self.summary_window_ratio = summary_window_ratio

  def reset(self):
  r"""Resets the :obj:`ChatAgent` to its initial state."""
  self.terminated = False
  self.init_messages()
+ self._reset_summary_state()
  for terminator in self.response_terminators:
  terminator.reset()

@@ -762,6 +796,329 @@ class ChatAgent(BaseAgent):
762
796
  for func_tool in self._internal_tools.values()
763
797
  ]
764
798
 
799
+ @staticmethod
800
+ def _is_token_limit_error(error: Exception) -> bool:
801
+ r"""Return True when the exception message indicates a token limit."""
802
+ error_message = str(error).lower()
803
+ return any(
804
+ marker in error_message for marker in TOKEN_LIMIT_ERROR_MARKERS
805
+ )
806
+
807
+ @staticmethod
808
+ def _is_tool_related_record(record: MemoryRecord) -> bool:
809
+ r"""Determine whether the given memory record
810
+ belongs to a tool call."""
811
+ if record.role_at_backend in {
812
+ OpenAIBackendRole.TOOL,
813
+ OpenAIBackendRole.FUNCTION,
814
+ }:
815
+ return True
816
+
817
+ if (
818
+ record.role_at_backend == OpenAIBackendRole.ASSISTANT
819
+ and isinstance(record.message, FunctionCallingMessage)
820
+ ):
821
+ return True
822
+
823
+ return False
824
+
825
+ def _find_indices_to_remove_for_last_tool_pair(
826
+ self, recent_records: List[ContextRecord]
827
+ ) -> List[int]:
828
+ """Find indices of records that should be removed to clean up the most
829
+ recent incomplete tool interaction pair.
830
+
831
+ This method identifies tool call/result pairs by tool_call_id and
832
+ returns the exact indices to remove, allowing non-contiguous deletions.
833
+
834
+ Logic:
835
+ - If the last record is a tool result (TOOL/FUNCTION) with a
836
+ tool_call_id, find the matching assistant call anywhere in history
837
+ and return both indices.
838
+ - If the last record is an assistant tool call without a result yet,
839
+ return just that index.
840
+ - For normal messages (non tool-related): remove just the last one.
841
+ - Fallback: If no tool_call_id is available, use heuristic (last 2 if
842
+ tool-related, otherwise last 1).
843
+
844
+ Returns:
845
+ List[int]: Indices to remove (may be non-contiguous).
846
+ """
847
+ if not recent_records:
848
+ return []
849
+
850
+ last_idx = len(recent_records) - 1
851
+ last_record = recent_records[last_idx].memory_record
852
+
853
+ # Case A: Last is an ASSISTANT tool call with no result yet
854
+ if (
855
+ last_record.role_at_backend == OpenAIBackendRole.ASSISTANT
856
+ and isinstance(last_record.message, FunctionCallingMessage)
857
+ and last_record.message.result is None
858
+ ):
859
+ return [last_idx]
860
+
861
+ # Case B: Last is TOOL/FUNCTION result, try id-based pairing
862
+ if last_record.role_at_backend in {
863
+ OpenAIBackendRole.TOOL,
864
+ OpenAIBackendRole.FUNCTION,
865
+ }:
866
+ tool_id = None
867
+ if isinstance(last_record.message, FunctionCallingMessage):
868
+ tool_id = last_record.message.tool_call_id
869
+
870
+ if tool_id:
871
+ for idx in range(len(recent_records) - 2, -1, -1):
872
+ rec = recent_records[idx].memory_record
873
+ if rec.role_at_backend != OpenAIBackendRole.ASSISTANT:
874
+ continue
875
+
876
+ # Check if this assistant message contains the tool_call_id
877
+ matched = False
878
+
879
+ # Case 1: FunctionCallingMessage (single tool call)
880
+ if isinstance(rec.message, FunctionCallingMessage):
881
+ if rec.message.tool_call_id == tool_id:
882
+ matched = True
883
+
884
+ # Case 2: BaseMessage with multiple tool_calls in meta_dict
885
+ elif (
886
+ hasattr(rec.message, "meta_dict")
887
+ and rec.message.meta_dict
888
+ ):
889
+ tool_calls_list = rec.message.meta_dict.get(
890
+ "tool_calls", []
891
+ )
892
+ if isinstance(tool_calls_list, list):
893
+ for tc in tool_calls_list:
894
+ if (
895
+ isinstance(tc, dict)
896
+ and tc.get("id") == tool_id
897
+ ):
898
+ matched = True
899
+ break
900
+
901
+ if matched:
902
+ # Return both assistant call and tool result indices
903
+ return [idx, last_idx]
904
+
905
+ # Fallback: no tool_call_id, use heuristic
906
+ if self._is_tool_related_record(last_record):
907
+ # Remove last 2 (assume they are paired)
908
+ return [last_idx - 1, last_idx] if last_idx > 0 else [last_idx]
909
+ else:
910
+ return [last_idx]
911
+
912
+ # Default: non tool-related tail => remove last one
913
+ return [last_idx]
914
+
915
+ @staticmethod
916
+ def _serialize_tool_args(args: Dict[str, Any]) -> str:
917
+ try:
918
+ return json.dumps(args, ensure_ascii=False, sort_keys=True)
919
+ except TypeError:
920
+ return str(args)
921
+
922
+ @classmethod
923
+ def _build_tool_signature(
924
+ cls, func_name: str, args: Dict[str, Any]
925
+ ) -> str:
926
+ args_repr = cls._serialize_tool_args(args)
927
+ return f"{func_name}:{args_repr}"
928
+
929
+ def _describe_tool_call(
930
+ self, record: Optional[ToolCallingRecord]
931
+ ) -> Optional[str]:
932
+ if record is None:
933
+ return None
934
+ args_repr = self._serialize_tool_args(record.args)
935
+ return f"Tool `{record.tool_name}` invoked with arguments {args_repr}."
936
+
937
+ def _update_last_tool_call_state(
938
+ self, record: Optional[ToolCallingRecord]
939
+ ) -> None:
940
+ """Track the most recent tool call and its identifying signature."""
941
+ self._last_tool_call_record = record
942
+ if record is None:
943
+ self._last_tool_call_signature = None
944
+ return
945
+
946
+ args = (
947
+ record.args
948
+ if isinstance(record.args, dict)
949
+ else {"_raw": record.args}
950
+ )
951
+ try:
952
+ signature = self._build_tool_signature(record.tool_name, args)
953
+ except Exception: # pragma: no cover - defensive guard
954
+ signature = None
955
+ self._last_tool_call_signature = signature
956
+
957
+ def _format_tool_limit_notice(self) -> Optional[str]:
958
+ record = self._last_tool_call_record
959
+ description = self._describe_tool_call(record)
960
+ if description is None:
961
+ return None
962
+ notice_lines = [
963
+ "[Tool Call Causing Token Limit]",
964
+ description,
965
+ ]
966
+
967
+ if record is not None:
968
+ result = record.result
969
+ if isinstance(result, bytes):
970
+ result_repr = result.decode(errors="replace")
971
+ elif isinstance(result, str):
972
+ result_repr = result
973
+ else:
974
+ try:
975
+ result_repr = json.dumps(
976
+ result, ensure_ascii=False, sort_keys=True
977
+ )
978
+ except (TypeError, ValueError):
979
+ result_repr = str(result)
980
+
981
+ result_length = len(result_repr)
982
+ notice_lines.append(f"Tool result length: {result_length}")
983
+ if self.model_backend.token_limit != 999999999:
984
+ notice_lines.append(
985
+ f"Token limit: {self.model_backend.token_limit}"
986
+ )
987
+
988
+ return "\n".join(notice_lines)
989
+
990
+ @staticmethod
991
+ def _append_user_messages_section(
992
+ summary_content: str, user_messages: List[str]
993
+ ) -> str:
994
+ section_title = "- **All User Messages**:"
995
+ sanitized_messages: List[str] = []
996
+ for msg in user_messages:
997
+ if not isinstance(msg, str):
998
+ msg = str(msg)
999
+ cleaned = " ".join(msg.strip().splitlines())
1000
+ if cleaned:
1001
+ sanitized_messages.append(cleaned)
1002
+
1003
+ bullet_block = (
1004
+ "\n".join(f"- {m}" for m in sanitized_messages)
1005
+ if sanitized_messages
1006
+ else "- None noted"
1007
+ )
1008
+ user_section = f"{section_title}\n{bullet_block}"
1009
+
1010
+ summary_clean = summary_content.rstrip()
1011
+ separator = "\n\n" if summary_clean else ""
1012
+ return f"{summary_clean}{separator}{user_section}"
1013
+
1014
+ def _reset_summary_state(self) -> None:
1015
+ self._summary_token_count = 0 # Total tokens in summary messages
1016
+
1017
+ def _calculate_next_summary_threshold(self) -> int:
1018
+ r"""Calculate the next token threshold that should trigger
1019
+ summarization.
1020
+
1021
+ The threshold calculation follows a progressive strategy:
1022
+ - First time: token_limit * (summarize_threshold / 100)
1023
+ - Subsequent times: (limit - summary_tokens) * (summarize_threshold / 100) + summary_tokens
1024
+
1025
+ This ensures that as summaries accumulate, the threshold adapts
1026
+ to maintain a reasonable balance between context and summaries.
1027
+
1028
+ Returns:
1029
+ int: The token count threshold for next summarization.
1030
+ """
1031
+ token_limit = self.model_backend.token_limit
1032
+ summary_token_count = self._summary_token_count
1033
+
1034
+ # First summarization: use the percentage threshold
1035
+ if summary_token_count == 0:
1036
+ threshold = int(token_limit * self.summarize_threshold / 100)
1037
+ else:
1038
+ # Subsequent summarizations: adaptive threshold
1039
+ threshold = int(
1040
+ (token_limit - summary_token_count)
1041
+ * self.summarize_threshold
1042
+ / 100
1043
+ + summary_token_count
1044
+ )
1045
+
1046
+ return threshold
1047
+
1048
+ def _update_memory_with_summary(
1049
+ self, summary: str, include_summaries: bool = False
1050
+ ) -> None:
1051
+ r"""Update memory with summary result.
1052
+
1053
+ This method handles memory clearing and restoration of summaries based
1054
+ on whether it's a progressive or full compression.
1055
+ """
1056
+
1057
+ summary_content: str = summary
1058
+
1059
+ existing_summaries = []
1060
+ if not include_summaries:
1061
+ messages, _ = self.memory.get_context()
1062
+ for msg in messages:
1063
+ content = msg.get('content', '')
1064
+ if isinstance(content, str) and content.startswith(
1065
+ '[CONTEXT_SUMMARY]'
1066
+ ):
1067
+ existing_summaries.append(msg)
1068
+
1069
+ # Clear memory
1070
+ self.clear_memory()
1071
+
1072
+ # Restore old summaries (for progressive compression)
1073
+ for old_summary in existing_summaries:
1074
+ content = old_summary.get('content', '')
1075
+ if not isinstance(content, str):
1076
+ content = str(content)
1077
+ summary_msg = BaseMessage.make_assistant_message(
1078
+ role_name="assistant", content=content
1079
+ )
1080
+ self.update_memory(summary_msg, OpenAIBackendRole.ASSISTANT)
1081
+
1082
+ # Add new summary
1083
+ new_summary_msg = BaseMessage.make_assistant_message(
1084
+ role_name="assistant", content=summary_content
1085
+ )
1086
+ self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT)
1087
+ input_message = BaseMessage.make_assistant_message(
1088
+ role_name="assistant",
1089
+ content=(
1090
+ "Please continue the conversation from "
1091
+ "where we left it off without asking the user any further "
1092
+ "questions. Continue with the last task that you were "
1093
+ "asked to work on."
1094
+ ),
1095
+ )
1096
+ self.update_memory(input_message, OpenAIBackendRole.ASSISTANT)
1097
+ # Update token count
1098
+ try:
1099
+ summary_tokens = (
1100
+ self.model_backend.token_counter.count_tokens_from_messages(
1101
+ [{"role": "assistant", "content": summary_content}]
1102
+ )
1103
+ )
1104
+
1105
+ if include_summaries: # Full compression - reset count
1106
+ self._summary_token_count = summary_tokens
1107
+ logger.info(
1108
+ f"Full compression: Summary with {summary_tokens} tokens. "
1109
+ f"Total summary tokens reset to: {summary_tokens}"
1110
+ )
1111
+ else: # Progressive compression - accumulate
1112
+ self._summary_token_count += summary_tokens
1113
+ logger.info(
1114
+ f"Progressive compression: New summary "
1115
+ f"with {summary_tokens} tokens. "
1116
+ f"Total summary tokens: "
1117
+ f"{self._summary_token_count}"
1118
+ )
1119
+ except Exception as e:
1120
+ logger.warning(f"Failed to count summary tokens: {e}")
1121
+
765
1122
  def _get_external_tool_names(self) -> Set[str]:
766
1123
  r"""Returns a set of external tool names."""
767
1124
  return set(self._external_tool_schemas.keys())
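To make the progressive strategy of `_calculate_next_summary_threshold` concrete, here is a worked example with illustrative numbers (a 100,000-token limit and the default summarize_threshold of 50; these values are assumptions, not package defaults beyond the threshold itself):

    token_limit = 100_000
    summarize_threshold = 50

    # First summarization: no summary tokens recorded yet
    first_trigger = int(token_limit * summarize_threshold / 100)        # 50_000

    # Later, with 20_000 tokens already occupied by earlier summaries
    summary_tokens = 20_000
    next_trigger = int(
        (token_limit - summary_tokens) * summarize_threshold / 100
        + summary_tokens
    )                                                                   # 60_000

The trigger point therefore drifts upward as summaries accumulate, which is why `summary_window_ratio` exists: once summary messages alone exceed that fraction of the window, the step loop falls back to a full compression pass that re-summarizes everything, including prior summaries.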
@@ -823,16 +1180,6 @@ class ChatAgent(BaseAgent):
823
1180
  ) -> None:
824
1181
  r"""Updates the agent memory with a new message.
825
1182
 
826
- If the single *message* exceeds the model's context window, it will
827
- be **automatically split into multiple smaller chunks** before being
828
- written into memory. This prevents later failures in
829
- `ScoreBasedContextCreator` where an over-sized message cannot fit
830
- into the available token budget at all.
831
-
832
- This slicing logic handles both regular text messages (in the
833
- `content` field) and long tool call results (in the `result` field of
834
- a `FunctionCallingMessage`).
835
-
836
1183
  Args:
837
1184
  message (BaseMessage): The new message to add to the stored
838
1185
  messages.
@@ -842,153 +1189,15 @@ class ChatAgent(BaseAgent):
842
1189
  (default: :obj:`None`)
843
1190
  (default: obj:`None`)
844
1191
  """
845
-
846
- # 1. Helper to write a record to memory
847
- def _write_single_record(
848
- message: BaseMessage, role: OpenAIBackendRole, timestamp: float
849
- ):
850
- self.memory.write_record(
851
- MemoryRecord(
852
- message=message,
853
- role_at_backend=role,
854
- timestamp=timestamp,
855
- agent_id=self.agent_id,
856
- )
857
- )
858
-
859
- base_ts = (
860
- timestamp
1192
+ record = MemoryRecord(
1193
+ message=message,
1194
+ role_at_backend=role,
1195
+ timestamp=timestamp
861
1196
  if timestamp is not None
862
- else time.time_ns() / 1_000_000_000
863
- )
864
-
865
- # 2. Get token handling utilities, fallback if unavailable
866
- try:
867
- context_creator = self.memory.get_context_creator()
868
- token_counter = context_creator.token_counter
869
- token_limit = context_creator.token_limit
870
- except AttributeError:
871
- _write_single_record(message, role, base_ts)
872
- return
873
-
874
- # 3. Check if slicing is necessary
875
- try:
876
- current_tokens = token_counter.count_tokens_from_messages(
877
- [message.to_openai_message(role)]
878
- )
879
-
880
- with warnings.catch_warnings():
881
- warnings.filterwarnings("ignore", category=EmptyMemoryWarning)
882
- _, ctx_tokens = self.memory.get_context()
883
-
884
- remaining_budget = max(0, token_limit - ctx_tokens)
885
-
886
- if current_tokens <= remaining_budget:
887
- _write_single_record(message, role, base_ts)
888
- return
889
- except Exception as e:
890
- logger.warning(
891
- f"Token calculation failed before chunking, "
892
- f"writing message as-is. Error: {e}"
893
- )
894
- _write_single_record(message, role, base_ts)
895
- return
896
-
897
- # 4. Perform slicing
898
- logger.warning(
899
- f"Message with {current_tokens} tokens exceeds remaining budget "
900
- f"of {remaining_budget}. Slicing into smaller chunks."
1197
+ else time.time_ns() / 1_000_000_000, # Nanosecond precision
1198
+ agent_id=self.agent_id,
901
1199
  )
902
-
903
- text_to_chunk: Optional[str] = None
904
- is_function_result = False
905
-
906
- if isinstance(message, FunctionCallingMessage) and isinstance(
907
- message.result, str
908
- ):
909
- text_to_chunk = message.result
910
- is_function_result = True
911
- elif isinstance(message.content, str):
912
- text_to_chunk = message.content
913
-
914
- if not text_to_chunk or not text_to_chunk.strip():
915
- _write_single_record(message, role, base_ts)
916
- return
917
- # Encode the entire text to get a list of all token IDs
918
- try:
919
- all_token_ids = token_counter.encode(text_to_chunk)
920
- except Exception as e:
921
- logger.error(f"Failed to encode text for chunking: {e}")
922
- _write_single_record(message, role, base_ts) # Fallback
923
- return
924
-
925
- if not all_token_ids:
926
- _write_single_record(message, role, base_ts) # Nothing to chunk
927
- return
928
-
929
- # 1. Base chunk size: one-tenth of the smaller of (a) total token
930
- # limit and (b) current remaining budget. This prevents us from
931
- # creating chunks that are guaranteed to overflow the
932
- # immediate context window.
933
- base_chunk_size = max(1, remaining_budget) // 10
934
-
935
- # 2. Each chunk gets a textual prefix such as:
936
- # "[chunk 3/12 of a long message]\n"
937
- # The prefix itself consumes tokens, so if we do not subtract its
938
- # length the *total* tokens of the outgoing message (prefix + body)
939
- # can exceed the intended bound. We estimate the prefix length
940
- # with a representative example that is safely long enough for the
941
- # vast majority of cases (three-digit indices).
942
- sample_prefix = "[chunk 1/1000 of a long message]\n"
943
- prefix_token_len = len(token_counter.encode(sample_prefix))
944
-
945
- # 3. The real capacity for the message body is therefore the base
946
- # chunk size minus the prefix length. Fallback to at least one
947
- # token to avoid zero or negative sizes.
948
- chunk_body_limit = max(1, base_chunk_size - prefix_token_len)
949
-
950
- # 4. Calculate how many chunks we will need with this body size.
951
- num_chunks = math.ceil(len(all_token_ids) / chunk_body_limit)
952
- group_id = str(uuid.uuid4())
953
-
954
- for i in range(num_chunks):
955
- start_idx = i * chunk_body_limit
956
- end_idx = start_idx + chunk_body_limit
957
- chunk_token_ids = all_token_ids[start_idx:end_idx]
958
-
959
- chunk_body = token_counter.decode(chunk_token_ids)
960
-
961
- prefix = f"[chunk {i + 1}/{num_chunks} of a long message]\n"
962
- new_body = prefix + chunk_body
963
-
964
- if is_function_result and isinstance(
965
- message, FunctionCallingMessage
966
- ):
967
- new_msg: BaseMessage = FunctionCallingMessage(
968
- role_name=message.role_name,
969
- role_type=message.role_type,
970
- meta_dict=message.meta_dict,
971
- content=message.content,
972
- func_name=message.func_name,
973
- args=message.args,
974
- result=new_body,
975
- tool_call_id=message.tool_call_id,
976
- )
977
- else:
978
- new_msg = message.create_new_instance(new_body)
979
-
980
- meta = (new_msg.meta_dict or {}).copy()
981
- meta.update(
982
- {
983
- "chunk_idx": i + 1,
984
- "chunk_total": num_chunks,
985
- "chunk_group_id": group_id,
986
- }
987
- )
988
- new_msg.meta_dict = meta
989
-
990
- # Increment timestamp slightly to maintain order
991
- _write_single_record(new_msg, role, base_ts + i * 1e-6)
1200
+ self.memory.write_record(record)
992
1201
 
993
1202
  def load_memory(self, memory: AgentMemory) -> None:
994
1203
  r"""Load the provided memory into the agent.
@@ -1042,40 +1251,333 @@ class ChatAgent(BaseAgent):
1042
1251
  f"Skipping invalid record: malformed message "
1043
1252
  f"structure in {record_dict}"
1044
1253
  )
1045
- continue
1254
+ continue
1255
+
1256
+ try:
1257
+ record = MemoryRecord.from_dict(record_dict)
1258
+ self.memory.write_records([record])
1259
+ except Exception as e:
1260
+ logger.warning(
1261
+ f"Error converting record to MemoryRecord: {e}. "
1262
+ f"Record: {record_dict}"
1263
+ )
1264
+ logger.info(f"Memory loaded from {path}")
1265
+
1266
+ def save_memory(self, path: str) -> None:
1267
+ r"""Retrieves the current conversation data from memory and writes it
1268
+ into a JSON file using JsonStorage.
1269
+
1270
+ Args:
1271
+ path (str): Target file path to store JSON data.
1272
+ """
1273
+ json_store = JsonStorage(Path(path))
1274
+ context_records = self.memory.retrieve()
1275
+ to_save = [cr.memory_record.to_dict() for cr in context_records]
1276
+ json_store.save(to_save)
1277
+ logger.info(f"Memory saved to {path}")
1278
+
1279
+ def summarize(
1280
+ self,
1281
+ filename: Optional[str] = None,
1282
+ summary_prompt: Optional[str] = None,
1283
+ response_format: Optional[Type[BaseModel]] = None,
1284
+ working_directory: Optional[Union[str, Path]] = None,
1285
+ include_summaries: bool = False,
1286
+ add_user_messages: bool = True,
1287
+ ) -> Dict[str, Any]:
1288
+ r"""Summarize the agent's current conversation context and persist it
1289
+ to a markdown file.
1290
+
1291
+ .. deprecated:: 0.2.80
1292
+ Use :meth:`asummarize` for async/await support and better
1293
+ performance in parallel summarization workflows.
1294
+
1295
+ Args:
1296
+ filename (Optional[str]): The base filename (without extension) to
1297
+ use for the markdown file. Defaults to a timestamped name when
1298
+ not provided.
1299
+ summary_prompt (Optional[str]): Custom prompt for the summarizer.
1300
+ When omitted, a default prompt highlighting key decisions,
1301
+ action items, and open questions is used.
1302
+ response_format (Optional[Type[BaseModel]]): A Pydantic model
1303
+ defining the expected structure of the response. If provided,
1304
+ the summary will be generated as structured output and included
1305
+ in the result.
1306
+ include_summaries (bool): Whether to include previously generated
1307
+ summaries in the content to be summarized. If False (default),
1308
+ only non-summary messages will be summarized. If True, all
1309
+ messages including previous summaries will be summarized
1310
+ (full compression). (default: :obj:`False`)
1311
+ working_directory (Optional[str|Path]): Optional directory to save
1312
+ the markdown summary file. If provided, overrides the default
1313
+ directory used by ContextUtility.
1314
+ add_user_messages (bool): Whether to add user messages to the summary.
1315
+ (default: :obj:`True`)
1316
+ Returns:
1317
+ Dict[str, Any]: A dictionary containing the summary text, file
1318
+ path, status message, and optionally structured_summary if
1319
+ response_format was provided.
1320
+
1321
+ See Also:
1322
+ :meth:`asummarize`: Async version for non-blocking LLM calls.
1323
+ """
1324
+
1325
+ warnings.warn(
1326
+ "summarize() is synchronous. Consider using asummarize() "
1327
+ "for async/await support and better performance.",
1328
+ DeprecationWarning,
1329
+ stacklevel=2,
1330
+ )
1331
+
1332
+ result: Dict[str, Any] = {
1333
+ "summary": "",
1334
+ "file_path": None,
1335
+ "status": "",
1336
+ }
1337
+
1338
+ try:
1339
+ # Use external context if set, otherwise create local one
1340
+ if self._context_utility is None:
1341
+ if working_directory is not None:
1342
+ self._context_utility = ContextUtility(
1343
+ working_directory=str(working_directory)
1344
+ )
1345
+ else:
1346
+ self._context_utility = ContextUtility()
1347
+ context_util = self._context_utility
1348
+
1349
+ # Get conversation directly from agent's memory
1350
+ messages, _ = self.memory.get_context()
1351
+
1352
+ if not messages:
1353
+ status_message = (
1354
+ "No conversation context available to summarize."
1355
+ )
1356
+ result["status"] = status_message
1357
+ return result
1358
+
1359
+ # Convert messages to conversation text
1360
+ conversation_lines = []
1361
+ user_messages: List[str] = []
1362
+ for message in messages:
1363
+ role = message.get('role', 'unknown')
1364
+ content = message.get('content', '')
1365
+
1366
+ # Skip summary messages if include_summaries is False
1367
+ if not include_summaries and isinstance(content, str):
1368
+ # Check if this is a summary message by looking for marker
1369
+ if content.startswith('[CONTEXT_SUMMARY]'):
1370
+ continue
1371
+
1372
+ # Handle tool call messages (assistant calling tools)
1373
+ tool_calls = message.get('tool_calls')
1374
+ if tool_calls and isinstance(tool_calls, (list, tuple)):
1375
+ for tool_call in tool_calls:
1376
+ # Handle both dict and object formats
1377
+ if isinstance(tool_call, dict):
1378
+ func_name = tool_call.get('function', {}).get(
1379
+ 'name', 'unknown_tool'
1380
+ )
1381
+ func_args_str = tool_call.get('function', {}).get(
1382
+ 'arguments', '{}'
1383
+ )
1384
+ else:
1385
+ # Handle object format (Pydantic or similar)
1386
+ func_name = getattr(
1387
+ getattr(tool_call, 'function', None),
1388
+ 'name',
1389
+ 'unknown_tool',
1390
+ )
1391
+ func_args_str = getattr(
1392
+ getattr(tool_call, 'function', None),
1393
+ 'arguments',
1394
+ '{}',
1395
+ )
1396
+
1397
+ # Parse and format arguments for readability
1398
+ try:
1399
+ import json
1400
+
1401
+ args_dict = json.loads(func_args_str)
1402
+ args_formatted = ', '.join(
1403
+ f"{k}={v}" for k, v in args_dict.items()
1404
+ )
1405
+ except (json.JSONDecodeError, ValueError, TypeError):
1406
+ args_formatted = func_args_str
1407
+
1408
+ conversation_lines.append(
1409
+ f"[TOOL CALL] {func_name}({args_formatted})"
1410
+ )
1411
+
1412
+ # Handle tool response messages
1413
+ elif role == 'tool':
1414
+ tool_name = message.get('name', 'unknown_tool')
1415
+ if not content:
1416
+ content = str(message.get('content', ''))
1417
+ conversation_lines.append(
1418
+ f"[TOOL RESULT] {tool_name} → {content}"
1419
+ )
1420
+
1421
+ # Handle regular content messages (user/assistant/system)
1422
+ elif content:
1423
+ content = str(content)
1424
+ if role == 'user':
1425
+ user_messages.append(content)
1426
+ conversation_lines.append(f"{role}: {content}")
1427
+
1428
+ conversation_text = "\n".join(conversation_lines).strip()
1429
+
1430
+ if not conversation_text:
1431
+ status_message = (
1432
+ "Conversation context is empty; skipping summary."
1433
+ )
1434
+ result["status"] = status_message
1435
+ return result
1436
+
1437
+ if self._context_summary_agent is None:
1438
+ self._context_summary_agent = ChatAgent(
1439
+ system_message=(
1440
+ "You are a helpful assistant that summarizes "
1441
+ "conversations"
1442
+ ),
1443
+ model=self.model_backend,
1444
+ agent_id=f"{self.agent_id}_context_summarizer",
1445
+ )
1446
+ else:
1447
+ self._context_summary_agent.reset()
1448
+
1449
+ if summary_prompt:
1450
+ prompt_text = (
1451
+ f"{summary_prompt.rstrip()}\n\n"
1452
+ f"AGENT CONVERSATION TO BE SUMMARIZED:\n"
1453
+ f"{conversation_text}"
1454
+ )
1455
+ else:
1456
+ prompt_text = build_default_summary_prompt(conversation_text)
1457
+
1458
+ try:
1459
+ # Use structured output if response_format is provided
1460
+ if response_format:
1461
+ response = self._context_summary_agent.step(
1462
+ prompt_text, response_format=response_format
1463
+ )
1464
+ else:
1465
+ response = self._context_summary_agent.step(prompt_text)
1466
+ except Exception as step_exc:
1467
+ error_message = (
1468
+ f"Failed to generate summary using model: {step_exc}"
1469
+ )
1470
+ logger.error(error_message)
1471
+ result["status"] = error_message
1472
+ return result
1473
+
1474
+ if not response.msgs:
1475
+ status_message = (
1476
+ "Failed to generate summary from model response."
1477
+ )
1478
+ result["status"] = status_message
1479
+ return result
1480
+
1481
+ summary_content = response.msgs[-1].content.strip()
1482
+ if not summary_content:
1483
+ status_message = "Generated summary is empty."
1484
+ result["status"] = status_message
1485
+ return result
1486
+
1487
+ # handle structured output if response_format was provided
1488
+ structured_output = None
1489
+ if response_format and response.msgs[-1].parsed:
1490
+ structured_output = response.msgs[-1].parsed
1491
+
1492
+ # determine filename: use provided filename, or extract from
1493
+ # structured output, or generate timestamp
1494
+ if filename:
1495
+ base_filename = filename
1496
+ elif structured_output and hasattr(
1497
+ structured_output, 'task_title'
1498
+ ):
1499
+ # use task_title from structured output for filename
1500
+ task_title = structured_output.task_title
1501
+ clean_title = ContextUtility.sanitize_workflow_filename(
1502
+ task_title
1503
+ )
1504
+ base_filename = (
1505
+ f"{clean_title}_workflow" if clean_title else "workflow"
1506
+ )
1507
+ else:
1508
+ base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
1509
+
1510
+ base_filename = Path(base_filename).with_suffix("").name
1511
+
1512
+ metadata = context_util.get_session_metadata()
1513
+ metadata.update(
1514
+ {
1515
+ "agent_id": self.agent_id,
1516
+ "message_count": len(messages),
1517
+ }
1518
+ )
1519
+
1520
+ # convert structured output to custom markdown if present
1521
+ if structured_output:
1522
+ # convert structured output to custom markdown
1523
+ summary_content = context_util.structured_output_to_markdown(
1524
+ structured_data=structured_output, metadata=metadata
1525
+ )
1526
+ if add_user_messages:
1527
+ summary_content = self._append_user_messages_section(
1528
+ summary_content, user_messages
1529
+ )
1046
1530
 
1047
- try:
1048
- record = MemoryRecord.from_dict(record_dict)
1049
- self.memory.write_records([record])
1050
- except Exception as e:
1051
- logger.warning(
1052
- f"Error converting record to MemoryRecord: {e}. "
1053
- f"Record: {record_dict}"
1054
- )
1055
- logger.info(f"Memory loaded from {path}")
1531
+ # Save the markdown (either custom structured or default)
1532
+ save_status = context_util.save_markdown_file(
1533
+ base_filename,
1534
+ summary_content,
1535
+ title="Conversation Summary"
1536
+ if not structured_output
1537
+ else None,
1538
+ metadata=metadata if not structured_output else None,
1539
+ )
1056
1540
 
1057
- def save_memory(self, path: str) -> None:
1058
- r"""Retrieves the current conversation data from memory and writes it
1059
- into a JSON file using JsonStorage.
1541
+ file_path = (
1542
+ context_util.get_working_directory() / f"{base_filename}.md"
1543
+ )
1544
+ summary_content = (
1545
+ f"[CONTEXT_SUMMARY] The following is a summary of our "
1546
+ f"conversation from a previous session: {summary_content}"
1547
+ )
1548
+ # Prepare result dictionary
1549
+ result_dict = {
1550
+ "summary": summary_content,
1551
+ "file_path": str(file_path),
1552
+ "status": save_status,
1553
+ "structured_summary": structured_output,
1554
+ }
1060
1555
 
1061
- Args:
1062
- path (str): Target file path to store JSON data.
1063
- """
1064
- json_store = JsonStorage(Path(path))
1065
- context_records = self.memory.retrieve()
1066
- to_save = [cr.memory_record.to_dict() for cr in context_records]
1067
- json_store.save(to_save)
1068
- logger.info(f"Memory saved to {path}")
1556
+ result.update(result_dict)
1557
+ logger.info("Conversation summary saved to %s", file_path)
1558
+ return result
1069
1559
 
1070
- def summarize(
1560
+ except Exception as exc:
1561
+ error_message = f"Failed to summarize conversation context: {exc}"
1562
+ logger.error(error_message)
1563
+ result["status"] = error_message
1564
+ return result
1565
+
1566
+ async def asummarize(
1071
1567
  self,
1072
1568
  filename: Optional[str] = None,
1073
1569
  summary_prompt: Optional[str] = None,
1074
1570
  response_format: Optional[Type[BaseModel]] = None,
1075
1571
  working_directory: Optional[Union[str, Path]] = None,
1572
+ include_summaries: bool = False,
1573
+ add_user_messages: bool = True,
1076
1574
  ) -> Dict[str, Any]:
1077
- r"""Summarize the agent's current conversation context and persist it
1078
- to a markdown file.
1575
+ r"""Asynchronously summarize the agent's current conversation context
1576
+ and persist it to a markdown file.
1577
+
1578
+ This is the async version of summarize() that uses astep() for
1579
+ non-blocking LLM calls, enabling parallel summarization of multiple
1580
+ agents.
1079
1581
 
1080
1582
  Args:
1081
1583
  filename (Optional[str]): The base filename (without extension) to
@@ -1091,7 +1593,13 @@ class ChatAgent(BaseAgent):
1091
1593
  working_directory (Optional[str|Path]): Optional directory to save
1092
1594
  the markdown summary file. If provided, overrides the default
1093
1595
  directory used by ContextUtility.
1094
-
1596
+ include_summaries (bool): Whether to include previously generated
1597
+ summaries in the content to be summarized. If False (default),
1598
+ only non-summary messages will be summarized. If True, all
1599
+ messages including previous summaries will be summarized
1600
+ (full compression). (default: :obj:`False`)
1601
+ add_user_messages (bool): Whether to add user messages to the summary.
1602
+ (default: :obj:`True`)
1095
1603
  Returns:
1096
1604
  Dict[str, Any]: A dictionary containing the summary text, file
1097
1605
  path, status message, and optionally structured_summary if
@@ -1127,10 +1635,17 @@ class ChatAgent(BaseAgent):
1127
1635
 
1128
1636
  # Convert messages to conversation text
1129
1637
  conversation_lines = []
1638
+ user_messages: List[str] = []
1130
1639
  for message in messages:
1131
1640
  role = message.get('role', 'unknown')
1132
1641
  content = message.get('content', '')
1133
1642
 
1643
+ # Skip summary messages if include_summaries is False
1644
+ if not include_summaries and isinstance(content, str):
1645
+ # Check if this is a summary message by looking for marker
1646
+ if content.startswith('[CONTEXT_SUMMARY]'):
1647
+ continue
1648
+
1134
1649
  # Handle tool call messages (assistant calling tools)
1135
1650
  tool_calls = message.get('tool_calls')
1136
1651
  if tool_calls and isinstance(tool_calls, (list, tuple)):
@@ -1182,6 +1697,9 @@ class ChatAgent(BaseAgent):
1182
1697
 
1183
1698
  # Handle regular content messages (user/assistant/system)
1184
1699
  elif content:
1700
+ content = str(content)
1701
+ if role == 'user':
1702
+ user_messages.append(content)
1185
1703
  conversation_lines.append(f"{role}: {content}")
1186
1704
 
1187
1705
  conversation_text = "\n".join(conversation_lines).strip()
@@ -1212,20 +1730,25 @@ class ChatAgent(BaseAgent):
1212
1730
  f"{conversation_text}"
1213
1731
  )
1214
1732
  else:
1215
- prompt_text = (
1216
- "Summarize the context information in concise markdown "
1217
- "bullet points highlighting key decisions, action items.\n"
1218
- f"Context information:\n{conversation_text}"
1219
- )
1733
+ prompt_text = build_default_summary_prompt(conversation_text)
1220
1734
 
1221
1735
  try:
1222
1736
  # Use structured output if response_format is provided
1223
1737
  if response_format:
1224
- response = self._context_summary_agent.step(
1738
+ response = await self._context_summary_agent.astep(
1225
1739
  prompt_text, response_format=response_format
1226
1740
  )
1227
1741
  else:
1228
- response = self._context_summary_agent.step(prompt_text)
1742
+ response = await self._context_summary_agent.astep(
1743
+ prompt_text
1744
+ )
1745
+
1746
+ # Handle streaming response
1747
+ if isinstance(response, AsyncStreamingChatAgentResponse):
1748
+ # Collect final response
1749
+ final_response = await response
1750
+ response = final_response
1751
+
1229
1752
  except Exception as step_exc:
1230
1753
  error_message = (
1231
1754
  f"Failed to generate summary using model: {step_exc}"
@@ -1247,11 +1770,29 @@ class ChatAgent(BaseAgent):
1247
1770
  result["status"] = status_message
1248
1771
  return result
1249
1772
 
1250
- base_filename = (
1251
- filename
1252
- if filename
1253
- else f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
1254
- )
1773
+ # handle structured output if response_format was provided
1774
+ structured_output = None
1775
+ if response_format and response.msgs[-1].parsed:
1776
+ structured_output = response.msgs[-1].parsed
1777
+
1778
+ # determine filename: use provided filename, or extract from
1779
+ # structured output, or generate timestamp
1780
+ if filename:
1781
+ base_filename = filename
1782
+ elif structured_output and hasattr(
1783
+ structured_output, 'task_title'
1784
+ ):
1785
+ # use task_title from structured output for filename
1786
+ task_title = structured_output.task_title
1787
+ clean_title = ContextUtility.sanitize_workflow_filename(
1788
+ task_title
1789
+ )
1790
+ base_filename = (
1791
+ f"{clean_title}_workflow" if clean_title else "workflow"
1792
+ )
1793
+ else:
1794
+ base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
1795
+
1255
1796
  base_filename = Path(base_filename).with_suffix("").name
1256
1797
 
1257
1798
  metadata = context_util.get_session_metadata()
@@ -1262,14 +1803,16 @@ class ChatAgent(BaseAgent):
1262
1803
  }
1263
1804
  )
1264
1805
 
1265
- # Handle structured output if response_format was provided
1266
- structured_output = None
1267
- if response_format and response.msgs[-1].parsed:
1268
- structured_output = response.msgs[-1].parsed
1269
- # Convert structured output to custom markdown
1806
+ # convert structured output to custom markdown if present
1807
+ if structured_output:
1808
+ # convert structured output to custom markdown
1270
1809
  summary_content = context_util.structured_output_to_markdown(
1271
1810
  structured_data=structured_output, metadata=metadata
1272
1811
  )
1812
+ if add_user_messages:
1813
+ summary_content = self._append_user_messages_section(
1814
+ summary_content, user_messages
1815
+ )
1273
1816
 
1274
1817
  # Save the markdown (either custom structured or default)
1275
1818
  save_status = context_util.save_markdown_file(
@@ -1285,6 +1828,11 @@ class ChatAgent(BaseAgent):
1285
1828
  context_util.get_working_directory() / f"{base_filename}.md"
1286
1829
  )
1287
1830
 
1831
+ summary_content = (
1832
+ f"[CONTEXT_SUMMARY] The following is a summary of our "
1833
+ f"conversation from a previous session: {summary_content}"
1834
+ )
1835
+
1288
1836
  # Prepare result dictionary
1289
1837
  result_dict = {
1290
1838
  "summary": summary_content,
@@ -1312,7 +1860,14 @@ class ChatAgent(BaseAgent):
1312
1860
  self.memory.clear()
1313
1861
 
1314
1862
  if self.system_message is not None:
1315
- self.update_memory(self.system_message, OpenAIBackendRole.SYSTEM)
1863
+ self.memory.write_record(
1864
+ MemoryRecord(
1865
+ message=self.system_message,
1866
+ role_at_backend=OpenAIBackendRole.SYSTEM,
1867
+ timestamp=time.time_ns() / 1_000_000_000,
1868
+ agent_id=self.agent_id,
1869
+ )
1870
+ )
1316
1871
 
1317
1872
  def _generate_system_message_for_output_language(
1318
1873
  self,
@@ -1337,26 +1892,70 @@ class ChatAgent(BaseAgent):
1337
1892
  content = self._original_system_message.content + language_prompt
1338
1893
  return self._original_system_message.create_new_instance(content)
1339
1894
  else:
1340
- return BaseMessage.make_assistant_message(
1341
- role_name="Assistant",
1342
- content=language_prompt,
1343
- )
1895
+ return BaseMessage.make_system_message(language_prompt)
1344
1896
 
1345
1897
  def init_messages(self) -> None:
1346
1898
  r"""Initializes the stored messages list with the current system
1347
1899
  message.
1348
1900
  """
1349
- self.memory.clear()
1350
- # avoid UserWarning: The `ChatHistoryMemory` is empty.
1351
- if self.system_message is not None:
1352
- self.memory.write_record(
1353
- MemoryRecord(
1354
- message=self.system_message,
1355
- role_at_backend=OpenAIBackendRole.SYSTEM,
1356
- timestamp=time.time_ns() / 1_000_000_000,
1357
- agent_id=self.agent_id,
1358
- )
1359
- )
1901
+ self._reset_summary_state()
1902
+ self.clear_memory()
1903
+
1904
+ def update_system_message(
1905
+ self,
1906
+ system_message: Union[BaseMessage, str],
1907
+ reset_memory: bool = True,
1908
+ ) -> None:
1909
+ r"""Update the system message.
1910
+ Updating it resets the conversation with the new system message.
1911
+
1912
+ Args:
1913
+ system_message (Union[BaseMessage, str]): The new system message.
1914
+ Can be either a BaseMessage object or a string.
1915
+ If a string is provided, it will be converted
1916
+ into a BaseMessage object.
1917
+ reset_memory (bool):
1918
+ Whether to reinitialize conversation messages after updating
1919
+ the system message. Defaults to True.
1920
+ """
1921
+ if system_message is None:
1922
+ raise ValueError("system_message is required and cannot be None. ")
1923
+ self._original_system_message = (
1924
+ BaseMessage.make_system_message(system_message)
1925
+ if isinstance(system_message, str)
1926
+ else system_message
1927
+ )
1928
+ self._system_message = (
1929
+ self._generate_system_message_for_output_language()
1930
+ )
1931
+ if reset_memory:
1932
+ self.init_messages()
1933
+
1934
+ def append_to_system_message(
1935
+ self, content: str, reset_memory: bool = True
1936
+ ) -> None:
1937
+ """Append additional context to existing system message.
1938
+
1939
+ Args:
1940
+ content (str): The additional system message.
1941
+ reset_memory (bool):
1942
+ Whether to reinitialize conversation messages after appending
1943
+ additional context. Defaults to True.
1944
+ """
1945
+ original_content = (
1946
+ self._original_system_message.content
1947
+ if self._original_system_message
1948
+ else ""
1949
+ )
1950
+ new_system_message = original_content + '\n' + content
1951
+ self._original_system_message = BaseMessage.make_system_message(
1952
+ new_system_message
1953
+ )
1954
+ self._system_message = (
1955
+ self._generate_system_message_for_output_language()
1956
+ )
1957
+ if reset_memory:
1958
+ self.init_messages()
1360
1959
 
1361
1960
  def reset_to_original_system_message(self) -> None:
1362
1961
  r"""Reset system message to original, removing any appended context.
@@ -1780,22 +2379,122 @@ class ChatAgent(BaseAgent):
1780
2379
 
1781
2380
  try:
1782
2381
  openai_messages, num_tokens = self.memory.get_context()
2382
+ if self.summarize_threshold is not None:
2383
+ threshold = self._calculate_next_summary_threshold()
2384
+ summary_token_count = self._summary_token_count
2385
+ token_limit = self.model_backend.token_limit
2386
+
2387
+ if num_tokens <= token_limit:
2388
+ if (
2389
+ summary_token_count
2390
+ > token_limit * self.summary_window_ratio
2391
+ ):
2392
+ logger.info(
2393
+ f"Summary tokens ({summary_token_count}) "
2394
+ f"exceed limit, full compression."
2395
+ )
2396
+ # Summarize everything (including summaries)
2397
+ summary = self.summarize(include_summaries=True)
2398
+ self._update_memory_with_summary(
2399
+ summary.get("summary", ""),
2400
+ include_summaries=True,
2401
+ )
2402
+ elif num_tokens > threshold:
2403
+ logger.info(
2404
+ f"Token count ({num_tokens}) exceed threshold "
2405
+ f"({threshold}). Triggering summarization."
2406
+ )
2407
+ # Only summarize non-summary content
2408
+ summary = self.summarize(include_summaries=False)
2409
+ self._update_memory_with_summary(
2410
+ summary.get("summary", ""),
2411
+ include_summaries=False,
2412
+ )
1783
2413
  accumulated_context_tokens += num_tokens
1784
2414
  except RuntimeError as e:
1785
2415
  return self._step_terminate(
1786
2416
  e.args[1], tool_call_records, "max_tokens_exceeded"
1787
2417
  )
1788
- # Get response from model backend
1789
- response = self._get_model_response(
1790
- openai_messages,
1791
- num_tokens=num_tokens,
1792
- current_iteration=iteration_count,
1793
- response_format=response_format,
1794
- tool_schemas=[]
1795
- if disable_tools
1796
- else self._get_full_tool_schemas(),
1797
- prev_num_openai_messages=prev_num_openai_messages,
1798
- )
2418
+ # Get response from model backend with token limit error handling
2419
+ try:
2420
+ response = self._get_model_response(
2421
+ openai_messages,
2422
+ num_tokens=num_tokens,
2423
+ current_iteration=iteration_count,
2424
+ response_format=response_format,
2425
+ tool_schemas=[]
2426
+ if disable_tools
2427
+ else self._get_full_tool_schemas(),
2428
+ prev_num_openai_messages=prev_num_openai_messages,
2429
+ )
2430
+ except Exception as exc:
2431
+ logger.exception("Model error: %s", exc)
2432
+
2433
+ if self._is_token_limit_error(exc):
2434
+ tool_signature = self._last_tool_call_signature
2435
+ if (
2436
+ tool_signature is not None
2437
+ and tool_signature
2438
+ == self._last_token_limit_tool_signature
2439
+ ):
2440
+ description = self._describe_tool_call(
2441
+ self._last_tool_call_record
2442
+ )
2443
+ repeated_msg = (
2444
+ "Context exceeded again by the same tool call."
2445
+ )
2446
+ if description:
2447
+ repeated_msg += f" {description}"
2448
+ raise RuntimeError(repeated_msg) from exc
2449
+
2450
+ user_message_count = sum(
2451
+ 1
2452
+ for msg in openai_messages
2453
+ if getattr(msg, "role", None) == "user"
2454
+ )
2455
+ if (
2456
+ user_message_count == 1
2457
+ and getattr(openai_messages[-1], "role", None)
2458
+ == "user"
2459
+ ):
2460
+ raise RuntimeError(
2461
+ "The provided user input alone exceeds the "
2462
+ "context window. Please shorten the input."
2463
+ ) from exc
2464
+
2465
+ logger.warning(
2466
+ "Token limit exceeded error detected. "
2467
+ "Summarizing context."
2468
+ )
2469
+
2470
+ recent_records: List[ContextRecord]
2471
+ try:
2472
+ recent_records = self.memory.retrieve()
2473
+ except Exception: # pragma: no cover - defensive guard
2474
+ recent_records = []
2475
+
2476
+ indices_to_remove = (
2477
+ self._find_indices_to_remove_for_last_tool_pair(
2478
+ recent_records
2479
+ )
2480
+ )
2481
+ self.memory.remove_records_by_indices(indices_to_remove)
2482
+
2483
+ summary = self.summarize(include_summaries=False)
2484
+ tool_notice = self._format_tool_limit_notice()
2485
+ summary_messages = summary.get("summary", "")
2486
+
2487
+ if tool_notice:
2488
+ summary_messages += "\n\n" + tool_notice
2489
+
2490
+ self._update_memory_with_summary(
2491
+ summary_messages, include_summaries=False
2492
+ )
2493
+ self._last_token_limit_tool_signature = tool_signature
2494
+ return self._step_impl(input_message, response_format)
2495
+
2496
+ raise
2497
+
1799
2498
  prev_num_openai_messages = len(openai_messages)
1800
2499
  iteration_count += 1
1801
2500
 
@@ -1990,6 +2689,7 @@ class ChatAgent(BaseAgent):
1990
2689
  step_token_usage = self._create_token_usage_tracker()
1991
2690
  iteration_count: int = 0
1992
2691
  prev_num_openai_messages: int = 0
2692
+
1993
2693
  while True:
1994
2694
  if self.pause_event is not None and not self.pause_event.is_set():
1995
2695
  if isinstance(self.pause_event, asyncio.Event):
@@ -2000,21 +2700,128 @@ class ChatAgent(BaseAgent):
2000
2700
  await loop.run_in_executor(None, self.pause_event.wait)
2001
2701
  try:
2002
2702
  openai_messages, num_tokens = self.memory.get_context()
2703
+ if self.summarize_threshold is not None:
2704
+ threshold = self._calculate_next_summary_threshold()
2705
+ summary_token_count = self._summary_token_count
2706
+ token_limit = self.model_backend.token_limit
2707
+
2708
+ if num_tokens <= token_limit:
2709
+ if (
2710
+ summary_token_count
2711
+ > token_limit * self.summary_window_ratio
2712
+ ):
2713
+ logger.info(
2714
+ f"Summary tokens ({summary_token_count}) "
2715
+ f"exceed limit, full compression."
2716
+ )
2717
+ # Summarize everything (including summaries)
2718
+ summary = await self.asummarize(
2719
+ include_summaries=True
2720
+ )
2721
+ self._update_memory_with_summary(
2722
+ summary.get("summary", ""),
2723
+ include_summaries=True,
2724
+ )
2725
+ elif num_tokens > threshold:
2726
+ logger.info(
2727
+ f"Token count ({num_tokens}) exceed threshold "
2728
+ "({threshold}). Triggering summarization."
2729
+ )
2730
+ # Only summarize non-summary content
2731
+ summary = await self.asummarize(
2732
+ include_summaries=False
2733
+ )
2734
+ self._update_memory_with_summary(
2735
+ summary.get("summary", ""),
2736
+ include_summaries=False,
2737
+ )
2003
2738
  accumulated_context_tokens += num_tokens
2004
2739
  except RuntimeError as e:
2005
2740
  return self._step_terminate(
2006
2741
  e.args[1], tool_call_records, "max_tokens_exceeded"
2007
2742
  )
2008
- response = await self._aget_model_response(
2009
- openai_messages,
2010
- num_tokens=num_tokens,
2011
- current_iteration=iteration_count,
2012
- response_format=response_format,
2013
- tool_schemas=[]
2014
- if disable_tools
2015
- else self._get_full_tool_schemas(),
2016
- prev_num_openai_messages=prev_num_openai_messages,
2017
- )
2743
+ # Get response from model backend with token limit error handling
2744
+ try:
2745
+ response = await self._aget_model_response(
2746
+ openai_messages,
2747
+ num_tokens=num_tokens,
2748
+ current_iteration=iteration_count,
2749
+ response_format=response_format,
2750
+ tool_schemas=[]
2751
+ if disable_tools
2752
+ else self._get_full_tool_schemas(),
2753
+ prev_num_openai_messages=prev_num_openai_messages,
2754
+ )
2755
+ except Exception as exc:
2756
+ logger.exception("Model error: %s", exc)
2757
+
2758
+ if self._is_token_limit_error(exc):
2759
+ tool_signature = self._last_tool_call_signature
2760
+ if (
2761
+ tool_signature is not None
2762
+ and tool_signature
2763
+ == self._last_token_limit_tool_signature
2764
+ ):
2765
+ description = self._describe_tool_call(
2766
+ self._last_tool_call_record
2767
+ )
2768
+ repeated_msg = (
2769
+ "Context exceeded again by the same tool call."
2770
+ )
2771
+ if description:
2772
+ repeated_msg += f" {description}"
2773
+ raise RuntimeError(repeated_msg) from exc
2774
+
2775
+ user_message_count = sum(
2776
+ 1
2777
+ for msg in openai_messages
2778
+ if getattr(msg, "role", None) == "user"
2779
+ )
2780
+ if (
2781
+ user_message_count == 1
2782
+ and getattr(openai_messages[-1], "role", None)
2783
+ == "user"
2784
+ ):
2785
+ raise RuntimeError(
2786
+ "The provided user input alone exceeds the"
2787
+ "context window. Please shorten the input."
2788
+ ) from exc
+
+ logger.warning(
+ "Token limit exceeded error detected. "
+ "Summarizing context."
+ )
+
+ recent_records: List[ContextRecord]
+ try:
+ recent_records = self.memory.retrieve()
+ except Exception:  # pragma: no cover - defensive guard
+ recent_records = []
+
+ indices_to_remove = (
+ self._find_indices_to_remove_for_last_tool_pair(
+ recent_records
+ )
+ )
+ self.memory.remove_records_by_indices(indices_to_remove)
+
+ summary = await self.asummarize()
+
+ tool_notice = self._format_tool_limit_notice()
+ summary_messages = summary.get("summary", "")
+
+ if tool_notice:
+ summary_messages += "\n\n" + tool_notice
+ self._update_memory_with_summary(
+ summary_messages, include_summaries=False
+ )
+ self._last_token_limit_tool_signature = tool_signature
+ return await self._astep_non_streaming_task(
+ input_message, response_format
+ )
+
+ raise
+
 prev_num_openai_messages = len(openai_messages)
 iteration_count += 1
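Note: the added block above encodes a two-tier compression policy. As a minimal, illustrative sketch (standalone names, not the ChatAgent API; `threshold` stands in for whatever `_calculate_next_summary_threshold()` returns):

```python
from typing import Optional

def choose_summarization_mode(
    num_tokens: int,
    summary_tokens: int,
    token_limit: int,
    threshold: int,
    summary_window_ratio: float,
) -> Optional[str]:
    # Mirrors the added logic: only act while still under the hard limit.
    if num_tokens > token_limit:
        return None  # handled by the token-limit error path below instead
    if summary_tokens > token_limit * summary_window_ratio:
        # Accumulated summaries are themselves too large: compress everything,
        # including earlier summaries (include_summaries=True).
        return "full"
    if num_tokens > threshold:
        # Context grew past the rolling threshold: summarize only the
        # non-summary messages (include_summaries=False).
        return "partial"
    return None
```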
@@ -2091,6 +2898,8 @@ class ChatAgent(BaseAgent):
 if self.prune_tool_calls_from_memory and tool_call_records:
 self.memory.clean_tool_calls()

+ self._last_token_limit_user_signature = None
+
 return self._convert_to_chatagent_response(
 response,
 tool_call_records,
@@ -2187,6 +2996,8 @@ class ChatAgent(BaseAgent):
 if response:
 break
 except RateLimitError as e:
+ if self._is_token_limit_error(e):
+ raise
 last_error = e
 if attempt < self.retry_attempts - 1:
 delay = min(self.retry_delay * (2**attempt), 60.0)
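Note: both retry loops now re-raise immediately when a `RateLimitError` is actually a context-overflow error, so the summarization path above handles it rather than burning retries. `_is_token_limit_error` itself is not shown in this diff; a purely illustrative check (an assumption, not the library's implementation) could match provider error text:

```python
def looks_like_token_limit_error(exc: Exception) -> bool:
    # Hypothetical heuristic: several providers report context overflow
    # through generic rate-limit/4xx exceptions, distinguishable only by
    # their message text.
    text = str(exc).lower()
    markers = (
        "context length",
        "context_length_exceeded",
        "maximum context",
        "too many tokens",
        "prompt is too long",
    )
    return any(marker in text for marker in markers)
```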
@@ -2204,7 +3015,6 @@ class ChatAgent(BaseAgent):
 except Exception:
 logger.error(
 f"Model error: {self.model_backend.model_type}",
- exc_info=True,
 )
 raise
 else:
@@ -2251,6 +3061,8 @@ class ChatAgent(BaseAgent):
 if response:
 break
 except RateLimitError as e:
+ if self._is_token_limit_error(e):
+ raise
 last_error = e
 if attempt < self.retry_attempts - 1:
 delay = min(self.retry_delay * (2**attempt), 60.0)
@@ -2758,6 +3570,7 @@ class ChatAgent(BaseAgent):
 tool_call_id=tool_call_id,
 )

+ self._update_last_tool_call_state(tool_record)
 return tool_record

 def _stream(
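Note: `_update_last_tool_call_state` (called here and after each `ToolCallingRecord` in the hunks below) apparently remembers which tool call ran last, so the token-limit handler can detect the same call overflowing the context twice. One way such a signature could be derived, shown only as an assumption with hypothetical names:

```python
import hashlib
import json

def tool_call_signature(tool_name: str, args: dict) -> str:
    # Hypothetical sketch: a stable fingerprint of the last tool call,
    # comparable across retries. The real state tracked by
    # _update_last_tool_call_state may store different fields.
    payload = json.dumps(
        {"tool": tool_name, "args": args}, sort_keys=True, default=str
    )
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()
```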
@@ -3319,12 +4132,14 @@ class ChatAgent(BaseAgent):
 timestamp=base_timestamp + 1e-6,
 )

- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
 tool_name=function_name,
 args=args,
 result=result,
 tool_call_id=tool_call_id,
 )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record

 except Exception as e:
 error_msg = (
@@ -3346,12 +4161,14 @@ class ChatAgent(BaseAgent):

 self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)

- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
 tool_name=function_name,
 args=args,
 result=result,
 tool_call_id=tool_call_id,
 )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record
 else:
 logger.warning(
 f"Tool '{function_name}' not found in internal tools"
@@ -3373,6 +4190,23 @@ class ChatAgent(BaseAgent):
 tool_call_id = tool_call_data['id']

 if function_name in self._internal_tools:
+ # Create the tool call message
+ assist_msg = FunctionCallingMessage(
+ role_name=self.role_name,
+ role_type=self.role_type,
+ meta_dict=None,
+ content="",
+ func_name=function_name,
+ args=args,
+ tool_call_id=tool_call_id,
+ )
+ assist_ts = time.time_ns() / 1_000_000_000
+ self.update_memory(
+ assist_msg,
+ OpenAIBackendRole.ASSISTANT,
+ timestamp=assist_ts,
+ )
+
 tool = self._internal_tools[function_name]
 try:
 # Try different invocation paths in order of preference
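Note: this hunk moves the assistant's tool-call message into memory before the tool is invoked (the post-invocation recording is removed further down). A simplified sketch of the resulting write order, with illustrative names rather than the ChatAgent API:

```python
import time

def invoke_and_record(memory: list, tool, name: str, args: dict, call_id: str):
    # 1. Record the assistant's tool call first, with its own timestamp,
    #    so it is already in memory even if the tool output later overflows
    #    the context window.
    memory.append(("assistant", name, args, call_id, time.time_ns() / 1e9))
    # 2. Run the tool.
    result = tool(**args)
    # 3. Record the tool result as a separate, later-stamped entry.
    memory.append(("function", name, result, call_id, time.time_ns() / 1e9))
    return result
```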
@@ -3401,19 +4235,8 @@ class ChatAgent(BaseAgent):
 else:
 # Fallback: synchronous call
 result = tool(**args)
- # First, create and record the assistant message with tool
- # call
- assist_msg = FunctionCallingMessage(
- role_name=self.role_name,
- role_type=self.role_type,
- meta_dict=None,
- content="",
- func_name=function_name,
- args=args,
- tool_call_id=tool_call_id,
- )

- # Then create the tool response message
+ # Create the tool response message
 func_msg = FunctionCallingMessage(
 role_name=self.role_name,
 role_type=self.role_type,
@@ -3423,31 +4246,21 @@ class ChatAgent(BaseAgent):
 result=result,
 tool_call_id=tool_call_id,
 )
-
- # Record both messages with precise timestamps to ensure
- # correct ordering
- current_time_ns = time.time_ns()
- base_timestamp = (
- current_time_ns / 1_000_000_000
- ) # Convert to seconds
-
- self.update_memory(
- assist_msg,
- OpenAIBackendRole.ASSISTANT,
- timestamp=base_timestamp,
- )
+ func_ts = time.time_ns() / 1_000_000_000
 self.update_memory(
 func_msg,
 OpenAIBackendRole.FUNCTION,
- timestamp=base_timestamp + 1e-6,
+ timestamp=func_ts,
 )

- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
 tool_name=function_name,
 args=args,
 result=result,
 tool_call_id=tool_call_id,
 )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record

 except Exception as e:
 error_msg = (
@@ -3466,15 +4279,21 @@ class ChatAgent(BaseAgent):
 result=result,
 tool_call_id=tool_call_id,
 )
+ func_ts = time.time_ns() / 1_000_000_000
+ self.update_memory(
+ func_msg,
+ OpenAIBackendRole.FUNCTION,
+ timestamp=func_ts,
+ )

- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
-
- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
 tool_name=function_name,
 args=args,
 result=result,
 tool_call_id=tool_call_id,
 )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record
 else:
 logger.warning(
 f"Tool '{function_name}' not found in internal tools"
@@ -4176,23 +4995,29 @@ class ChatAgent(BaseAgent):
 # Toolkit doesn't support cloning, use original
 cloned_toolkits[toolkit_id] = toolkit_instance

- if getattr(
- tool.func, "__message_integration_enhanced__", False
- ):
- cloned_tools.append(
- FunctionTool(
- func=tool.func,
- openai_tool_schema=tool.get_openai_tool_schema(),
- )
- )
- continue
-
 # Get the method from the cloned (or original) toolkit
 toolkit = cloned_toolkits[toolkit_id]
 method_name = tool.func.__name__

+ # Check if toolkit was actually cloned or just reused
+ toolkit_was_cloned = toolkit is not toolkit_instance
+
 if hasattr(toolkit, method_name):
 new_method = getattr(toolkit, method_name)
+
+ # If toolkit wasn't cloned (stateless), preserve the
+ # original function to maintain any enhancements/wrappers
+ if not toolkit_was_cloned:
+ # Toolkit is stateless, safe to reuse original function
+ cloned_tools.append(
+ FunctionTool(
+ func=tool.func,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ )
+ continue
+
+ # Toolkit was cloned, use the new method
 # Wrap cloned method into a new FunctionTool,
 # preserving schema
 try: