camel-ai 0.2.73a4__py3-none-any.whl → 0.2.80a2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. camel/__init__.py +1 -1
  2. camel/agents/_utils.py +38 -0
  3. camel/agents/chat_agent.py +2217 -519
  4. camel/agents/mcp_agent.py +30 -27
  5. camel/configs/__init__.py +15 -0
  6. camel/configs/aihubmix_config.py +88 -0
  7. camel/configs/amd_config.py +70 -0
  8. camel/configs/cometapi_config.py +104 -0
  9. camel/configs/minimax_config.py +93 -0
  10. camel/configs/nebius_config.py +103 -0
  11. camel/data_collectors/alpaca_collector.py +15 -6
  12. camel/datasets/base_generator.py +39 -10
  13. camel/environments/single_step.py +28 -3
  14. camel/environments/tic_tac_toe.py +1 -1
  15. camel/interpreters/__init__.py +2 -0
  16. camel/interpreters/docker/Dockerfile +3 -12
  17. camel/interpreters/e2b_interpreter.py +34 -1
  18. camel/interpreters/microsandbox_interpreter.py +395 -0
  19. camel/loaders/__init__.py +11 -2
  20. camel/loaders/chunkr_reader.py +9 -0
  21. camel/memories/agent_memories.py +48 -4
  22. camel/memories/base.py +26 -0
  23. camel/memories/blocks/chat_history_block.py +122 -4
  24. camel/memories/context_creators/score_based.py +25 -384
  25. camel/memories/records.py +88 -8
  26. camel/messages/base.py +153 -34
  27. camel/models/__init__.py +10 -0
  28. camel/models/aihubmix_model.py +83 -0
  29. camel/models/aiml_model.py +1 -16
  30. camel/models/amd_model.py +101 -0
  31. camel/models/anthropic_model.py +6 -19
  32. camel/models/aws_bedrock_model.py +2 -33
  33. camel/models/azure_openai_model.py +114 -89
  34. camel/models/base_audio_model.py +3 -1
  35. camel/models/base_model.py +32 -14
  36. camel/models/cohere_model.py +1 -16
  37. camel/models/cometapi_model.py +83 -0
  38. camel/models/crynux_model.py +1 -16
  39. camel/models/deepseek_model.py +1 -16
  40. camel/models/fish_audio_model.py +6 -0
  41. camel/models/gemini_model.py +36 -18
  42. camel/models/groq_model.py +1 -17
  43. camel/models/internlm_model.py +1 -16
  44. camel/models/litellm_model.py +1 -16
  45. camel/models/lmstudio_model.py +1 -17
  46. camel/models/minimax_model.py +83 -0
  47. camel/models/mistral_model.py +1 -16
  48. camel/models/model_factory.py +27 -1
  49. camel/models/modelscope_model.py +1 -16
  50. camel/models/moonshot_model.py +105 -24
  51. camel/models/nebius_model.py +83 -0
  52. camel/models/nemotron_model.py +0 -5
  53. camel/models/netmind_model.py +1 -16
  54. camel/models/novita_model.py +1 -16
  55. camel/models/nvidia_model.py +1 -16
  56. camel/models/ollama_model.py +4 -19
  57. camel/models/openai_compatible_model.py +62 -41
  58. camel/models/openai_model.py +62 -57
  59. camel/models/openrouter_model.py +1 -17
  60. camel/models/ppio_model.py +1 -16
  61. camel/models/qianfan_model.py +1 -16
  62. camel/models/qwen_model.py +1 -16
  63. camel/models/reka_model.py +1 -16
  64. camel/models/samba_model.py +34 -47
  65. camel/models/sglang_model.py +64 -31
  66. camel/models/siliconflow_model.py +1 -16
  67. camel/models/stub_model.py +0 -4
  68. camel/models/togetherai_model.py +1 -16
  69. camel/models/vllm_model.py +1 -16
  70. camel/models/volcano_model.py +0 -17
  71. camel/models/watsonx_model.py +1 -16
  72. camel/models/yi_model.py +1 -16
  73. camel/models/zhipuai_model.py +60 -16
  74. camel/parsers/__init__.py +18 -0
  75. camel/parsers/mcp_tool_call_parser.py +176 -0
  76. camel/retrievers/auto_retriever.py +1 -0
  77. camel/runtimes/daytona_runtime.py +11 -12
  78. camel/societies/__init__.py +2 -0
  79. camel/societies/workforce/__init__.py +2 -0
  80. camel/societies/workforce/events.py +122 -0
  81. camel/societies/workforce/prompts.py +146 -66
  82. camel/societies/workforce/role_playing_worker.py +15 -11
  83. camel/societies/workforce/single_agent_worker.py +302 -65
  84. camel/societies/workforce/structured_output_handler.py +30 -18
  85. camel/societies/workforce/task_channel.py +163 -27
  86. camel/societies/workforce/utils.py +107 -13
  87. camel/societies/workforce/workflow_memory_manager.py +772 -0
  88. camel/societies/workforce/workforce.py +1949 -579
  89. camel/societies/workforce/workforce_callback.py +74 -0
  90. camel/societies/workforce/workforce_logger.py +168 -145
  91. camel/societies/workforce/workforce_metrics.py +33 -0
  92. camel/storages/key_value_storages/json.py +15 -2
  93. camel/storages/key_value_storages/mem0_cloud.py +48 -47
  94. camel/storages/object_storages/google_cloud.py +1 -1
  95. camel/storages/vectordb_storages/oceanbase.py +13 -13
  96. camel/storages/vectordb_storages/qdrant.py +3 -3
  97. camel/storages/vectordb_storages/tidb.py +8 -6
  98. camel/tasks/task.py +4 -3
  99. camel/toolkits/__init__.py +20 -7
  100. camel/toolkits/aci_toolkit.py +45 -0
  101. camel/toolkits/base.py +6 -4
  102. camel/toolkits/code_execution.py +28 -1
  103. camel/toolkits/context_summarizer_toolkit.py +684 -0
  104. camel/toolkits/dappier_toolkit.py +5 -1
  105. camel/toolkits/dingtalk.py +1135 -0
  106. camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
  107. camel/toolkits/excel_toolkit.py +1 -1
  108. camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +430 -36
  109. camel/toolkits/function_tool.py +13 -3
  110. camel/toolkits/github_toolkit.py +104 -17
  111. camel/toolkits/gmail_toolkit.py +1839 -0
  112. camel/toolkits/google_calendar_toolkit.py +38 -4
  113. camel/toolkits/google_drive_mcp_toolkit.py +12 -31
  114. camel/toolkits/hybrid_browser_toolkit/config_loader.py +15 -0
  115. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +77 -8
  116. camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +884 -88
  117. camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
  118. camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
  119. camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
  120. camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +959 -89
  121. camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +9 -2
  122. camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +281 -213
  123. camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
  124. camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
  125. camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
  126. camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +23 -3
  127. camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +72 -7
  128. camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -132
  129. camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
  130. camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
  131. camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
  132. camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +321 -8
  133. camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
  134. camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
  135. camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +151 -53
  136. camel/toolkits/klavis_toolkit.py +5 -1
  137. camel/toolkits/markitdown_toolkit.py +27 -1
  138. camel/toolkits/math_toolkit.py +64 -10
  139. camel/toolkits/mcp_toolkit.py +366 -71
  140. camel/toolkits/memory_toolkit.py +5 -1
  141. camel/toolkits/message_integration.py +18 -13
  142. camel/toolkits/minimax_mcp_toolkit.py +195 -0
  143. camel/toolkits/note_taking_toolkit.py +19 -10
  144. camel/toolkits/notion_mcp_toolkit.py +16 -26
  145. camel/toolkits/openbb_toolkit.py +5 -1
  146. camel/toolkits/origene_mcp_toolkit.py +8 -49
  147. camel/toolkits/playwright_mcp_toolkit.py +12 -31
  148. camel/toolkits/resend_toolkit.py +168 -0
  149. camel/toolkits/search_toolkit.py +264 -91
  150. camel/toolkits/slack_toolkit.py +64 -10
  151. camel/toolkits/terminal_toolkit/__init__.py +18 -0
  152. camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
  153. camel/toolkits/terminal_toolkit/utils.py +532 -0
  154. camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
  155. camel/toolkits/video_analysis_toolkit.py +17 -11
  156. camel/toolkits/wechat_official_toolkit.py +483 -0
  157. camel/toolkits/zapier_toolkit.py +5 -1
  158. camel/types/__init__.py +2 -2
  159. camel/types/enums.py +274 -7
  160. camel/types/openai_types.py +2 -2
  161. camel/types/unified_model_type.py +15 -0
  162. camel/utils/commons.py +36 -5
  163. camel/utils/constants.py +3 -0
  164. camel/utils/context_utils.py +1003 -0
  165. camel/utils/mcp.py +138 -4
  166. camel/utils/token_counting.py +43 -20
  167. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +223 -83
  168. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +170 -141
  169. camel/loaders/pandas_reader.py +0 -368
  170. camel/toolkits/openai_agent_toolkit.py +0 -135
  171. camel/toolkits/terminal_toolkit.py +0 -1550
  172. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
  173. {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
@@ -14,13 +14,23 @@
14
14
  from __future__ import annotations
15
15
 
16
16
  import asyncio
17
+ import atexit
18
+ import base64
19
+ import concurrent.futures
20
+ import hashlib
21
+ import inspect
17
22
  import json
18
- import logging
19
- import queue
23
+ import os
24
+ import random
25
+ import re
26
+ import tempfile
20
27
  import textwrap
21
28
  import threading
22
29
  import time
23
30
  import uuid
31
+ import warnings
32
+ from dataclasses import dataclass
33
+ from datetime import datetime
24
34
  from pathlib import Path
25
35
  from typing import (
26
36
  TYPE_CHECKING,
@@ -40,12 +50,14 @@ from typing import (
40
50
 
41
51
  from openai import (
42
52
  AsyncStream,
53
+ RateLimitError,
43
54
  Stream,
44
55
  )
45
56
  from pydantic import BaseModel, ValidationError
46
57
 
47
58
  from camel.agents._types import ModelResponse, ToolCallRequest
48
59
  from camel.agents._utils import (
60
+ build_default_summary_prompt,
49
61
  convert_to_function_tool,
50
62
  convert_to_schema,
51
63
  get_info_dict,
@@ -57,6 +69,7 @@ from camel.logger import get_logger
57
69
  from camel.memories import (
58
70
  AgentMemory,
59
71
  ChatHistoryMemory,
72
+ ContextRecord,
60
73
  MemoryRecord,
61
74
  ScoreBasedContextCreator,
62
75
  )
@@ -85,20 +98,46 @@ from camel.types import (
85
98
  )
86
99
  from camel.types.agents import ToolCallingRecord
87
100
  from camel.utils import (
101
+ Constants,
88
102
  get_model_encoding,
89
103
  model_from_json_schema,
90
104
  )
91
105
  from camel.utils.commons import dependencies_required
106
+ from camel.utils.context_utils import ContextUtility
107
+
108
+ TOKEN_LIMIT_ERROR_MARKERS = (
109
+ "context_length_exceeded",
110
+ "prompt is too long",
111
+ "exceeded your current quota",
112
+ "tokens must be reduced",
113
+ "context length",
114
+ "token count",
115
+ "context limit",
116
+ )
92
117
 
93
118
  if TYPE_CHECKING:
94
119
  from camel.terminators import ResponseTerminator
95
120
 
96
121
  logger = get_logger(__name__)
97
122
 
123
+ # Cleanup temp files on exit
124
+ _temp_files: Set[str] = set()
125
+ _temp_files_lock = threading.Lock()
126
+
127
+
128
+ def _cleanup_temp_files():
129
+ with _temp_files_lock:
130
+ for path in _temp_files:
131
+ try:
132
+ os.unlink(path)
133
+ except Exception:
134
+ pass
135
+
136
+
137
+ atexit.register(_cleanup_temp_files)
138
+
98
139
  # AgentOps decorator setting
99
140
  try:
100
- import os
101
-
102
141
  if os.getenv("AGENTOPS_API_KEY") is not None:
103
142
  from agentops import track_agent
104
143
  else:
@@ -132,13 +171,23 @@ SIMPLE_FORMAT_PROMPT = TextPrompt(
132
171
  )
133
172
 
134
173
 
174
+ @dataclass
175
+ class _ToolOutputHistoryEntry:
176
+ tool_name: str
177
+ tool_call_id: str
178
+ result_text: str
179
+ record_uuids: List[str]
180
+ record_timestamps: List[float]
181
+ cached: bool = False
182
+
183
+
135
184
  class StreamContentAccumulator:
136
185
  r"""Manages content accumulation across streaming responses to ensure
137
186
  all responses contain complete cumulative content."""
138
187
 
139
188
  def __init__(self):
140
189
  self.base_content = "" # Content before tool calls
141
- self.current_content = "" # Current streaming content
190
+ self.current_content = [] # Accumulated streaming fragments
142
191
  self.tool_status_messages = [] # Accumulated tool status messages
143
192
 
144
193
  def set_base_content(self, content: str):
@@ -147,7 +196,7 @@ class StreamContentAccumulator:
147
196
 
148
197
  def add_streaming_content(self, new_content: str):
149
198
  r"""Add new streaming content."""
150
- self.current_content += new_content
199
+ self.current_content.append(new_content)
151
200
 
152
201
  def add_tool_status(self, status_message: str):
153
202
  r"""Add a tool status message."""
@@ -156,16 +205,18 @@ class StreamContentAccumulator:
156
205
  def get_full_content(self) -> str:
157
206
  r"""Get the complete accumulated content."""
158
207
  tool_messages = "".join(self.tool_status_messages)
159
- return self.base_content + tool_messages + self.current_content
208
+ current = "".join(self.current_content)
209
+ return self.base_content + tool_messages + current
160
210
 
161
211
  def get_content_with_new_status(self, status_message: str) -> str:
162
212
  r"""Get content with a new status message appended."""
163
213
  tool_messages = "".join([*self.tool_status_messages, status_message])
164
- return self.base_content + tool_messages + self.current_content
214
+ current = "".join(self.current_content)
215
+ return self.base_content + tool_messages + current
165
216
 
166
217
  def reset_streaming_content(self):
167
218
  r"""Reset only the streaming content, keep base and tool status."""
168
- self.current_content = ""
219
+ self.current_content = []
169
220
 
170
221
 
171
222
  class StreamingChatAgentResponse:
@@ -186,13 +237,10 @@ class StreamingChatAgentResponse:
186
237
  def _ensure_latest_response(self):
187
238
  r"""Ensure we have the latest response by consuming the generator."""
188
239
  if not self._consumed:
189
- try:
190
- for response in self._generator:
191
- self._responses.append(response)
192
- self._current_response = response
193
- self._consumed = True
194
- except StopIteration:
195
- self._consumed = True
240
+ for response in self._generator:
241
+ self._responses.append(response)
242
+ self._current_response = response
243
+ self._consumed = True
196
244
 
197
245
  @property
198
246
  def msgs(self) -> List[BaseMessage]:
@@ -230,17 +278,14 @@ class StreamingChatAgentResponse:
230
278
  r"""Make this object iterable."""
231
279
  if self._consumed:
232
280
  # If already consumed, iterate over stored responses
233
- return iter(self._responses)
281
+ yield from self._responses
234
282
  else:
235
283
  # If not consumed, consume and yield
236
- try:
237
- for response in self._generator:
238
- self._responses.append(response)
239
- self._current_response = response
240
- yield response
241
- self._consumed = True
242
- except StopIteration:
243
- self._consumed = True
284
+ for response in self._generator:
285
+ self._responses.append(response)
286
+ self._current_response = response
287
+ yield response
288
+ self._consumed = True
244
289
 
245
290
  def __getattr__(self, name):
246
291
  r"""Forward any other attribute access to the latest response."""
@@ -271,13 +316,10 @@ class AsyncStreamingChatAgentResponse:
271
316
  async def _ensure_latest_response(self):
272
317
  r"""Ensure the latest response by consuming the async generator."""
273
318
  if not self._consumed:
274
- try:
275
- async for response in self._async_generator:
276
- self._responses.append(response)
277
- self._current_response = response
278
- self._consumed = True
279
- except StopAsyncIteration:
280
- self._consumed = True
319
+ async for response in self._async_generator:
320
+ self._responses.append(response)
321
+ self._current_response = response
322
+ self._consumed = True
281
323
 
282
324
  async def _get_final_response(self) -> ChatAgentResponse:
283
325
  r"""Get the final response after consuming the entire stream."""
@@ -303,14 +345,11 @@ class AsyncStreamingChatAgentResponse:
303
345
  else:
304
346
  # If not consumed, consume and yield
305
347
  async def _consume_and_yield():
306
- try:
307
- async for response in self._async_generator:
308
- self._responses.append(response)
309
- self._current_response = response
310
- yield response
311
- self._consumed = True
312
- except StopAsyncIteration:
313
- self._consumed = True
348
+ async for response in self._async_generator:
349
+ self._responses.append(response)
350
+ self._current_response = response
351
+ yield response
352
+ self._consumed = True
314
353
 
315
354
  return _consume_and_yield()
316
355
 
@@ -338,9 +377,9 @@ class ChatAgent(BaseAgent):
338
377
  message_window_size (int, optional): The maximum number of previous
339
378
  messages to include in the context window. If `None`, no windowing
340
379
  is performed. (default: :obj:`None`)
341
- token_limit (int, optional): The maximum number of tokens in a context.
342
- The context will be automatically pruned to fulfill the limitation.
343
- If `None`, it will be set according to the backend model.
380
+ summarize_threshold (int, optional): The percentage of the context
381
+ window that triggers summarization. If `None`, will trigger
382
+ summarization when the context window is full.
344
383
  (default: :obj:`None`)
345
384
  output_language (str, optional): The language to be output by the
346
385
  agent. (default: :obj:`None`)
@@ -378,14 +417,35 @@ class ChatAgent(BaseAgent):
378
417
  for individual tool execution. If None, wait indefinitely.
379
418
  mask_tool_output (Optional[bool]): Whether to return a sanitized
380
419
  placeholder instead of the raw tool output. (default: :obj:`False`)
381
- pause_event (Optional[asyncio.Event]): Event to signal pause of the
382
- agent's operation. When clear, the agent will pause its execution.
383
- (default: :obj:`None`)
420
+ pause_event (Optional[Union[threading.Event, asyncio.Event]]): Event to
421
+ signal pause of the agent's operation. When clear, the agent will
422
+ pause its execution. Use threading.Event for sync operations or
423
+ asyncio.Event for async operations. (default: :obj:`None`)
384
424
  prune_tool_calls_from_memory (bool): Whether to clean tool
385
425
  call messages from memory after response generation to save token
386
426
  usage. When enabled, removes FUNCTION/TOOL role messages and
387
427
  ASSISTANT messages with tool_calls after each step.
388
428
  (default: :obj:`False`)
429
+ enable_snapshot_clean (bool, optional): Whether to clean snapshot
430
+ markers and references from historical tool outputs in memory.
431
+ This removes verbose DOM markers (like [ref=...]) from older tool
432
+ results while keeping the latest output intact for immediate use.
433
+ (default: :obj:`False`)
434
+ retry_attempts (int, optional): Maximum number of retry attempts for
435
+ rate limit errors. (default: :obj:`3`)
436
+ retry_delay (float, optional): Initial delay in seconds between
437
+ retries. Uses exponential backoff. (default: :obj:`1.0`)
438
+ step_timeout (Optional[float], optional): Timeout in seconds for the
439
+ entire step operation. If None, no timeout is applied.
440
+ (default: :obj:`None`)
441
+ stream_accumulate (bool, optional): When True, partial streaming
442
+ updates return accumulated content (current behavior). When False,
443
+ partial updates return only the incremental delta. (default:
444
+ :obj:`True`)
445
+ summary_window_ratio (float, optional): Maximum fraction of the total
446
+ context window that can be occupied by summary information. Used
447
+ to limit how much of the model's context is reserved for
448
+ summarization results. (default: :obj:`0.6`)
389
449
  """
390
450
 
391
451
  def __init__(
@@ -408,6 +468,7 @@ class ChatAgent(BaseAgent):
408
468
  ] = None,
409
469
  memory: Optional[AgentMemory] = None,
410
470
  message_window_size: Optional[int] = None,
471
+ summarize_threshold: Optional[int] = 50,
411
472
  token_limit: Optional[int] = None,
412
473
  output_language: Optional[str] = None,
413
474
  tools: Optional[List[Union[FunctionTool, Callable]]] = None,
@@ -422,10 +483,16 @@ class ChatAgent(BaseAgent):
422
483
  max_iteration: Optional[int] = None,
423
484
  agent_id: Optional[str] = None,
424
485
  stop_event: Optional[threading.Event] = None,
425
- tool_execution_timeout: Optional[float] = None,
486
+ tool_execution_timeout: Optional[float] = Constants.TIMEOUT_THRESHOLD,
426
487
  mask_tool_output: bool = False,
427
- pause_event: Optional[asyncio.Event] = None,
488
+ pause_event: Optional[Union[threading.Event, asyncio.Event]] = None,
428
489
  prune_tool_calls_from_memory: bool = False,
490
+ enable_snapshot_clean: bool = False,
491
+ retry_attempts: int = 3,
492
+ retry_delay: float = 1.0,
493
+ step_timeout: Optional[float] = Constants.TIMEOUT_THRESHOLD,
494
+ stream_accumulate: bool = True,
495
+ summary_window_ratio: float = 0.6,
429
496
  ) -> None:
430
497
  if isinstance(model, ModelManager):
431
498
  self.model_backend = model
@@ -441,10 +508,13 @@ class ChatAgent(BaseAgent):
441
508
  # Assign unique ID
442
509
  self.agent_id = agent_id if agent_id else str(uuid.uuid4())
443
510
 
511
+ self._enable_snapshot_clean = enable_snapshot_clean
512
+ self._tool_output_history: List[_ToolOutputHistoryEntry] = []
513
+
444
514
  # Set up memory
445
515
  context_creator = ScoreBasedContextCreator(
446
516
  self.model_backend.token_counter,
447
- token_limit or self.model_backend.token_limit,
517
+ self.model_backend.token_limit,
448
518
  )
449
519
 
450
520
  self._memory: AgentMemory = memory or ChatHistoryMemory(
@@ -459,9 +529,7 @@ class ChatAgent(BaseAgent):
459
529
 
460
530
  # Set up system message and initialize messages
461
531
  self._original_system_message = (
462
- BaseMessage.make_assistant_message(
463
- role_name="Assistant", content=system_message
464
- )
532
+ BaseMessage.make_system_message(system_message)
465
533
  if isinstance(system_message, str)
466
534
  else system_message
467
535
  )
@@ -471,6 +539,21 @@ class ChatAgent(BaseAgent):
471
539
  )
472
540
  self.init_messages()
473
541
 
542
+ # Set up summarize threshold with validation
543
+ if summarize_threshold is not None:
544
+ if not (0 < summarize_threshold <= 100):
545
+ raise ValueError(
546
+ f"summarize_threshold must be between 0 and 100, "
547
+ f"got {summarize_threshold}"
548
+ )
549
+ logger.info(
550
+ f"Automatic context compression is enabled. Will trigger "
551
+ f"summarization when context window exceeds "
552
+ f"{summarize_threshold}% of the total token limit."
553
+ )
554
+ self.summarize_threshold = summarize_threshold
555
+ self._reset_summary_state()
556
+
474
557
  # Set up role name and role type
475
558
  self.role_name: str = (
476
559
  getattr(self.system_message, "role_name", None) or "assistant"
@@ -509,13 +592,25 @@ class ChatAgent(BaseAgent):
509
592
  self.tool_execution_timeout = tool_execution_timeout
510
593
  self.mask_tool_output = mask_tool_output
511
594
  self._secure_result_store: Dict[str, Any] = {}
595
+ self._secure_result_store_lock = threading.Lock()
512
596
  self.pause_event = pause_event
513
597
  self.prune_tool_calls_from_memory = prune_tool_calls_from_memory
598
+ self.retry_attempts = max(1, retry_attempts)
599
+ self.retry_delay = max(0.0, retry_delay)
600
+ self.step_timeout = step_timeout
601
+ self._context_utility: Optional[ContextUtility] = None
602
+ self._context_summary_agent: Optional["ChatAgent"] = None
603
+ self.stream_accumulate = stream_accumulate
604
+ self._last_tool_call_record: Optional[ToolCallingRecord] = None
605
+ self._last_tool_call_signature: Optional[str] = None
606
+ self._last_token_limit_tool_signature: Optional[str] = None
607
+ self.summary_window_ratio = summary_window_ratio
514
608
 
515
609
  def reset(self):
516
610
  r"""Resets the :obj:`ChatAgent` to its initial state."""
517
611
  self.terminated = False
518
612
  self.init_messages()
613
+ self._reset_summary_state()
519
614
  for terminator in self.response_terminators:
520
615
  terminator.reset()
521
616
 
@@ -699,6 +794,20 @@ class ChatAgent(BaseAgent):
699
794
  # Ensure the new memory has the system message
700
795
  self.init_messages()
701
796
 
797
+ def set_context_utility(
798
+ self, context_utility: Optional[ContextUtility]
799
+ ) -> None:
800
+ r"""Set the context utility for the agent.
801
+
802
+ This allows external components (like SingleAgentWorker) to provide
803
+ a shared context utility instance for workflow management.
804
+
805
+ Args:
806
+ context_utility (ContextUtility, optional): The context utility
807
+ to use. If None, the agent will create its own when needed.
808
+ """
809
+ self._context_utility = context_utility
810
+
702
811
  def _get_full_tool_schemas(self) -> List[Dict[str, Any]]:
703
812
  r"""Returns a list of tool schemas of all tools, including internal
704
813
  and external tools.
@@ -708,6 +817,329 @@ class ChatAgent(BaseAgent):
708
817
  for func_tool in self._internal_tools.values()
709
818
  ]
710
819
 
820
+ @staticmethod
821
+ def _is_token_limit_error(error: Exception) -> bool:
822
+ r"""Return True when the exception message indicates a token limit."""
823
+ error_message = str(error).lower()
824
+ return any(
825
+ marker in error_message for marker in TOKEN_LIMIT_ERROR_MARKERS
826
+ )
827
+
828
+ @staticmethod
829
+ def _is_tool_related_record(record: MemoryRecord) -> bool:
830
+ r"""Determine whether the given memory record
831
+ belongs to a tool call."""
832
+ if record.role_at_backend in {
833
+ OpenAIBackendRole.TOOL,
834
+ OpenAIBackendRole.FUNCTION,
835
+ }:
836
+ return True
837
+
838
+ if (
839
+ record.role_at_backend == OpenAIBackendRole.ASSISTANT
840
+ and isinstance(record.message, FunctionCallingMessage)
841
+ ):
842
+ return True
843
+
844
+ return False
845
+
846
+ def _find_indices_to_remove_for_last_tool_pair(
847
+ self, recent_records: List[ContextRecord]
848
+ ) -> List[int]:
849
+ """Find indices of records that should be removed to clean up the most
850
+ recent incomplete tool interaction pair.
851
+
852
+ This method identifies tool call/result pairs by tool_call_id and
853
+ returns the exact indices to remove, allowing non-contiguous deletions.
854
+
855
+ Logic:
856
+ - If the last record is a tool result (TOOL/FUNCTION) with a
857
+ tool_call_id, find the matching assistant call anywhere in history
858
+ and return both indices.
859
+ - If the last record is an assistant tool call without a result yet,
860
+ return just that index.
861
+ - For normal messages (non tool-related): remove just the last one.
862
+ - Fallback: If no tool_call_id is available, use heuristic (last 2 if
863
+ tool-related, otherwise last 1).
864
+
865
+ Returns:
866
+ List[int]: Indices to remove (may be non-contiguous).
867
+ """
868
+ if not recent_records:
869
+ return []
870
+
871
+ last_idx = len(recent_records) - 1
872
+ last_record = recent_records[last_idx].memory_record
873
+
874
+ # Case A: Last is an ASSISTANT tool call with no result yet
875
+ if (
876
+ last_record.role_at_backend == OpenAIBackendRole.ASSISTANT
877
+ and isinstance(last_record.message, FunctionCallingMessage)
878
+ and last_record.message.result is None
879
+ ):
880
+ return [last_idx]
881
+
882
+ # Case B: Last is TOOL/FUNCTION result, try id-based pairing
883
+ if last_record.role_at_backend in {
884
+ OpenAIBackendRole.TOOL,
885
+ OpenAIBackendRole.FUNCTION,
886
+ }:
887
+ tool_id = None
888
+ if isinstance(last_record.message, FunctionCallingMessage):
889
+ tool_id = last_record.message.tool_call_id
890
+
891
+ if tool_id:
892
+ for idx in range(len(recent_records) - 2, -1, -1):
893
+ rec = recent_records[idx].memory_record
894
+ if rec.role_at_backend != OpenAIBackendRole.ASSISTANT:
895
+ continue
896
+
897
+ # Check if this assistant message contains the tool_call_id
898
+ matched = False
899
+
900
+ # Case 1: FunctionCallingMessage (single tool call)
901
+ if isinstance(rec.message, FunctionCallingMessage):
902
+ if rec.message.tool_call_id == tool_id:
903
+ matched = True
904
+
905
+ # Case 2: BaseMessage with multiple tool_calls in meta_dict
906
+ elif (
907
+ hasattr(rec.message, "meta_dict")
908
+ and rec.message.meta_dict
909
+ ):
910
+ tool_calls_list = rec.message.meta_dict.get(
911
+ "tool_calls", []
912
+ )
913
+ if isinstance(tool_calls_list, list):
914
+ for tc in tool_calls_list:
915
+ if (
916
+ isinstance(tc, dict)
917
+ and tc.get("id") == tool_id
918
+ ):
919
+ matched = True
920
+ break
921
+
922
+ if matched:
923
+ # Return both assistant call and tool result indices
924
+ return [idx, last_idx]
925
+
926
+ # Fallback: no tool_call_id, use heuristic
927
+ if self._is_tool_related_record(last_record):
928
+ # Remove last 2 (assume they are paired)
929
+ return [last_idx - 1, last_idx] if last_idx > 0 else [last_idx]
930
+ else:
931
+ return [last_idx]
932
+
933
+ # Default: non tool-related tail => remove last one
934
+ return [last_idx]
935
+
936
+ @staticmethod
937
+ def _serialize_tool_args(args: Dict[str, Any]) -> str:
938
+ try:
939
+ return json.dumps(args, ensure_ascii=False, sort_keys=True)
940
+ except TypeError:
941
+ return str(args)
942
+
943
+ @classmethod
944
+ def _build_tool_signature(
945
+ cls, func_name: str, args: Dict[str, Any]
946
+ ) -> str:
947
+ args_repr = cls._serialize_tool_args(args)
948
+ return f"{func_name}:{args_repr}"
949
+
950
+ def _describe_tool_call(
951
+ self, record: Optional[ToolCallingRecord]
952
+ ) -> Optional[str]:
953
+ if record is None:
954
+ return None
955
+ args_repr = self._serialize_tool_args(record.args)
956
+ return f"Tool `{record.tool_name}` invoked with arguments {args_repr}."
957
+
958
+ def _update_last_tool_call_state(
959
+ self, record: Optional[ToolCallingRecord]
960
+ ) -> None:
961
+ """Track the most recent tool call and its identifying signature."""
962
+ self._last_tool_call_record = record
963
+ if record is None:
964
+ self._last_tool_call_signature = None
965
+ return
966
+
967
+ args = (
968
+ record.args
969
+ if isinstance(record.args, dict)
970
+ else {"_raw": record.args}
971
+ )
972
+ try:
973
+ signature = self._build_tool_signature(record.tool_name, args)
974
+ except Exception: # pragma: no cover - defensive guard
975
+ signature = None
976
+ self._last_tool_call_signature = signature
977
+
978
+ def _format_tool_limit_notice(self) -> Optional[str]:
979
+ record = self._last_tool_call_record
980
+ description = self._describe_tool_call(record)
981
+ if description is None:
982
+ return None
983
+ notice_lines = [
984
+ "[Tool Call Causing Token Limit]",
985
+ description,
986
+ ]
987
+
988
+ if record is not None:
989
+ result = record.result
990
+ if isinstance(result, bytes):
991
+ result_repr = result.decode(errors="replace")
992
+ elif isinstance(result, str):
993
+ result_repr = result
994
+ else:
995
+ try:
996
+ result_repr = json.dumps(
997
+ result, ensure_ascii=False, sort_keys=True
998
+ )
999
+ except (TypeError, ValueError):
1000
+ result_repr = str(result)
1001
+
1002
+ result_length = len(result_repr)
1003
+ notice_lines.append(f"Tool result length: {result_length}")
1004
+ if self.model_backend.token_limit != 999999999:
1005
+ notice_lines.append(
1006
+ f"Token limit: {self.model_backend.token_limit}"
1007
+ )
1008
+
1009
+ return "\n".join(notice_lines)
1010
+
1011
+ @staticmethod
1012
+ def _append_user_messages_section(
1013
+ summary_content: str, user_messages: List[str]
1014
+ ) -> str:
1015
+ section_title = "- **All User Messages**:"
1016
+ sanitized_messages: List[str] = []
1017
+ for msg in user_messages:
1018
+ if not isinstance(msg, str):
1019
+ msg = str(msg)
1020
+ cleaned = " ".join(msg.strip().splitlines())
1021
+ if cleaned:
1022
+ sanitized_messages.append(cleaned)
1023
+
1024
+ bullet_block = (
1025
+ "\n".join(f"- {m}" for m in sanitized_messages)
1026
+ if sanitized_messages
1027
+ else "- None noted"
1028
+ )
1029
+ user_section = f"{section_title}\n{bullet_block}"
1030
+
1031
+ summary_clean = summary_content.rstrip()
1032
+ separator = "\n\n" if summary_clean else ""
1033
+ return f"{summary_clean}{separator}{user_section}"
1034
+
1035
+ def _reset_summary_state(self) -> None:
1036
+ self._summary_token_count = 0 # Total tokens in summary messages
1037
+
1038
+ def _calculate_next_summary_threshold(self) -> int:
1039
+ r"""Calculate the next token threshold that should trigger
1040
+ summarization.
1041
+
1042
+ The threshold calculation follows a progressive strategy:
1043
+ - First time: token_limit * (summarize_threshold / 100)
1044
+ - Subsequent times: (limit - summary_token) / 2 + summary_token
1045
+
1046
+ This ensures that as summaries accumulate, the threshold adapts
1047
+ to maintain a reasonable balance between context and summaries.
1048
+
1049
+ Returns:
1050
+ int: The token count threshold for next summarization.
1051
+ """
1052
+ token_limit = self.model_backend.token_limit
1053
+ summary_token_count = self._summary_token_count
1054
+
1055
+ # First summarization: use the percentage threshold
1056
+ if summary_token_count == 0:
1057
+ threshold = int(token_limit * self.summarize_threshold / 100)
1058
+ else:
1059
+ # Subsequent summarizations: adaptive threshold
1060
+ threshold = int(
1061
+ (token_limit - summary_token_count)
1062
+ * self.summarize_threshold
1063
+ / 100
1064
+ + summary_token_count
1065
+ )
1066
+
1067
+ return threshold
1068
+
1069
+ def _update_memory_with_summary(
1070
+ self, summary: str, include_summaries: bool = False
1071
+ ) -> None:
1072
+ r"""Update memory with summary result.
1073
+
1074
+ This method handles memory clearing and restoration of summaries based
1075
+ on whether it's a progressive or full compression.
1076
+ """
1077
+
1078
+ summary_content: str = summary
1079
+
1080
+ existing_summaries = []
1081
+ if not include_summaries:
1082
+ messages, _ = self.memory.get_context()
1083
+ for msg in messages:
1084
+ content = msg.get('content', '')
1085
+ if isinstance(content, str) and content.startswith(
1086
+ '[CONTEXT_SUMMARY]'
1087
+ ):
1088
+ existing_summaries.append(msg)
1089
+
1090
+ # Clear memory
1091
+ self.clear_memory()
1092
+
1093
+ # Restore old summaries (for progressive compression)
1094
+ for old_summary in existing_summaries:
1095
+ content = old_summary.get('content', '')
1096
+ if not isinstance(content, str):
1097
+ content = str(content)
1098
+ summary_msg = BaseMessage.make_assistant_message(
1099
+ role_name="assistant", content=content
1100
+ )
1101
+ self.update_memory(summary_msg, OpenAIBackendRole.ASSISTANT)
1102
+
1103
+ # Add new summary
1104
+ new_summary_msg = BaseMessage.make_assistant_message(
1105
+ role_name="assistant", content=summary_content
1106
+ )
1107
+ self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT)
1108
+ input_message = BaseMessage.make_assistant_message(
1109
+ role_name="assistant",
1110
+ content=(
1111
+ "Please continue the conversation from "
1112
+ "where we left it off without asking the user any further "
1113
+ "questions. Continue with the last task that you were "
1114
+ "asked to work on."
1115
+ ),
1116
+ )
1117
+ self.update_memory(input_message, OpenAIBackendRole.ASSISTANT)
1118
+ # Update token count
1119
+ try:
1120
+ summary_tokens = (
1121
+ self.model_backend.token_counter.count_tokens_from_messages(
1122
+ [{"role": "assistant", "content": summary_content}]
1123
+ )
1124
+ )
1125
+
1126
+ if include_summaries: # Full compression - reset count
1127
+ self._summary_token_count = summary_tokens
1128
+ logger.info(
1129
+ f"Full compression: Summary with {summary_tokens} tokens. "
1130
+ f"Total summary tokens reset to: {summary_tokens}"
1131
+ )
1132
+ else: # Progressive compression - accumulate
1133
+ self._summary_token_count += summary_tokens
1134
+ logger.info(
1135
+ f"Progressive compression: New summary "
1136
+ f"with {summary_tokens} tokens. "
1137
+ f"Total summary tokens: "
1138
+ f"{self._summary_token_count}"
1139
+ )
1140
+ except Exception as e:
1141
+ logger.warning(f"Failed to count summary tokens: {e}")
1142
+
711
1143
  def _get_external_tool_names(self) -> Set[str]:
712
1144
  r"""Returns a set of external tool names."""
713
1145
  return set(self._external_tool_schemas.keys())
@@ -722,6 +1154,282 @@ class ChatAgent(BaseAgent):
722
1154
  for tool in tools:
723
1155
  self.add_tool(tool)
724
1156
 
1157
+ def _serialize_tool_result(self, result: Any) -> str:
1158
+ if isinstance(result, str):
1159
+ return result
1160
+ try:
1161
+ return json.dumps(result, ensure_ascii=False)
1162
+ except (TypeError, ValueError):
1163
+ return str(result)
1164
+
1165
+ def _clean_snapshot_line(self, line: str) -> str:
1166
+ r"""Clean a single snapshot line by removing prefixes and references.
1167
+
1168
+ This method handles snapshot lines in the format:
1169
+ - [prefix] "quoted text" [attributes] [ref=...]: description
1170
+
1171
+ It preserves:
1172
+ - Quoted text content (including brackets inside quotes)
1173
+ - Description text after the colon
1174
+
1175
+ It removes:
1176
+ - Line prefixes (e.g., "- button", "- tooltip", "generic:")
1177
+ - Attribute markers (e.g., [disabled], [ref=e47])
1178
+ - Lines with only element types
1179
+ - All indentation
1180
+
1181
+ Args:
1182
+ line: The original line content.
1183
+
1184
+ Returns:
1185
+ The cleaned line content, or empty string if line should be
1186
+ removed.
1187
+ """
1188
+ original = line.strip()
1189
+ if not original:
1190
+ return ''
1191
+
1192
+ # Check if line is just an element type marker
1193
+ # (e.g., "- generic:", "button:")
1194
+ if re.match(r'^(?:-\s+)?\w+\s*:?\s*$', original):
1195
+ return ''
1196
+
1197
+ # Remove element type prefix
1198
+ line = re.sub(r'^(?:-\s+)?\w+[\s:]+', '', original)
1199
+
1200
+ # Remove bracket markers while preserving quoted text
1201
+ quoted_parts = []
1202
+
1203
+ def save_quoted(match):
1204
+ quoted_parts.append(match.group(0))
1205
+ return f'__QUOTED_{len(quoted_parts)-1}__'
1206
+
1207
+ line = re.sub(r'"[^"]*"', save_quoted, line)
1208
+ line = re.sub(r'\s*\[[^\]]+\]\s*', ' ', line)
1209
+
1210
+ for i, quoted in enumerate(quoted_parts):
1211
+ line = line.replace(f'__QUOTED_{i}__', quoted)
1212
+
1213
+ # Clean up formatting
1214
+ line = re.sub(r'\s+', ' ', line).strip()
1215
+ line = re.sub(r'\s*:\s*', ': ', line)
1216
+ line = line.lstrip(': ').strip()
1217
+
1218
+ return '' if not line else line
1219
+
1220
+ def _clean_snapshot_content(self, content: str) -> str:
1221
+ r"""Clean snapshot content by removing prefixes, references, and
1222
+ deduplicating lines.
1223
+
1224
+ This method identifies snapshot lines (containing element keywords or
1225
+ references) and cleans them while preserving non-snapshot content.
1226
+ It also handles JSON-formatted tool outputs with snapshot fields.
1227
+
1228
+ Args:
1229
+ content: The original snapshot content.
1230
+
1231
+ Returns:
1232
+ The cleaned content with deduplicated lines.
1233
+ """
1234
+ try:
1235
+ import json
1236
+
1237
+ data = json.loads(content)
1238
+ modified = False
1239
+
1240
+ def clean_json_value(obj):
1241
+ nonlocal modified
1242
+ if isinstance(obj, dict):
1243
+ result = {}
1244
+ for key, value in obj.items():
1245
+ if key == 'snapshot' and isinstance(value, str):
1246
+ try:
1247
+ decoded_value = value.encode().decode(
1248
+ 'unicode_escape'
1249
+ )
1250
+ except (UnicodeDecodeError, AttributeError):
1251
+ decoded_value = value
1252
+
1253
+ needs_cleaning = (
1254
+ '- ' in decoded_value
1255
+ or '[ref=' in decoded_value
1256
+ or any(
1257
+ elem + ':' in decoded_value
1258
+ for elem in [
1259
+ 'generic',
1260
+ 'img',
1261
+ 'banner',
1262
+ 'list',
1263
+ 'listitem',
1264
+ 'search',
1265
+ 'navigation',
1266
+ ]
1267
+ )
1268
+ )
1269
+
1270
+ if needs_cleaning:
1271
+ cleaned_snapshot = self._clean_text_snapshot(
1272
+ decoded_value
1273
+ )
1274
+ result[key] = cleaned_snapshot
1275
+ modified = True
1276
+ else:
1277
+ result[key] = value
1278
+ else:
1279
+ result[key] = clean_json_value(value)
1280
+ return result
1281
+ elif isinstance(obj, list):
1282
+ return [clean_json_value(item) for item in obj]
1283
+ else:
1284
+ return obj
1285
+
1286
+ cleaned_data = clean_json_value(data)
1287
+
1288
+ if modified:
1289
+ return json.dumps(cleaned_data, ensure_ascii=False, indent=4)
1290
+ else:
1291
+ return content
1292
+
1293
+ except (json.JSONDecodeError, TypeError):
1294
+ return self._clean_text_snapshot(content)
1295
+
1296
+ def _clean_text_snapshot(self, content: str) -> str:
1297
+ r"""Clean plain text snapshot content.
1298
+
1299
+ This method:
1300
+ - Removes all indentation
1301
+ - Deletes empty lines
1302
+ - Deduplicates all lines
1303
+ - Cleans snapshot-specific markers
1304
+
1305
+ Args:
1306
+ content: The original snapshot text.
1307
+
1308
+ Returns:
1309
+ The cleaned content with deduplicated lines, no indentation,
1310
+ and no empty lines.
1311
+ """
1312
+ lines = content.split('\n')
1313
+ cleaned_lines = []
1314
+ seen = set()
1315
+
1316
+ for line in lines:
1317
+ stripped_line = line.strip()
1318
+
1319
+ if not stripped_line:
1320
+ continue
1321
+
1322
+ # Skip metadata lines (like "- /url:", "- /ref:")
1323
+ if re.match(r'^-?\s*/\w+\s*:', stripped_line):
1324
+ continue
1325
+
1326
+ is_snapshot_line = '[ref=' in stripped_line or re.match(
1327
+ r'^(?:-\s+)?\w+(?:[\s:]|$)', stripped_line
1328
+ )
1329
+
1330
+ if is_snapshot_line:
1331
+ cleaned = self._clean_snapshot_line(stripped_line)
1332
+ if cleaned and cleaned not in seen:
1333
+ cleaned_lines.append(cleaned)
1334
+ seen.add(cleaned)
1335
+ else:
1336
+ if stripped_line not in seen:
1337
+ cleaned_lines.append(stripped_line)
1338
+ seen.add(stripped_line)
1339
+
1340
+ return '\n'.join(cleaned_lines)
1341
+
1342
+ def _register_tool_output_for_cache(
1343
+ self,
1344
+ func_name: str,
1345
+ tool_call_id: str,
1346
+ result_text: str,
1347
+ records: List[MemoryRecord],
1348
+ ) -> None:
1349
+ if not records:
1350
+ return
1351
+
1352
+ entry = _ToolOutputHistoryEntry(
1353
+ tool_name=func_name,
1354
+ tool_call_id=tool_call_id,
1355
+ result_text=result_text,
1356
+ record_uuids=[str(record.uuid) for record in records],
1357
+ record_timestamps=[record.timestamp for record in records],
1358
+ )
1359
+ self._tool_output_history.append(entry)
1360
+ self._process_tool_output_cache()
1361
+
1362
+ def _process_tool_output_cache(self) -> None:
1363
+ if not self._enable_snapshot_clean or not self._tool_output_history:
1364
+ return
1365
+
1366
+ # Only clean older results; keep the latest expanded for immediate use.
1367
+ for entry in self._tool_output_history[:-1]:
1368
+ if entry.cached:
1369
+ continue
1370
+ self._clean_snapshot_in_memory(entry)
1371
+
1372
+ def _clean_snapshot_in_memory(
1373
+ self, entry: _ToolOutputHistoryEntry
1374
+ ) -> None:
1375
+ if not entry.record_uuids:
1376
+ return
1377
+
1378
+ # Clean snapshot markers and references from historical tool output
1379
+ result_text = entry.result_text
1380
+ if '- ' in result_text and '[ref=' in result_text:
1381
+ cleaned_result = self._clean_snapshot_content(result_text)
1382
+
1383
+ # Update the message in memory storage
1384
+ timestamp = (
1385
+ entry.record_timestamps[0]
1386
+ if entry.record_timestamps
1387
+ else time.time_ns() / 1_000_000_000
1388
+ )
1389
+ cleaned_message = FunctionCallingMessage(
1390
+ role_name=self.role_name,
1391
+ role_type=self.role_type,
1392
+ meta_dict={},
1393
+ content="",
1394
+ func_name=entry.tool_name,
1395
+ result=cleaned_result,
1396
+ tool_call_id=entry.tool_call_id,
1397
+ )
1398
+
1399
+ chat_history_block = getattr(
1400
+ self.memory, "_chat_history_block", None
1401
+ )
1402
+ storage = getattr(chat_history_block, "storage", None)
1403
+ if storage is None:
1404
+ return
1405
+
1406
+ existing_records = storage.load()
1407
+ updated_records = [
1408
+ record
1409
+ for record in existing_records
1410
+ if record["uuid"] not in entry.record_uuids
1411
+ ]
1412
+ new_record = MemoryRecord(
1413
+ message=cleaned_message,
1414
+ role_at_backend=OpenAIBackendRole.FUNCTION,
1415
+ timestamp=timestamp,
1416
+ agent_id=self.agent_id,
1417
+ )
1418
+ updated_records.append(new_record.to_dict())
1419
+ updated_records.sort(key=lambda record: record["timestamp"])
1420
+ storage.clear()
1421
+ storage.save(updated_records)
1422
+
1423
+ logger.info(
1424
+ "Cleaned snapshot in memory for tool output '%s' (%s)",
1425
+ entry.tool_name,
1426
+ entry.tool_call_id,
1427
+ )
1428
+
1429
+ entry.cached = True
1430
+ entry.record_uuids = [str(new_record.uuid)]
1431
+ entry.record_timestamps = [timestamp]
1432
+
725
1433
  def add_external_tool(
726
1434
  self, tool: Union[FunctionTool, Callable, Dict[str, Any]]
727
1435
  ) -> None:
@@ -766,19 +1474,10 @@ class ChatAgent(BaseAgent):
766
1474
  message: BaseMessage,
767
1475
  role: OpenAIBackendRole,
768
1476
  timestamp: Optional[float] = None,
769
- ) -> None:
1477
+ return_records: bool = False,
1478
+ ) -> Optional[List[MemoryRecord]]:
770
1479
  r"""Updates the agent memory with a new message.
771
1480
 
772
- If the single *message* exceeds the model's context window, it will
773
- be **automatically split into multiple smaller chunks** before being
774
- written into memory. This prevents later failures in
775
- `ScoreBasedContextCreator` where an over-sized message cannot fit
776
- into the available token budget at all.
777
-
778
- This slicing logic handles both regular text messages (in the
779
- `content` field) and long tool call results (in the `result` field of
780
- a `FunctionCallingMessage`).
781
-
782
1481
  Args:
783
1482
  message (BaseMessage): The new message to add to the stored
784
1483
  messages.
@@ -786,168 +1485,41 @@ class ChatAgent(BaseAgent):
786
1485
  timestamp (Optional[float], optional): Custom timestamp for the
787
1486
  memory record. If `None`, the current time will be used.
788
1487
  (default: :obj:`None`)
789
- (default: obj:`None`)
790
- """
791
- import math
792
- import time
793
- import uuid as _uuid
794
-
795
- # 1. Helper to write a record to memory
796
- def _write_single_record(
797
- message: BaseMessage, role: OpenAIBackendRole, timestamp: float
798
- ):
799
- self.memory.write_record(
800
- MemoryRecord(
801
- message=message,
802
- role_at_backend=role,
803
- timestamp=timestamp,
804
- agent_id=self.agent_id,
805
- )
806
- )
1488
+ return_records (bool, optional): When ``True`` the method returns
1489
+ the list of MemoryRecord objects written to memory.
1490
+ (default: :obj:`False`)
807
1491
 
808
- base_ts = (
809
- timestamp
1492
+ Returns:
1493
+ Optional[List[MemoryRecord]]: The records that were written when
1494
+ ``return_records`` is ``True``; otherwise ``None``.
1495
+ """
1496
+ record = MemoryRecord(
1497
+ message=message,
1498
+ role_at_backend=role,
1499
+ timestamp=timestamp
810
1500
  if timestamp is not None
811
- else time.time_ns() / 1_000_000_000
1501
+ else time.time_ns() / 1_000_000_000, # Nanosecond precision
1502
+ agent_id=self.agent_id,
812
1503
  )
1504
+ self.memory.write_record(record)
813
1505
 
814
- # 2. Get token handling utilities, fallback if unavailable
815
- try:
816
- context_creator = self.memory.get_context_creator()
817
- token_counter = context_creator.token_counter
818
- token_limit = context_creator.token_limit
819
- except AttributeError:
820
- _write_single_record(message, role, base_ts)
821
- return
1506
+ if return_records:
1507
+ return [record]
1508
+ return None
822
1509
 
823
- # 3. Check if slicing is necessary
824
- try:
825
- current_tokens = token_counter.count_tokens_from_messages(
826
- [message.to_openai_message(role)]
827
- )
828
- _, ctx_tokens = self.memory.get_context()
829
- remaining_budget = max(0, token_limit - ctx_tokens)
1510
+ def load_memory(self, memory: AgentMemory) -> None:
1511
+ r"""Load the provided memory into the agent.
830
1512
 
831
- if current_tokens <= remaining_budget:
832
- _write_single_record(message, role, base_ts)
833
- return
834
- except Exception as e:
835
- logger.warning(
836
- f"Token calculation failed before chunking, "
837
- f"writing message as-is. Error: {e}"
838
- )
839
- _write_single_record(message, role, base_ts)
840
- return
1513
+ Args:
1514
+ memory (AgentMemory): The memory to load into the agent.
841
1515
 
842
- # 4. Perform slicing
843
- logger.warning(
844
- f"Message with {current_tokens} tokens exceeds remaining budget "
845
- f"of {remaining_budget}. Slicing into smaller chunks."
846
- )
1516
+ Returns:
1517
+ None
1518
+ """
847
1519
 
848
- text_to_chunk: Optional[str] = None
849
- is_function_result = False
850
-
851
- if isinstance(message, FunctionCallingMessage) and isinstance(
852
- message.result, str
853
- ):
854
- text_to_chunk = message.result
855
- is_function_result = True
856
- elif isinstance(message.content, str):
857
- text_to_chunk = message.content
858
-
859
- if not text_to_chunk or not text_to_chunk.strip():
860
- _write_single_record(message, role, base_ts)
861
- return
862
- # Encode the entire text to get a list of all token IDs
863
- try:
864
- all_token_ids = token_counter.encode(text_to_chunk)
865
- except Exception as e:
866
- logger.error(f"Failed to encode text for chunking: {e}")
867
- _write_single_record(message, role, base_ts) # Fallback
868
- return
869
-
870
- if not all_token_ids:
871
- _write_single_record(message, role, base_ts) # Nothing to chunk
872
- return
873
-
874
- # 1. Base chunk size: one-tenth of the smaller of (a) total token
875
- # limit and (b) current remaining budget. This prevents us from
876
- # creating chunks that are guaranteed to overflow the
877
- # immediate context window.
878
- base_chunk_size = max(1, remaining_budget) // 10
879
-
880
- # 2. Each chunk gets a textual prefix such as:
881
- # "[chunk 3/12 of a long message]\n"
882
- # The prefix itself consumes tokens, so if we do not subtract its
883
- # length the *total* tokens of the outgoing message (prefix + body)
884
- # can exceed the intended bound. We estimate the prefix length
885
- # with a representative example that is safely long enough for the
886
- # vast majority of cases (three-digit indices).
887
- sample_prefix = "[chunk 1/1000 of a long message]\n"
888
- prefix_token_len = len(token_counter.encode(sample_prefix))
889
-
890
- # 3. The real capacity for the message body is therefore the base
891
- # chunk size minus the prefix length. Fallback to at least one
892
- # token to avoid zero or negative sizes.
893
- chunk_body_limit = max(1, base_chunk_size - prefix_token_len)
894
-
895
- # 4. Calculate how many chunks we will need with this body size.
896
- num_chunks = math.ceil(len(all_token_ids) / chunk_body_limit)
897
- group_id = str(_uuid.uuid4())
898
-
899
- for i in range(num_chunks):
900
- start_idx = i * chunk_body_limit
901
- end_idx = start_idx + chunk_body_limit
902
- chunk_token_ids = all_token_ids[start_idx:end_idx]
903
-
904
- chunk_body = token_counter.decode(chunk_token_ids)
905
-
906
- prefix = f"[chunk {i + 1}/{num_chunks} of a long message]\n"
907
- new_body = prefix + chunk_body
908
-
909
- if is_function_result and isinstance(
910
- message, FunctionCallingMessage
911
- ):
912
- new_msg: BaseMessage = FunctionCallingMessage(
913
- role_name=message.role_name,
914
- role_type=message.role_type,
915
- meta_dict=message.meta_dict,
916
- content=message.content,
917
- func_name=message.func_name,
918
- args=message.args,
919
- result=new_body,
920
- tool_call_id=message.tool_call_id,
921
- )
922
- else:
923
- new_msg = message.create_new_instance(new_body)
924
-
925
- meta = (new_msg.meta_dict or {}).copy()
926
- meta.update(
927
- {
928
- "chunk_idx": i + 1,
929
- "chunk_total": num_chunks,
930
- "chunk_group_id": group_id,
931
- }
932
- )
933
- new_msg.meta_dict = meta
934
-
935
- # Increment timestamp slightly to maintain order
936
- _write_single_record(new_msg, role, base_ts + i * 1e-6)
937
-
938
- def load_memory(self, memory: AgentMemory) -> None:
939
- r"""Load the provided memory into the agent.
940
-
941
- Args:
942
- memory (AgentMemory): The memory to load into the agent.
943
-
944
- Returns:
945
- None
946
- """
947
-
948
- for context_record in memory.retrieve():
949
- self.memory.write_record(context_record.memory_record)
950
- logger.info(f"Memory loaded from {memory}")
1520
+ for context_record in memory.retrieve():
1521
+ self.memory.write_record(context_record.memory_record)
1522
+ logger.info(f"Memory loaded from {memory}")
951
1523
 
952
1524
  def load_memory_from_path(self, path: str) -> None:
953
1525
  r"""Loads memory records from a JSON file filtered by this agent's ID.
@@ -1012,6 +1584,583 @@ class ChatAgent(BaseAgent):
1012
1584
  json_store.save(to_save)
1013
1585
  logger.info(f"Memory saved to {path}")
1014
1586
 
1587
+ def summarize(
1588
+ self,
1589
+ filename: Optional[str] = None,
1590
+ summary_prompt: Optional[str] = None,
1591
+ response_format: Optional[Type[BaseModel]] = None,
1592
+ working_directory: Optional[Union[str, Path]] = None,
1593
+ include_summaries: bool = False,
1594
+ add_user_messages: bool = True,
1595
+ ) -> Dict[str, Any]:
1596
+ r"""Summarize the agent's current conversation context and persist it
1597
+ to a markdown file.
1598
+
1599
+ .. deprecated:: 0.2.80
1600
+ Use :meth:`asummarize` for async/await support and better
1601
+ performance in parallel summarization workflows.
1602
+
1603
+ Args:
1604
+ filename (Optional[str]): The base filename (without extension) to
1605
+ use for the markdown file. Defaults to a timestamped name when
1606
+ not provided.
1607
+ summary_prompt (Optional[str]): Custom prompt for the summarizer.
1608
+ When omitted, a default prompt highlighting key decisions,
1609
+ action items, and open questions is used.
1610
+ response_format (Optional[Type[BaseModel]]): A Pydantic model
1611
+ defining the expected structure of the response. If provided,
1612
+ the summary will be generated as structured output and included
1613
+ in the result.
1614
+ include_summaries (bool): Whether to include previously generated
1615
+ summaries in the content to be summarized. If False (default),
1616
+ only non-summary messages will be summarized. If True, all
1617
+ messages including previous summaries will be summarized
1618
+ (full compression). (default: :obj:`False`)
1619
+ working_directory (Optional[str|Path]): Optional directory to save
1620
+ the markdown summary file. If provided, overrides the default
1621
+ directory used by ContextUtility.
1622
+ add_user_messages (bool): Whether add user messages to summary.
1623
+ (default: :obj:`True`)
1624
+ Returns:
1625
+ Dict[str, Any]: A dictionary containing the summary text, file
1626
+ path, status message, and optionally structured_summary if
1627
+ response_format was provided.
1628
+
1629
+ See Also:
1630
+ :meth:`asummarize`: Async version for non-blocking LLM calls.
1631
+ """
1632
+
1633
+ warnings.warn(
1634
+ "summarize() is synchronous. Consider using asummarize() "
1635
+ "for async/await support and better performance.",
1636
+ DeprecationWarning,
1637
+ stacklevel=2,
1638
+ )
1639
+
1640
+ result: Dict[str, Any] = {
1641
+ "summary": "",
1642
+ "file_path": None,
1643
+ "status": "",
1644
+ }
1645
+
1646
+ try:
1647
+ # Use external context if set, otherwise create local one
1648
+ if self._context_utility is None:
1649
+ if working_directory is not None:
1650
+ self._context_utility = ContextUtility(
1651
+ working_directory=str(working_directory)
1652
+ )
1653
+ else:
1654
+ self._context_utility = ContextUtility()
1655
+ context_util = self._context_utility
1656
+
1657
+ # Get conversation directly from agent's memory
1658
+ messages, _ = self.memory.get_context()
1659
+
1660
+ if not messages:
1661
+ status_message = (
1662
+ "No conversation context available to summarize."
1663
+ )
1664
+ result["status"] = status_message
1665
+ return result
1666
+
1667
+ # Convert messages to conversation text
1668
+ conversation_lines = []
1669
+ user_messages: List[str] = []
1670
+ for message in messages:
1671
+ role = message.get('role', 'unknown')
1672
+ content = message.get('content', '')
1673
+
1674
+ # Skip summary messages if include_summaries is False
1675
+ if not include_summaries and isinstance(content, str):
1676
+ # Check if this is a summary message by looking for marker
1677
+ if content.startswith('[CONTEXT_SUMMARY]'):
1678
+ continue
1679
+
1680
+ # Handle tool call messages (assistant calling tools)
1681
+ tool_calls = message.get('tool_calls')
1682
+ if tool_calls and isinstance(tool_calls, (list, tuple)):
1683
+ for tool_call in tool_calls:
1684
+ # Handle both dict and object formats
1685
+ if isinstance(tool_call, dict):
1686
+ func_name = tool_call.get('function', {}).get(
1687
+ 'name', 'unknown_tool'
1688
+ )
1689
+ func_args_str = tool_call.get('function', {}).get(
1690
+ 'arguments', '{}'
1691
+ )
1692
+ else:
1693
+ # Handle object format (Pydantic or similar)
1694
+ func_name = getattr(
1695
+ getattr(tool_call, 'function', None),
1696
+ 'name',
1697
+ 'unknown_tool',
1698
+ )
1699
+ func_args_str = getattr(
1700
+ getattr(tool_call, 'function', None),
1701
+ 'arguments',
1702
+ '{}',
1703
+ )
1704
+
1705
+ # Parse and format arguments for readability
1706
+ try:
1707
+ import json
1708
+
1709
+ args_dict = json.loads(func_args_str)
1710
+ args_formatted = ', '.join(
1711
+ f"{k}={v}" for k, v in args_dict.items()
1712
+ )
1713
+ except (json.JSONDecodeError, ValueError, TypeError):
1714
+ args_formatted = func_args_str
1715
+
1716
+ conversation_lines.append(
1717
+ f"[TOOL CALL] {func_name}({args_formatted})"
1718
+ )
1719
+
1720
+ # Handle tool response messages
1721
+ elif role == 'tool':
1722
+ tool_name = message.get('name', 'unknown_tool')
1723
+ if not content:
1724
+ content = str(message.get('content', ''))
1725
+ conversation_lines.append(
1726
+ f"[TOOL RESULT] {tool_name} → {content}"
1727
+ )
1728
+
1729
+ # Handle regular content messages (user/assistant/system)
1730
+ elif content:
1731
+ content = str(content)
1732
+ if role == 'user':
1733
+ user_messages.append(content)
1734
+ conversation_lines.append(f"{role}: {content}")
1735
+
1736
+ conversation_text = "\n".join(conversation_lines).strip()
1737
+
1738
+ if not conversation_text:
1739
+ status_message = (
1740
+ "Conversation context is empty; skipping summary."
1741
+ )
1742
+ result["status"] = status_message
1743
+ return result
1744
+
1745
+ if self._context_summary_agent is None:
1746
+ self._context_summary_agent = ChatAgent(
1747
+ system_message=(
1748
+ "You are a helpful assistant that summarizes "
1749
+ "conversations"
1750
+ ),
1751
+ model=self.model_backend,
1752
+ agent_id=f"{self.agent_id}_context_summarizer",
1753
+ summarize_threshold=None,
1754
+ )
1755
+ else:
1756
+ self._context_summary_agent.reset()
1757
+
1758
+ if summary_prompt:
1759
+ prompt_text = (
1760
+ f"{summary_prompt.rstrip()}\n\n"
1761
+ f"AGENT CONVERSATION TO BE SUMMARIZED:\n"
1762
+ f"{conversation_text}"
1763
+ )
1764
+ else:
1765
+ prompt_text = build_default_summary_prompt(conversation_text)
1766
+
1767
+ try:
1768
+ # Use structured output if response_format is provided
1769
+ if response_format:
1770
+ response = self._context_summary_agent.step(
1771
+ prompt_text, response_format=response_format
1772
+ )
1773
+ else:
1774
+ response = self._context_summary_agent.step(prompt_text)
1775
+ except Exception as step_exc:
1776
+ error_message = (
1777
+ f"Failed to generate summary using model: {step_exc}"
1778
+ )
1779
+ logger.error(error_message)
1780
+ result["status"] = error_message
1781
+ return result
1782
+
1783
+ if not response.msgs:
1784
+ status_message = (
1785
+ "Failed to generate summary from model response."
1786
+ )
1787
+ result["status"] = status_message
1788
+ return result
1789
+
1790
+ summary_content = response.msgs[-1].content.strip()
1791
+ if not summary_content:
1792
+ status_message = "Generated summary is empty."
1793
+ result["status"] = status_message
1794
+ return result
1795
+
1796
+ # handle structured output if response_format was provided
1797
+ structured_output = None
1798
+ if response_format and response.msgs[-1].parsed:
1799
+ structured_output = response.msgs[-1].parsed
1800
+
1801
+ # determine filename: use provided filename, or extract from
1802
+ # structured output, or generate timestamp
1803
+ if filename:
1804
+ base_filename = filename
1805
+ elif structured_output and hasattr(
1806
+ structured_output, 'task_title'
1807
+ ):
1808
+ # use task_title from structured output for filename
1809
+ task_title = structured_output.task_title
1810
+ clean_title = ContextUtility.sanitize_workflow_filename(
1811
+ task_title
1812
+ )
1813
+ base_filename = (
1814
+ f"{clean_title}_workflow" if clean_title else "workflow"
1815
+ )
1816
+ else:
1817
+ base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
1818
+
1819
+ base_filename = Path(base_filename).with_suffix("").name
1820
+
1821
+ metadata = context_util.get_session_metadata()
1822
+ metadata.update(
1823
+ {
1824
+ "agent_id": self.agent_id,
1825
+ "message_count": len(messages),
1826
+ }
1827
+ )
1828
+
1829
+ # convert structured output to custom markdown if present
1830
+ if structured_output:
1831
+ # convert structured output to custom markdown
1832
+ summary_content = context_util.structured_output_to_markdown(
1833
+ structured_data=structured_output, metadata=metadata
1834
+ )
1835
+ if add_user_messages:
1836
+ summary_content = self._append_user_messages_section(
1837
+ summary_content, user_messages
1838
+ )
1839
+
1840
+ # Save the markdown (either custom structured or default)
1841
+ save_status = context_util.save_markdown_file(
1842
+ base_filename,
1843
+ summary_content,
1844
+ title="Conversation Summary"
1845
+ if not structured_output
1846
+ else None,
1847
+ metadata=metadata if not structured_output else None,
1848
+ )
1849
+
1850
+ file_path = (
1851
+ context_util.get_working_directory() / f"{base_filename}.md"
1852
+ )
1853
+ summary_content = (
1854
+ f"[CONTEXT_SUMMARY] The following is a summary of our "
1855
+ f"conversation from a previous session: {summary_content}"
1856
+ )
1857
+ # Prepare result dictionary
1858
+ result_dict = {
1859
+ "summary": summary_content,
1860
+ "file_path": str(file_path),
1861
+ "status": save_status,
1862
+ "structured_summary": structured_output,
1863
+ }
1864
+
1865
+ result.update(result_dict)
1866
+ logger.info("Conversation summary saved to %s", file_path)
1867
+ return result
1868
+
1869
+ except Exception as exc:
1870
+ error_message = f"Failed to summarize conversation context: {exc}"
1871
+ logger.error(error_message)
1872
+ result["status"] = error_message
1873
+ return result
1874
+
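
As a usage sketch of the new summarize() API (the ConversationSummary schema, prompts, and paths below are illustrative assumptions, not taken from the package):

```python
from pydantic import BaseModel

from camel.agents import ChatAgent


class ConversationSummary(BaseModel):
    # task_title is also used to derive the markdown filename when no
    # explicit filename is given.
    task_title: str
    key_decisions: list[str]
    open_questions: list[str]


agent = ChatAgent(system_message="You are a helpful assistant.")
agent.step("Help me plan a database migration.")

result = agent.summarize(
    filename="migration_planning",       # illustrative filename
    response_format=ConversationSummary,
    working_directory="./context",       # illustrative directory
)
print(result["status"])
print(result["file_path"])  # roughly ./context/migration_planning.md
```
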
1875
+ async def asummarize(
1876
+ self,
1877
+ filename: Optional[str] = None,
1878
+ summary_prompt: Optional[str] = None,
1879
+ response_format: Optional[Type[BaseModel]] = None,
1880
+ working_directory: Optional[Union[str, Path]] = None,
1881
+ include_summaries: bool = False,
1882
+ add_user_messages: bool = True,
1883
+ ) -> Dict[str, Any]:
1884
+ r"""Asynchronously summarize the agent's current conversation context
1885
+ and persist it to a markdown file.
1886
+
1887
+ This is the async version of summarize() that uses astep() for
1888
+ non-blocking LLM calls, enabling parallel summarization of multiple
1889
+ agents.
1890
+
1891
+ Args:
1892
+ filename (Optional[str]): The base filename (without extension) to
1893
+ use for the markdown file. Defaults to a timestamped name when
1894
+ not provided.
1895
+ summary_prompt (Optional[str]): Custom prompt for the summarizer.
1896
+ When omitted, a default prompt highlighting key decisions,
1897
+ action items, and open questions is used.
1898
+ response_format (Optional[Type[BaseModel]]): A Pydantic model
1899
+ defining the expected structure of the response. If provided,
1900
+ the summary will be generated as structured output and included
1901
+ in the result.
1902
+ working_directory (Optional[str|Path]): Optional directory to save
1903
+ the markdown summary file. If provided, overrides the default
1904
+ directory used by ContextUtility.
1905
+ include_summaries (bool): Whether to include previously generated
1906
+ summaries in the content to be summarized. If False (default),
1907
+ only non-summary messages will be summarized. If True, all
1908
+ messages including previous summaries will be summarized
1909
+ (full compression). (default: :obj:`False`)
1910
+ add_user_messages (bool): Whether to add user messages to the summary.
1911
+ (default: :obj:`True`)
1912
+ Returns:
1913
+ Dict[str, Any]: A dictionary containing the summary text, file
1914
+ path, status message, and optionally structured_summary if
1915
+ response_format was provided.
1916
+ """
1917
+
1918
+ result: Dict[str, Any] = {
1919
+ "summary": "",
1920
+ "file_path": None,
1921
+ "status": "",
1922
+ }
1923
+
1924
+ try:
1925
+ # Use external context if set, otherwise create local one
1926
+ if self._context_utility is None:
1927
+ if working_directory is not None:
1928
+ self._context_utility = ContextUtility(
1929
+ working_directory=str(working_directory)
1930
+ )
1931
+ else:
1932
+ self._context_utility = ContextUtility()
1933
+ context_util = self._context_utility
1934
+
1935
+ # Get conversation directly from agent's memory
1936
+ messages, _ = self.memory.get_context()
1937
+
1938
+ if not messages:
1939
+ status_message = (
1940
+ "No conversation context available to summarize."
1941
+ )
1942
+ result["status"] = status_message
1943
+ return result
1944
+
1945
+ # Convert messages to conversation text
1946
+ conversation_lines = []
1947
+ user_messages: List[str] = []
1948
+ for message in messages:
1949
+ role = message.get('role', 'unknown')
1950
+ content = message.get('content', '')
1951
+
1952
+ # Skip summary messages if include_summaries is False
1953
+ if not include_summaries and isinstance(content, str):
1954
+ # Check if this is a summary message by looking for marker
1955
+ if content.startswith('[CONTEXT_SUMMARY]'):
1956
+ continue
1957
+
1958
+ # Handle tool call messages (assistant calling tools)
1959
+ tool_calls = message.get('tool_calls')
1960
+ if tool_calls and isinstance(tool_calls, (list, tuple)):
1961
+ for tool_call in tool_calls:
1962
+ # Handle both dict and object formats
1963
+ if isinstance(tool_call, dict):
1964
+ func_name = tool_call.get('function', {}).get(
1965
+ 'name', 'unknown_tool'
1966
+ )
1967
+ func_args_str = tool_call.get('function', {}).get(
1968
+ 'arguments', '{}'
1969
+ )
1970
+ else:
1971
+ # Handle object format (Pydantic or similar)
1972
+ func_name = getattr(
1973
+ getattr(tool_call, 'function', None),
1974
+ 'name',
1975
+ 'unknown_tool',
1976
+ )
1977
+ func_args_str = getattr(
1978
+ getattr(tool_call, 'function', None),
1979
+ 'arguments',
1980
+ '{}',
1981
+ )
1982
+
1983
+ # Parse and format arguments for readability
1984
+ try:
1985
+ import json
1986
+
1987
+ args_dict = json.loads(func_args_str)
1988
+ args_formatted = ', '.join(
1989
+ f"{k}={v}" for k, v in args_dict.items()
1990
+ )
1991
+ except (json.JSONDecodeError, ValueError, TypeError):
1992
+ args_formatted = func_args_str
1993
+
1994
+ conversation_lines.append(
1995
+ f"[TOOL CALL] {func_name}({args_formatted})"
1996
+ )
1997
+
1998
+ # Handle tool response messages
1999
+ elif role == 'tool':
2000
+ tool_name = message.get('name', 'unknown_tool')
2001
+ if not content:
2002
+ content = str(message.get('content', ''))
2003
+ conversation_lines.append(
2004
+ f"[TOOL RESULT] {tool_name} → {content}"
2005
+ )
2006
+
2007
+ # Handle regular content messages (user/assistant/system)
2008
+ elif content:
2009
+ content = str(content)
2010
+ if role == 'user':
2011
+ user_messages.append(content)
2012
+ conversation_lines.append(f"{role}: {content}")
2013
+
2014
+ conversation_text = "\n".join(conversation_lines).strip()
2015
+
2016
+ if not conversation_text:
2017
+ status_message = (
2018
+ "Conversation context is empty; skipping summary."
2019
+ )
2020
+ result["status"] = status_message
2021
+ return result
2022
+
2023
+ if self._context_summary_agent is None:
2024
+ self._context_summary_agent = ChatAgent(
2025
+ system_message=(
2026
+ "You are a helpful assistant that summarizes "
2027
+ "conversations"
2028
+ ),
2029
+ model=self.model_backend,
2030
+ agent_id=f"{self.agent_id}_context_summarizer",
2031
+ summarize_threshold=None,
2032
+ )
2033
+ else:
2034
+ self._context_summary_agent.reset()
2035
+
2036
+ if summary_prompt:
2037
+ prompt_text = (
2038
+ f"{summary_prompt.rstrip()}\n\n"
2039
+ f"AGENT CONVERSATION TO BE SUMMARIZED:\n"
2040
+ f"{conversation_text}"
2041
+ )
2042
+ else:
2043
+ prompt_text = build_default_summary_prompt(conversation_text)
2044
+
2045
+ try:
2046
+ # Use structured output if response_format is provided
2047
+ if response_format:
2048
+ response = await self._context_summary_agent.astep(
2049
+ prompt_text, response_format=response_format
2050
+ )
2051
+ else:
2052
+ response = await self._context_summary_agent.astep(
2053
+ prompt_text
2054
+ )
2055
+
2056
+ # Handle streaming response
2057
+ if isinstance(response, AsyncStreamingChatAgentResponse):
2058
+ # Collect final response
2059
+ final_response = await response
2060
+ response = final_response
2061
+
2062
+ except Exception as step_exc:
2063
+ error_message = (
2064
+ f"Failed to generate summary using model: {step_exc}"
2065
+ )
2066
+ logger.error(error_message)
2067
+ result["status"] = error_message
2068
+ return result
2069
+
2070
+ if not response.msgs:
2071
+ status_message = (
2072
+ "Failed to generate summary from model response."
2073
+ )
2074
+ result["status"] = status_message
2075
+ return result
2076
+
2077
+ summary_content = response.msgs[-1].content.strip()
2078
+ if not summary_content:
2079
+ status_message = "Generated summary is empty."
2080
+ result["status"] = status_message
2081
+ return result
2082
+
2083
+ # handle structured output if response_format was provided
2084
+ structured_output = None
2085
+ if response_format and response.msgs[-1].parsed:
2086
+ structured_output = response.msgs[-1].parsed
2087
+
2088
+ # determine filename: use provided filename, or extract from
2089
+ # structured output, or generate timestamp
2090
+ if filename:
2091
+ base_filename = filename
2092
+ elif structured_output and hasattr(
2093
+ structured_output, 'task_title'
2094
+ ):
2095
+ # use task_title from structured output for filename
2096
+ task_title = structured_output.task_title
2097
+ clean_title = ContextUtility.sanitize_workflow_filename(
2098
+ task_title
2099
+ )
2100
+ base_filename = (
2101
+ f"{clean_title}_workflow" if clean_title else "workflow"
2102
+ )
2103
+ else:
2104
+ base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
2105
+
2106
+ base_filename = Path(base_filename).with_suffix("").name
2107
+
2108
+ metadata = context_util.get_session_metadata()
2109
+ metadata.update(
2110
+ {
2111
+ "agent_id": self.agent_id,
2112
+ "message_count": len(messages),
2113
+ }
2114
+ )
2115
+
2116
+ # convert structured output to custom markdown if present
2117
+ if structured_output:
2118
+ # convert structured output to custom markdown
2119
+ summary_content = context_util.structured_output_to_markdown(
2120
+ structured_data=structured_output, metadata=metadata
2121
+ )
2122
+ if add_user_messages:
2123
+ summary_content = self._append_user_messages_section(
2124
+ summary_content, user_messages
2125
+ )
2126
+
2127
+ # Save the markdown (either custom structured or default)
2128
+ save_status = context_util.save_markdown_file(
2129
+ base_filename,
2130
+ summary_content,
2131
+ title="Conversation Summary"
2132
+ if not structured_output
2133
+ else None,
2134
+ metadata=metadata if not structured_output else None,
2135
+ )
2136
+
2137
+ file_path = (
2138
+ context_util.get_working_directory() / f"{base_filename}.md"
2139
+ )
2140
+
2141
+ summary_content = (
2142
+ f"[CONTEXT_SUMMARY] The following is a summary of our "
2143
+ f"conversation from a previous session: {summary_content}"
2144
+ )
2145
+
2146
+ # Prepare result dictionary
2147
+ result_dict = {
2148
+ "summary": summary_content,
2149
+ "file_path": str(file_path),
2150
+ "status": save_status,
2151
+ "structured_summary": structured_output,
2152
+ }
2153
+
2154
+ result.update(result_dict)
2155
+ logger.info("Conversation summary saved to %s", file_path)
2156
+ return result
2157
+
2158
+ except Exception as exc:
2159
+ error_message = f"Failed to summarize conversation context: {exc}"
2160
+ logger.error(error_message)
2161
+ result["status"] = error_message
2162
+ return result
2163
+
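
Because asummarize() awaits astep() internally, several agents can be summarized concurrently; a minimal sketch, assuming `agents` is a list of ChatAgent instances:

```python
import asyncio


async def summarize_all(agents):
    # Run the summaries concurrently instead of one blocking call at a time.
    results = await asyncio.gather(*(agent.asummarize() for agent in agents))
    return {
        agent.agent_id: res["file_path"]
        for agent, res in zip(agents, results)
    }

# Usage (assuming an event loop is not already running):
# paths = asyncio.run(summarize_all(agents))
```
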
1015
2164
  def clear_memory(self) -> None:
1016
2165
  r"""Clear the agent's memory and reset to initial state.
1017
2166
 
@@ -1019,8 +2168,16 @@ class ChatAgent(BaseAgent):
1019
2168
  None
1020
2169
  """
1021
2170
  self.memory.clear()
2171
+
1022
2172
  if self.system_message is not None:
1023
- self.update_memory(self.system_message, OpenAIBackendRole.SYSTEM)
2173
+ self.memory.write_record(
2174
+ MemoryRecord(
2175
+ message=self.system_message,
2176
+ role_at_backend=OpenAIBackendRole.SYSTEM,
2177
+ timestamp=time.time_ns() / 1_000_000_000,
2178
+ agent_id=self.agent_id,
2179
+ )
2180
+ )
1024
2181
 
1025
2182
  def _generate_system_message_for_output_language(
1026
2183
  self,
@@ -1045,28 +2202,81 @@ class ChatAgent(BaseAgent):
1045
2202
  content = self._original_system_message.content + language_prompt
1046
2203
  return self._original_system_message.create_new_instance(content)
1047
2204
  else:
1048
- return BaseMessage.make_assistant_message(
1049
- role_name="Assistant",
1050
- content=language_prompt,
1051
- )
2205
+ return BaseMessage.make_system_message(language_prompt)
1052
2206
 
1053
2207
  def init_messages(self) -> None:
1054
2208
  r"""Initializes the stored messages list with the current system
1055
2209
  message.
1056
2210
  """
1057
- import time
2211
+ self._reset_summary_state()
2212
+ self.clear_memory()
1058
2213
 
1059
- self.memory.clear()
1060
- # avoid UserWarning: The `ChatHistoryMemory` is empty.
1061
- if self.system_message is not None:
1062
- self.memory.write_record(
1063
- MemoryRecord(
1064
- message=self.system_message,
1065
- role_at_backend=OpenAIBackendRole.SYSTEM,
1066
- timestamp=time.time_ns() / 1_000_000_000,
1067
- agent_id=self.agent_id,
1068
- )
1069
- )
2214
+ def update_system_message(
2215
+ self,
2216
+ system_message: Union[BaseMessage, str],
2217
+ reset_memory: bool = True,
2218
+ ) -> None:
2219
+ r"""Update the system message.
2220
+ By default, this resets the conversation with the new system message.
2221
+
2222
+ Args:
2223
+ system_message (Union[BaseMessage, str]): The new system message.
2224
+ Can be either a BaseMessage object or a string.
2225
+ If a string is provided, it will be converted
2226
+ into a BaseMessage object.
2227
+ reset_memory (bool):
2228
+ Whether to reinitialize conversation messages after updating
2229
+ the system message. Defaults to True.
2230
+ """
2231
+ if system_message is None:
2232
+ raise ValueError("system_message is required and cannot be None. ")
2233
+ self._original_system_message = (
2234
+ BaseMessage.make_system_message(system_message)
2235
+ if isinstance(system_message, str)
2236
+ else system_message
2237
+ )
2238
+ self._system_message = (
2239
+ self._generate_system_message_for_output_language()
2240
+ )
2241
+ if reset_memory:
2242
+ self.init_messages()
2243
+
2244
+ def append_to_system_message(
2245
+ self, content: str, reset_memory: bool = True
2246
+ ) -> None:
2247
+ """Append additional context to existing system message.
2248
+
2249
+ Args:
2250
+ content (str): The additional system message.
2251
+ reset_memory (bool):
2252
+ Whether to reinitialize conversation messages after appending
2253
+ additional context. Defaults to True.
2254
+ """
2255
+ original_content = (
2256
+ self._original_system_message.content
2257
+ if self._original_system_message
2258
+ else ""
2259
+ )
2260
+ new_system_message = original_content + '\n' + content
2261
+ self._original_system_message = BaseMessage.make_system_message(
2262
+ new_system_message
2263
+ )
2264
+ self._system_message = (
2265
+ self._generate_system_message_for_output_language()
2266
+ )
2267
+ if reset_memory:
2268
+ self.init_messages()
2269
+
2270
+ def reset_to_original_system_message(self) -> None:
2271
+ r"""Reset system message to original, removing any appended context.
2272
+
2273
+ This method reverts the agent's system message back to its original
2274
+ state, removing any workflow context or other modifications that may
2275
+ have been appended. Useful for resetting agent state in multi-turn
2276
+ scenarios.
2277
+ """
2278
+ self._system_message = self._original_system_message
2279
+ self.init_messages()
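
Taken together, these methods manage the system prompt at runtime; a minimal usage sketch (the prompts are illustrative and ChatAgent construction details are omitted):

```python
agent = ChatAgent(system_message="You are a coding assistant.")

# Swap in a new system prompt; by default this re-initializes memory.
agent.update_system_message("You are a careful code reviewer.")

# Append extra context on top of the current system prompt.
agent.append_to_system_message("Focus on security issues.")

# Revert to the original system message (see the docstring above for
# exactly what is restored).
agent.reset_to_original_system_message()
```
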
1070
2280
 
1071
2281
  def record_message(self, message: BaseMessage) -> None:
1072
2282
  r"""Records the externally provided message into the agent memory as if
@@ -1129,7 +2339,7 @@ class ChatAgent(BaseAgent):
1129
2339
 
1130
2340
  # Create a prompt based on the schema
1131
2341
  format_instruction = (
1132
- "\n\nPlease respond in the following JSON format:\n" "{\n"
2342
+ "\n\nPlease respond in the following JSON format:\n{\n"
1133
2343
  )
1134
2344
 
1135
2345
  properties = schema.get("properties", {})
@@ -1216,6 +2426,33 @@ class ChatAgent(BaseAgent):
1216
2426
  # and True to indicate we used prompt formatting
1217
2427
  return modified_message, None, True
1218
2428
 
2429
+ def _is_called_from_registered_toolkit(self) -> bool:
2430
+ r"""Check if current step/astep call originates from a
2431
+ RegisteredAgentToolkit.
2432
+
2433
+ This method uses stack inspection to detect if the current call
2434
+ is originating from a toolkit that inherits from
2435
+ RegisteredAgentToolkit. When detected, tools should be disabled to
2436
+ prevent recursive calls.
2437
+
2438
+ Returns:
2439
+ bool: True if called from a RegisteredAgentToolkit, False otherwise
2440
+ """
2441
+ from camel.toolkits.base import RegisteredAgentToolkit
2442
+
2443
+ try:
2444
+ for frame_info in inspect.stack():
2445
+ frame_locals = frame_info.frame.f_locals
2446
+ if 'self' in frame_locals:
2447
+ caller_self = frame_locals['self']
2448
+ if isinstance(caller_self, RegisteredAgentToolkit):
2449
+ return True
2450
+
2451
+ except Exception:
2452
+ return False
2453
+
2454
+ return False
2455
+
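
The detection itself is plain stack inspection; a standalone sketch of the idea with an illustrative class (nothing camel-specific):

```python
import inspect


class SpecialCaller:
    def invoke(self, fn):
        return fn()


def called_from_special_caller() -> bool:
    # Walk the call stack and look for a frame whose `self` is a
    # SpecialCaller instance.
    for frame_info in inspect.stack():
        caller_self = frame_info.frame.f_locals.get("self")
        if isinstance(caller_self, SpecialCaller):
            return True
    return False


print(called_from_special_caller())                         # False
print(SpecialCaller().invoke(called_from_special_caller))   # True
```
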
1219
2456
  def _apply_prompt_based_parsing(
1220
2457
  self,
1221
2458
  response: ModelResponse,
@@ -1232,7 +2469,6 @@ class ChatAgent(BaseAgent):
1232
2469
  try:
1233
2470
  # Try to extract JSON from the response content
1234
2471
  import json
1235
- import re
1236
2472
 
1237
2473
  from pydantic import ValidationError
1238
2474
 
@@ -1271,8 +2507,7 @@ class ChatAgent(BaseAgent):
1271
2507
 
1272
2508
  if not message.parsed:
1273
2509
  logger.warning(
1274
- f"Failed to parse JSON from response: "
1275
- f"{content}"
2510
+ f"Failed to parse JSON from response: {content}"
1276
2511
  )
1277
2512
 
1278
2513
  except Exception as e:
@@ -1365,6 +2600,9 @@ class ChatAgent(BaseAgent):
1365
2600
  a StreamingChatAgentResponse that behaves like
1366
2601
  ChatAgentResponse but can also be iterated for
1367
2602
  streaming updates.
2603
+
2604
+ Raises:
2605
+ TimeoutError: If the step operation exceeds the configured timeout.
1368
2606
  """
1369
2607
 
1370
2608
  stream = self.model_backend.model_config_dict.get("stream", False)
@@ -1374,6 +2612,30 @@ class ChatAgent(BaseAgent):
1374
2612
  generator = self._stream(input_message, response_format)
1375
2613
  return StreamingChatAgentResponse(generator)
1376
2614
 
2615
+ # Execute with timeout if configured
2616
+ if self.step_timeout is not None:
2617
+ with concurrent.futures.ThreadPoolExecutor(
2618
+ max_workers=1
2619
+ ) as executor:
2620
+ future = executor.submit(
2621
+ self._step_impl, input_message, response_format
2622
+ )
2623
+ try:
2624
+ return future.result(timeout=self.step_timeout)
2625
+ except concurrent.futures.TimeoutError:
2626
+ future.cancel()
2627
+ raise TimeoutError(
2628
+ f"Step timed out after {self.step_timeout}s"
2629
+ )
2630
+ else:
2631
+ return self._step_impl(input_message, response_format)
2632
+
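
The timeout wrapper is the standard concurrent.futures pattern of submitting the blocking call and bounding the wait; a standalone sketch:

```python
import concurrent.futures
import time


def slow_step() -> str:
    time.sleep(5)
    return "done"


with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
    future = executor.submit(slow_step)
    try:
        print(future.result(timeout=1.0))
    except concurrent.futures.TimeoutError:
        # Only the wait is abandoned; the worker thread keeps running, and
        # leaving the with-block still waits for it to finish.
        print("step timed out")
```
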
2633
+ def _step_impl(
2634
+ self,
2635
+ input_message: Union[BaseMessage, str],
2636
+ response_format: Optional[Type[BaseModel]] = None,
2637
+ ) -> ChatAgentResponse:
2638
+ r"""Implementation of non-streaming step logic."""
1377
2639
  # Set Langfuse session_id using agent_id for trace grouping
1378
2640
  try:
1379
2641
  from camel.utils.langfuse import set_current_agent_session_id
@@ -1382,6 +2644,10 @@ class ChatAgent(BaseAgent):
1382
2644
  except ImportError:
1383
2645
  pass # Langfuse not available
1384
2646
 
2647
+ # Check if this call is from a RegisteredAgentToolkit to prevent tool
2648
+ # use
2649
+ disable_tools = self._is_called_from_registered_toolkit()
2650
+
1385
2651
  # Handle response format compatibility with non-strict tools
1386
2652
  original_response_format = response_format
1387
2653
  input_message, response_format, used_prompt_formatting = (
@@ -1390,48 +2656,155 @@ class ChatAgent(BaseAgent):
1390
2656
  )
1391
2657
  )
1392
2658
 
1393
- # Convert input message to BaseMessage if necessary
1394
- if isinstance(input_message, str):
1395
- input_message = BaseMessage.make_user_message(
1396
- role_name="User", content=input_message
1397
- )
2659
+ # Convert input message to BaseMessage if necessary
2660
+ if isinstance(input_message, str):
2661
+ input_message = BaseMessage.make_user_message(
2662
+ role_name="User", content=input_message
2663
+ )
2664
+
2665
+ # Add user input to memory
2666
+ self.update_memory(input_message, OpenAIBackendRole.USER)
2667
+
2668
+ tool_call_records: List[ToolCallingRecord] = []
2669
+ external_tool_call_requests: Optional[List[ToolCallRequest]] = None
2670
+
2671
+ accumulated_context_tokens = (
2672
+ 0 # This tracks cumulative context tokens, not API usage tokens
2673
+ )
2674
+
2675
+ # Initialize token usage tracker
2676
+ step_token_usage = self._create_token_usage_tracker()
2677
+ iteration_count: int = 0
2678
+ prev_num_openai_messages: int = 0
2679
+
2680
+ while True:
2681
+ if self.pause_event is not None and not self.pause_event.is_set():
2682
+ # Use efficient blocking wait for threading.Event
2683
+ if isinstance(self.pause_event, threading.Event):
2684
+ self.pause_event.wait()
2685
+ else:
2686
+ # Fallback for asyncio.Event in sync context
2687
+ while not self.pause_event.is_set():
2688
+ time.sleep(0.001)
2689
+
2690
+ try:
2691
+ openai_messages, num_tokens = self.memory.get_context()
2692
+ if self.summarize_threshold is not None:
2693
+ threshold = self._calculate_next_summary_threshold()
2694
+ summary_token_count = self._summary_token_count
2695
+ token_limit = self.model_backend.token_limit
2696
+
2697
+ if num_tokens <= token_limit:
2698
+ if (
2699
+ summary_token_count
2700
+ > token_limit * self.summary_window_ratio
2701
+ ):
2702
+ logger.info(
2703
+ f"Summary tokens ({summary_token_count}) "
2704
+ f"exceed limit, full compression."
2705
+ )
2706
+ # Summarize everything (including summaries)
2707
+ summary = self.summarize(include_summaries=True)
2708
+ self._update_memory_with_summary(
2709
+ summary.get("summary", ""),
2710
+ include_summaries=True,
2711
+ )
2712
+ elif num_tokens > threshold:
2713
+ logger.info(
2714
+ f"Token count ({num_tokens}) exceed threshold "
2715
+ f"({threshold}). Triggering summarization."
2716
+ )
2717
+ # Only summarize non-summary content
2718
+ summary = self.summarize(include_summaries=False)
2719
+ self._update_memory_with_summary(
2720
+ summary.get("summary", ""),
2721
+ include_summaries=False,
2722
+ )
2723
+ accumulated_context_tokens += num_tokens
2724
+ except RuntimeError as e:
2725
+ return self._step_terminate(
2726
+ e.args[1], tool_call_records, "max_tokens_exceeded"
2727
+ )
2728
+ # Get response from model backend with token limit error handling
2729
+ try:
2730
+ response = self._get_model_response(
2731
+ openai_messages,
2732
+ num_tokens=num_tokens,
2733
+ current_iteration=iteration_count,
2734
+ response_format=response_format,
2735
+ tool_schemas=[]
2736
+ if disable_tools
2737
+ else self._get_full_tool_schemas(),
2738
+ prev_num_openai_messages=prev_num_openai_messages,
2739
+ )
2740
+ except Exception as exc:
2741
+ logger.exception("Model error: %s", exc)
2742
+
2743
+ if self._is_token_limit_error(exc):
2744
+ tool_signature = self._last_tool_call_signature
2745
+ if (
2746
+ tool_signature is not None
2747
+ and tool_signature
2748
+ == self._last_token_limit_tool_signature
2749
+ ):
2750
+ description = self._describe_tool_call(
2751
+ self._last_tool_call_record
2752
+ )
2753
+ repeated_msg = (
2754
+ "Context exceeded again by the same tool call."
2755
+ )
2756
+ if description:
2757
+ repeated_msg += f" {description}"
2758
+ raise RuntimeError(repeated_msg) from exc
2759
+
2760
+ user_message_count = sum(
2761
+ 1
2762
+ for msg in openai_messages
2763
+ if getattr(msg, "role", None) == "user"
2764
+ )
2765
+ if (
2766
+ user_message_count == 1
2767
+ and getattr(openai_messages[-1], "role", None)
2768
+ == "user"
2769
+ ):
2770
+ raise RuntimeError(
2771
+ "The provided user input alone exceeds the "
2772
+ "context window. Please shorten the input."
2773
+ ) from exc
2774
+
2775
+ logger.warning(
2776
+ "Token limit exceeded error detected. "
2777
+ "Summarizing context."
2778
+ )
2779
+
2780
+ recent_records: List[ContextRecord]
2781
+ try:
2782
+ recent_records = self.memory.retrieve()
2783
+ except Exception: # pragma: no cover - defensive guard
2784
+ recent_records = []
1398
2785
 
1399
- # Add user input to memory
1400
- self.update_memory(input_message, OpenAIBackendRole.USER)
2786
+ indices_to_remove = (
2787
+ self._find_indices_to_remove_for_last_tool_pair(
2788
+ recent_records
2789
+ )
2790
+ )
2791
+ self.memory.remove_records_by_indices(indices_to_remove)
1401
2792
 
1402
- tool_call_records: List[ToolCallingRecord] = []
1403
- external_tool_call_requests: Optional[List[ToolCallRequest]] = None
2793
+ summary = self.summarize(include_summaries=False)
2794
+ tool_notice = self._format_tool_limit_notice()
2795
+ summary_messages = summary.get("summary", "")
1404
2796
 
1405
- accumulated_context_tokens = (
1406
- 0 # This tracks cumulative context tokens, not API usage tokens
1407
- )
2797
+ if tool_notice:
2798
+ summary_messages += "\n\n" + tool_notice
1408
2799
 
1409
- # Initialize token usage tracker
1410
- step_token_usage = self._create_token_usage_tracker()
1411
- iteration_count: int = 0
1412
- prev_num_openai_messages: int = 0
2800
+ self._update_memory_with_summary(
2801
+ summary_messages, include_summaries=False
2802
+ )
2803
+ self._last_token_limit_tool_signature = tool_signature
2804
+ return self._step_impl(input_message, response_format)
1413
2805
 
1414
- while True:
1415
- if self.pause_event is not None and not self.pause_event.is_set():
1416
- while not self.pause_event.is_set():
1417
- time.sleep(0.001)
2806
+ raise
1418
2807
 
1419
- try:
1420
- openai_messages, num_tokens = self.memory.get_context()
1421
- accumulated_context_tokens += num_tokens
1422
- except RuntimeError as e:
1423
- return self._step_terminate(
1424
- e.args[1], tool_call_records, "max_tokens_exceeded"
1425
- )
1426
- # Get response from model backend
1427
- response = self._get_model_response(
1428
- openai_messages,
1429
- num_tokens=num_tokens,
1430
- current_iteration=iteration_count,
1431
- response_format=response_format,
1432
- tool_schemas=self._get_full_tool_schemas(),
1433
- prev_num_openai_messages=prev_num_openai_messages,
1434
- )
1435
2808
  prev_num_openai_messages = len(openai_messages)
1436
2809
  iteration_count += 1
1437
2810
 
@@ -1444,7 +2817,7 @@ class ChatAgent(BaseAgent):
1444
2817
  if self.stop_event and self.stop_event.is_set():
1445
2818
  # Use the _step_terminate to terminate the agent with reason
1446
2819
  logger.info(
1447
- f"Termination triggered at iteration " f"{iteration_count}"
2820
+ f"Termination triggered at iteration {iteration_count}"
1448
2821
  )
1449
2822
  return self._step_terminate(
1450
2823
  accumulated_context_tokens,
@@ -1467,8 +2840,11 @@ class ChatAgent(BaseAgent):
1467
2840
  self.pause_event is not None
1468
2841
  and not self.pause_event.is_set()
1469
2842
  ):
1470
- while not self.pause_event.is_set():
1471
- time.sleep(0.001)
2843
+ if isinstance(self.pause_event, threading.Event):
2844
+ self.pause_event.wait()
2845
+ else:
2846
+ while not self.pause_event.is_set():
2847
+ time.sleep(0.001)
1472
2848
  result = self._execute_tool(tool_call_request)
1473
2849
  tool_call_records.append(result)
1474
2850
 
@@ -1544,6 +2920,10 @@ class ChatAgent(BaseAgent):
1544
2920
  True, returns an AsyncStreamingChatAgentResponse that can be
1545
2921
  awaited for the final result or async iterated for streaming
1546
2922
  updates.
2923
+
2924
+ Raises:
2925
+ asyncio.TimeoutError: If the step operation exceeds the configured
2926
+ timeout.
1547
2927
  """
1548
2928
 
1549
2929
  try:
@@ -1559,9 +2939,22 @@ class ChatAgent(BaseAgent):
1559
2939
  async_generator = self._astream(input_message, response_format)
1560
2940
  return AsyncStreamingChatAgentResponse(async_generator)
1561
2941
  else:
1562
- return await self._astep_non_streaming_task(
1563
- input_message, response_format
1564
- )
2942
+ if self.step_timeout is not None:
2943
+ try:
2944
+ return await asyncio.wait_for(
2945
+ self._astep_non_streaming_task(
2946
+ input_message, response_format
2947
+ ),
2948
+ timeout=self.step_timeout,
2949
+ )
2950
+ except asyncio.TimeoutError:
2951
+ raise asyncio.TimeoutError(
2952
+ f"Async step timed out after {self.step_timeout}s"
2953
+ )
2954
+ else:
2955
+ return await self._astep_non_streaming_task(
2956
+ input_message, response_format
2957
+ )
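
The async path relies on asyncio.wait_for instead; a standalone sketch of the same timeout behavior:

```python
import asyncio


async def slow_astep() -> str:
    await asyncio.sleep(5)
    return "done"


async def main() -> None:
    try:
        print(await asyncio.wait_for(slow_astep(), timeout=1.0))
    except asyncio.TimeoutError:
        # Unlike the thread-based variant, the awaited task is cancelled.
        print("async step timed out")


asyncio.run(main())
```
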
1565
2958
 
1566
2959
  async def _astep_non_streaming_task(
1567
2960
  self,
@@ -1577,6 +2970,10 @@ class ChatAgent(BaseAgent):
1577
2970
  except ImportError:
1578
2971
  pass # Langfuse not available
1579
2972
 
2973
+ # Check if this call is from a RegisteredAgentToolkit to prevent tool
2974
+ # use
2975
+ disable_tools = self._is_called_from_registered_toolkit()
2976
+
1580
2977
  # Handle response format compatibility with non-strict tools
1581
2978
  original_response_format = response_format
1582
2979
  input_message, response_format, used_prompt_formatting = (
@@ -1602,25 +2999,139 @@ class ChatAgent(BaseAgent):
1602
2999
  step_token_usage = self._create_token_usage_tracker()
1603
3000
  iteration_count: int = 0
1604
3001
  prev_num_openai_messages: int = 0
3002
+
1605
3003
  while True:
1606
3004
  if self.pause_event is not None and not self.pause_event.is_set():
1607
- await self.pause_event.wait()
3005
+ if isinstance(self.pause_event, asyncio.Event):
3006
+ await self.pause_event.wait()
3007
+ elif isinstance(self.pause_event, threading.Event):
3008
+ # For threading.Event in async context, run in executor
3009
+ loop = asyncio.get_event_loop()
3010
+ await loop.run_in_executor(None, self.pause_event.wait)
1608
3011
  try:
1609
3012
  openai_messages, num_tokens = self.memory.get_context()
3013
+ if self.summarize_threshold is not None:
3014
+ threshold = self._calculate_next_summary_threshold()
3015
+ summary_token_count = self._summary_token_count
3016
+ token_limit = self.model_backend.token_limit
3017
+
3018
+ if num_tokens <= token_limit:
3019
+ if (
3020
+ summary_token_count
3021
+ > token_limit * self.summary_window_ratio
3022
+ ):
3023
+ logger.info(
3024
+ f"Summary tokens ({summary_token_count}) "
3025
+ f"exceed limit, full compression."
3026
+ )
3027
+ # Summarize everything (including summaries)
3028
+ summary = await self.asummarize(
3029
+ include_summaries=True
3030
+ )
3031
+ self._update_memory_with_summary(
3032
+ summary.get("summary", ""),
3033
+ include_summaries=True,
3034
+ )
3035
+ elif num_tokens > threshold:
3036
+ logger.info(
3037
+ f"Token count ({num_tokens}) exceed threshold "
3038
+ "({threshold}). Triggering summarization."
3039
+ )
3040
+ # Only summarize non-summary content
3041
+ summary = await self.asummarize(
3042
+ include_summaries=False
3043
+ )
3044
+ self._update_memory_with_summary(
3045
+ summary.get("summary", ""),
3046
+ include_summaries=False,
3047
+ )
1610
3048
  accumulated_context_tokens += num_tokens
1611
3049
  except RuntimeError as e:
1612
3050
  return self._step_terminate(
1613
3051
  e.args[1], tool_call_records, "max_tokens_exceeded"
1614
3052
  )
3053
+ # Get response from model backend with token limit error handling
3054
+ try:
3055
+ response = await self._aget_model_response(
3056
+ openai_messages,
3057
+ num_tokens=num_tokens,
3058
+ current_iteration=iteration_count,
3059
+ response_format=response_format,
3060
+ tool_schemas=[]
3061
+ if disable_tools
3062
+ else self._get_full_tool_schemas(),
3063
+ prev_num_openai_messages=prev_num_openai_messages,
3064
+ )
3065
+ except Exception as exc:
3066
+ logger.exception("Model error: %s", exc)
3067
+
3068
+ if self._is_token_limit_error(exc):
3069
+ tool_signature = self._last_tool_call_signature
3070
+ if (
3071
+ tool_signature is not None
3072
+ and tool_signature
3073
+ == self._last_token_limit_tool_signature
3074
+ ):
3075
+ description = self._describe_tool_call(
3076
+ self._last_tool_call_record
3077
+ )
3078
+ repeated_msg = (
3079
+ "Context exceeded again by the same tool call."
3080
+ )
3081
+ if description:
3082
+ repeated_msg += f" {description}"
3083
+ raise RuntimeError(repeated_msg) from exc
3084
+
3085
+ user_message_count = sum(
3086
+ 1
3087
+ for msg in openai_messages
3088
+ if getattr(msg, "role", None) == "user"
3089
+ )
3090
+ if (
3091
+ user_message_count == 1
3092
+ and getattr(openai_messages[-1], "role", None)
3093
+ == "user"
3094
+ ):
3095
+ raise RuntimeError(
3096
+ "The provided user input alone exceeds the"
3097
+ "context window. Please shorten the input."
3098
+ ) from exc
3099
+
3100
+ logger.warning(
3101
+ "Token limit exceeded error detected. "
3102
+ "Summarizing context."
3103
+ )
3104
+
3105
+ recent_records: List[ContextRecord]
3106
+ try:
3107
+ recent_records = self.memory.retrieve()
3108
+ except Exception: # pragma: no cover - defensive guard
3109
+ recent_records = []
3110
+
3111
+ indices_to_remove = (
3112
+ self._find_indices_to_remove_for_last_tool_pair(
3113
+ recent_records
3114
+ )
3115
+ )
3116
+ self.memory.remove_records_by_indices(indices_to_remove)
3117
+
3118
+ summary = await self.asummarize()
3119
+
3120
+ tool_notice = self._format_tool_limit_notice()
3121
+ summary_messages = summary.get("summary", "")
3122
+
3123
+ if tool_notice:
3124
+ summary_messages += "\n\n" + tool_notice
3125
+ self._update_memory_with_summary(
3126
+ summary_messages, include_summaries=False
3127
+ )
3128
+ self._last_token_limit_tool_signature = tool_signature
3129
+ return await self._astep_non_streaming_task(
3130
+ input_message, response_format
3131
+ )
3132
+
3133
+ raise
1615
3134
 
1616
- response = await self._aget_model_response(
1617
- openai_messages,
1618
- num_tokens=num_tokens,
1619
- current_iteration=iteration_count,
1620
- response_format=response_format,
1621
- tool_schemas=self._get_full_tool_schemas(),
1622
- prev_num_openai_messages=prev_num_openai_messages,
1623
- )
1624
3135
  prev_num_openai_messages = len(openai_messages)
1625
3136
  iteration_count += 1
1626
3137
 
@@ -1633,7 +3144,7 @@ class ChatAgent(BaseAgent):
1633
3144
  if self.stop_event and self.stop_event.is_set():
1634
3145
  # Use the _step_terminate to terminate the agent with reason
1635
3146
  logger.info(
1636
- f"Termination triggered at iteration " f"{iteration_count}"
3147
+ f"Termination triggered at iteration {iteration_count}"
1637
3148
  )
1638
3149
  return self._step_terminate(
1639
3150
  accumulated_context_tokens,
@@ -1656,7 +3167,13 @@ class ChatAgent(BaseAgent):
1656
3167
  self.pause_event is not None
1657
3168
  and not self.pause_event.is_set()
1658
3169
  ):
1659
- await self.pause_event.wait()
3170
+ if isinstance(self.pause_event, asyncio.Event):
3171
+ await self.pause_event.wait()
3172
+ elif isinstance(self.pause_event, threading.Event):
3173
+ loop = asyncio.get_event_loop()
3174
+ await loop.run_in_executor(
3175
+ None, self.pause_event.wait
3176
+ )
1660
3177
  tool_call_record = await self._aexecute_tool(
1661
3178
  tool_call_request
1662
3179
  )
@@ -1691,6 +3208,8 @@ class ChatAgent(BaseAgent):
1691
3208
  if self.prune_tool_calls_from_memory and tool_call_records:
1692
3209
  self.memory.clean_tool_calls()
1693
3210
 
3211
+ self._last_token_limit_user_signature = None
3212
+
1694
3213
  return self._convert_to_chatagent_response(
1695
3214
  response,
1696
3215
  tool_call_records,
@@ -1776,64 +3295,62 @@ class ChatAgent(BaseAgent):
1776
3295
  tool_schemas: Optional[List[Dict[str, Any]]] = None,
1777
3296
  prev_num_openai_messages: int = 0,
1778
3297
  ) -> ModelResponse:
1779
- r"""Internal function for agent step model response.
1780
- Args:
1781
- openai_messages (List[OpenAIMessage]): The OpenAI
1782
- messages to process.
1783
- num_tokens (int): The number of tokens in the context.
1784
- current_iteration (int): The current iteration of the step.
1785
- response_format (Optional[Type[BaseModel]]): The response
1786
- format to use.
1787
- tool_schemas (Optional[List[Dict[str, Any]]]): The tool
1788
- schemas to use.
1789
- prev_num_openai_messages (int): The number of openai messages
1790
- logged in the previous iteration.
1791
-
1792
- Returns:
1793
- ModelResponse: The model response.
1794
- """
3298
+ r"""Internal function for agent step model response."""
3299
+ last_error = None
1795
3300
 
1796
- response = None
1797
- try:
1798
- response = self.model_backend.run(
1799
- openai_messages, response_format, tool_schemas or None
1800
- )
1801
- except Exception as exc:
1802
- logger.error(
1803
- f"An error occurred while running model "
1804
- f"{self.model_backend.model_type}, "
1805
- f"index: {self.model_backend.current_model_index}",
1806
- exc_info=exc,
1807
- )
1808
- error_info = str(exc)
1809
-
1810
- if not response and self.model_backend.num_models > 1:
1811
- raise ModelProcessingError(
1812
- "Unable to process messages: none of the provided models "
1813
- "run successfully."
1814
- )
1815
- elif not response:
3301
+ for attempt in range(self.retry_attempts):
3302
+ try:
3303
+ response = self.model_backend.run(
3304
+ openai_messages, response_format, tool_schemas or None
3305
+ )
3306
+ if response:
3307
+ break
3308
+ except RateLimitError as e:
3309
+ if self._is_token_limit_error(e):
3310
+ raise
3311
+ last_error = e
3312
+ if attempt < self.retry_attempts - 1:
3313
+ delay = min(self.retry_delay * (2**attempt), 60.0)
3314
+ delay = random.uniform(0, delay) # Add jitter
3315
+ logger.warning(
3316
+ f"Rate limit hit (attempt {attempt + 1}"
3317
+ f"/{self.retry_attempts}). Retrying in {delay:.1f}s"
3318
+ )
3319
+ time.sleep(delay)
3320
+ else:
3321
+ logger.error(
3322
+ f"Rate limit exhausted after "
3323
+ f"{self.retry_attempts} attempts"
3324
+ )
3325
+ except Exception:
3326
+ logger.error(
3327
+ f"Model error: {self.model_backend.model_type}",
3328
+ )
3329
+ raise
3330
+ else:
3331
+ # Loop completed without success
1816
3332
  raise ModelProcessingError(
1817
- f"Unable to process messages: the only provided model "
1818
- f"did not run successfully. Error: {error_info}"
3333
+ f"Unable to process messages: "
3334
+ f"{str(last_error) if last_error else 'Unknown error'}"
1819
3335
  )
1820
3336
 
1821
- sanitized_messages = self._sanitize_messages_for_logging(
3337
+ # Log success
3338
+ sanitized = self._sanitize_messages_for_logging(
1822
3339
  openai_messages, prev_num_openai_messages
1823
3340
  )
1824
3341
  logger.info(
1825
- f"Model {self.model_backend.model_type}, "
1826
- f"index {self.model_backend.current_model_index}, "
1827
- f"iteration {current_iteration}, "
1828
- f"processed these messages: {sanitized_messages}"
3342
+ f"Model {self.model_backend.model_type} "
3343
+ f"[{current_iteration}]: {sanitized}"
1829
3344
  )
3345
+
1830
3346
  if not isinstance(response, ChatCompletion):
1831
3347
  raise TypeError(
1832
- f"Expected response to be a `ChatCompletion` object, but "
1833
- f"got {type(response).__name__} instead."
3348
+ f"Expected ChatCompletion, got {type(response).__name__}"
1834
3349
  )
3350
+
1835
3351
  return self._handle_batch_response(response)
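
The retry loop above is exponential backoff with full jitter: the sleep is drawn uniformly from [0, min(retry_delay * 2**attempt, 60)]. A standalone sketch of that schedule (RuntimeError stands in for the provider's rate-limit error; retry_attempts and retry_delay mirror the agent attributes of the same name):

```python
import random
import time


def run_with_backoff(call, retry_attempts: int = 3, retry_delay: float = 1.0):
    last_error = None
    for attempt in range(retry_attempts):
        try:
            return call()
        except RuntimeError as e:  # stand-in for a rate-limit error
            last_error = e
            if attempt < retry_attempts - 1:
                delay = min(retry_delay * (2 ** attempt), 60.0)
                delay = random.uniform(0, delay)  # full jitter
                time.sleep(delay)
    raise RuntimeError(f"all retries failed: {last_error}")
```
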
1836
3352
 
3353
+ @observe()
1837
3354
  async def _aget_model_response(
1838
3355
  self,
1839
3356
  openai_messages: List[OpenAIMessage],
@@ -1843,62 +3360,61 @@ class ChatAgent(BaseAgent):
1843
3360
  tool_schemas: Optional[List[Dict[str, Any]]] = None,
1844
3361
  prev_num_openai_messages: int = 0,
1845
3362
  ) -> ModelResponse:
1846
- r"""Internal function for agent async step model response.
1847
- Args:
1848
- openai_messages (List[OpenAIMessage]): The OpenAI messages
1849
- to process.
1850
- num_tokens (int): The number of tokens in the context.
1851
- current_iteration (int): The current iteration of the step.
1852
- response_format (Optional[Type[BaseModel]]): The response
1853
- format to use.
1854
- tool_schemas (Optional[List[Dict[str, Any]]]): The tool schemas
1855
- to use.
1856
- prev_num_openai_messages (int): The number of openai messages
1857
- logged in the previous iteration.
1858
-
1859
- Returns:
1860
- ModelResponse: The model response.
1861
- """
1862
-
1863
- response = None
1864
- try:
1865
- response = await self.model_backend.arun(
1866
- openai_messages, response_format, tool_schemas or None
1867
- )
1868
- except Exception as exc:
1869
- logger.error(
1870
- f"An error occurred while running model "
1871
- f"{self.model_backend.model_type}, "
1872
- f"index: {self.model_backend.current_model_index}",
1873
- exc_info=exc,
1874
- )
1875
- error_info = str(exc)
3363
+ r"""Internal function for agent async step model response."""
3364
+ last_error = None
1876
3365
 
1877
- if not response and self.model_backend.num_models > 1:
1878
- raise ModelProcessingError(
1879
- "Unable to process messages: none of the provided models "
1880
- "run successfully."
1881
- )
1882
- elif not response:
3366
+ for attempt in range(self.retry_attempts):
3367
+ try:
3368
+ response = await self.model_backend.arun(
3369
+ openai_messages, response_format, tool_schemas or None
3370
+ )
3371
+ if response:
3372
+ break
3373
+ except RateLimitError as e:
3374
+ if self._is_token_limit_error(e):
3375
+ raise
3376
+ last_error = e
3377
+ if attempt < self.retry_attempts - 1:
3378
+ delay = min(self.retry_delay * (2**attempt), 60.0)
3379
+ delay = random.uniform(0, delay) # Add jitter
3380
+ logger.warning(
3381
+ f"Rate limit hit (attempt {attempt + 1}"
3382
+ f"/{self.retry_attempts}). "
3383
+ f"Retrying in {delay:.1f}s"
3384
+ )
3385
+ await asyncio.sleep(delay)
3386
+ else:
3387
+ logger.error(
3388
+ f"Rate limit exhausted after "
3389
+ f"{self.retry_attempts} attempts"
3390
+ )
3391
+ except Exception:
3392
+ logger.error(
3393
+ f"Model error: {self.model_backend.model_type}",
3394
+ exc_info=True,
3395
+ )
3396
+ raise
3397
+ else:
3398
+ # Loop completed without success
1883
3399
  raise ModelProcessingError(
1884
- f"Unable to process messages: the only provided model "
1885
- f"did not run successfully. Error: {error_info}"
3400
+ f"Unable to process messages: "
3401
+ f"{str(last_error) if last_error else 'Unknown error'}"
1886
3402
  )
1887
3403
 
1888
- sanitized_messages = self._sanitize_messages_for_logging(
3404
+ # Log success
3405
+ sanitized = self._sanitize_messages_for_logging(
1889
3406
  openai_messages, prev_num_openai_messages
1890
3407
  )
1891
3408
  logger.info(
1892
- f"Model {self.model_backend.model_type}, "
1893
- f"index {self.model_backend.current_model_index}, "
1894
- f"iteration {current_iteration}, "
1895
- f"processed these messages: {sanitized_messages}"
3409
+ f"Model {self.model_backend.model_type} "
3410
+ f"[{current_iteration}]: {sanitized}"
1896
3411
  )
3412
+
1897
3413
  if not isinstance(response, ChatCompletion):
1898
3414
  raise TypeError(
1899
- f"Expected response to be a `ChatCompletion` object, but "
1900
- f"got {type(response).__name__} instead."
3415
+ f"Expected ChatCompletion, got {type(response).__name__}"
1901
3416
  )
3417
+
1902
3418
  return self._handle_batch_response(response)
1903
3419
 
1904
3420
  def _sanitize_messages_for_logging(
@@ -1915,11 +3431,6 @@ class ChatAgent(BaseAgent):
1915
3431
  Returns:
1916
3432
  List[OpenAIMessage]: The sanitized OpenAI messages.
1917
3433
  """
1918
- import hashlib
1919
- import os
1920
- import re
1921
- import tempfile
1922
-
1923
3434
  # Create a copy of messages for logging to avoid modifying the
1924
3435
  # original messages
1925
3436
  sanitized_messages = []
@@ -1960,7 +3471,14 @@ class ChatAgent(BaseAgent):
1960
3471
 
1961
3472
  # Save image to temp directory for viewing
1962
3473
  try:
1963
- import base64
3474
+ # Sanitize img_format to prevent path
3475
+ # traversal
3476
+ safe_format = re.sub(
3477
+ r'[^a-zA-Z0-9]', '', img_format
3478
+ )[:10]
3479
+ img_filename = (
3480
+ f"image_{img_hash}.{safe_format}"
3481
+ )
1964
3482
 
1965
3483
  temp_dir = tempfile.gettempdir()
1966
3484
  img_path = os.path.join(
@@ -1975,6 +3493,9 @@ class ChatAgent(BaseAgent):
1975
3493
  base64_data
1976
3494
  )
1977
3495
  )
3496
+ # Register for cleanup
3497
+ with _temp_files_lock:
3498
+ _temp_files.add(img_path)
1978
3499
 
1979
3500
  # Create a file:// URL that can be
1980
3501
  # opened
@@ -2148,9 +3669,9 @@ class ChatAgent(BaseAgent):
2148
3669
  if tool_calls := response.choices[0].message.tool_calls:
2149
3670
  tool_call_requests = []
2150
3671
  for tool_call in tool_calls:
2151
- tool_name = tool_call.function.name
3672
+ tool_name = tool_call.function.name # type: ignore[union-attr]
2152
3673
  tool_call_id = tool_call.id
2153
- args = json.loads(tool_call.function.arguments)
3674
+ args = json.loads(tool_call.function.arguments) # type: ignore[union-attr]
2154
3675
  tool_call_request = ToolCallRequest(
2155
3676
  tool_name=tool_name, args=args, tool_call_id=tool_call_id
2156
3677
  )
@@ -2227,7 +3748,8 @@ class ChatAgent(BaseAgent):
2227
3748
  try:
2228
3749
  raw_result = tool(**args)
2229
3750
  if self.mask_tool_output:
2230
- self._secure_result_store[tool_call_id] = raw_result
3751
+ with self._secure_result_store_lock:
3752
+ self._secure_result_store[tool_call_id] = raw_result
2231
3753
  result = (
2232
3754
  "[The tool has been executed successfully, but the output"
2233
3755
  " from the tool is masked. You can move forward]"
@@ -2285,7 +3807,7 @@ class ChatAgent(BaseAgent):
2285
3807
  # Capture the error message to prevent framework crash
2286
3808
  error_msg = f"Error executing async tool '{func_name}': {e!s}"
2287
3809
  result = f"Tool execution failed: {error_msg}"
2288
- logging.warning(error_msg)
3810
+ logger.warning(error_msg)
2289
3811
  return self._record_tool_calling(func_name, args, result, tool_call_id)
2290
3812
 
2291
3813
  def _record_tool_calling(
@@ -2336,22 +3858,34 @@ class ChatAgent(BaseAgent):
2336
3858
  # This ensures the assistant message (tool call) always appears before
2337
3859
  # the function message (tool result) in the conversation context
2338
3860
  # Use time.time_ns() for nanosecond precision to avoid collisions
2339
- import time
2340
-
2341
3861
  current_time_ns = time.time_ns()
2342
3862
  base_timestamp = current_time_ns / 1_000_000_000 # Convert to seconds
2343
3863
 
2344
3864
  self.update_memory(
2345
- assist_msg, OpenAIBackendRole.ASSISTANT, timestamp=base_timestamp
3865
+ assist_msg,
3866
+ OpenAIBackendRole.ASSISTANT,
3867
+ timestamp=base_timestamp,
3868
+ return_records=self._enable_snapshot_clean,
2346
3869
  )
2347
3870
 
2348
3871
  # Add minimal increment to ensure function message comes after
2349
- self.update_memory(
3872
+ func_records = self.update_memory(
2350
3873
  func_msg,
2351
3874
  OpenAIBackendRole.FUNCTION,
2352
3875
  timestamp=base_timestamp + 1e-6,
3876
+ return_records=self._enable_snapshot_clean,
2353
3877
  )
2354
3878
 
3879
+ # Register tool output for snapshot cleaning if enabled
3880
+ if self._enable_snapshot_clean and not mask_output and func_records:
3881
+ serialized_result = self._serialize_tool_result(result)
3882
+ self._register_tool_output_for_cache(
3883
+ func_name,
3884
+ tool_call_id,
3885
+ serialized_result,
3886
+ cast(List[MemoryRecord], func_records),
3887
+ )
3888
+
2355
3889
  # Record information about this tool call
2356
3890
  tool_record = ToolCallingRecord(
2357
3891
  tool_name=func_name,
@@ -2360,6 +3894,7 @@ class ChatAgent(BaseAgent):
2360
3894
  tool_call_id=tool_call_id,
2361
3895
  )
2362
3896
 
3897
+ self._update_last_tool_call_state(tool_record)
2363
3898
  return tool_record
2364
3899
 
2365
3900
  def _stream(
@@ -2428,7 +3963,7 @@ class ChatAgent(BaseAgent):
2428
3963
  # Check termination condition
2429
3964
  if self.stop_event and self.stop_event.is_set():
2430
3965
  logger.info(
2431
- f"Termination triggered at iteration " f"{iteration_count}"
3966
+ f"Termination triggered at iteration {iteration_count}"
2432
3967
  )
2433
3968
  yield self._step_terminate(
2434
3969
  num_tokens, tool_call_records, "termination_triggered"
@@ -2611,12 +4146,6 @@ class ChatAgent(BaseAgent):
2611
4146
  stream_completed = False
2612
4147
 
2613
4148
  for chunk in stream:
2614
- # Update token usage if available
2615
- if chunk.usage:
2616
- self._update_token_usage_tracker(
2617
- step_token_usage, safe_model_dump(chunk.usage)
2618
- )
2619
-
2620
4149
  # Process chunk delta
2621
4150
  if chunk.choices and len(chunk.choices) > 0:
2622
4151
  choice = chunk.choices[0]
@@ -2649,12 +4178,6 @@ class ChatAgent(BaseAgent):
2649
4178
  # If we have complete tool calls, execute them with
2650
4179
  # sync status updates
2651
4180
  if accumulated_tool_calls:
2652
- # Record assistant message with tool calls first
2653
- self._record_assistant_tool_calls_message(
2654
- accumulated_tool_calls,
2655
- content_accumulator.get_full_content(),
2656
- )
2657
-
2658
4181
  # Execute tools synchronously with
2659
4182
  # optimized status updates
2660
4183
  for (
@@ -2687,7 +4210,49 @@ class ChatAgent(BaseAgent):
2687
4210
  )
2688
4211
 
2689
4212
  self.record_message(final_message)
2690
- break
4213
+ elif chunk.usage and not chunk.choices:
4214
+ # Handle final chunk with usage but empty choices
4215
+ # This happens when stream_options={"include_usage": True}
4216
+ # Update the final usage from this chunk
4217
+ self._update_token_usage_tracker(
4218
+ step_token_usage, safe_model_dump(chunk.usage)
4219
+ )
4220
+
4221
+ # Create final response with final usage
4222
+ final_content = content_accumulator.get_full_content()
4223
+ if final_content.strip():
4224
+ final_message = BaseMessage(
4225
+ role_name=self.role_name,
4226
+ role_type=self.role_type,
4227
+ meta_dict={},
4228
+ content=final_content,
4229
+ )
4230
+
4231
+ if response_format:
4232
+ self._try_format_message(
4233
+ final_message, response_format
4234
+ )
4235
+
4236
+ # Create final response with final usage (not partial)
4237
+ final_response = ChatAgentResponse(
4238
+ msgs=[final_message],
4239
+ terminated=False,
4240
+ info={
4241
+ "id": getattr(chunk, 'id', ''),
4242
+ "usage": step_token_usage.copy(),
4243
+ "finish_reasons": ["stop"],
4244
+ "num_tokens": self._get_token_count(final_content),
4245
+ "tool_calls": tool_call_records or [],
4246
+ "external_tool_requests": None,
4247
+ "streaming": False,
4248
+ "partial": False,
4249
+ },
4250
+ )
4251
+ yield final_response
4252
+ break
4253
+ elif stream_completed:
4254
+ # If we've already seen finish_reason but no usage chunk, exit
4255
+ break
2691
4256
 
2692
4257
  return stream_completed, tool_calls_complete
2693
4258
 
@@ -2767,77 +4332,70 @@ class ChatAgent(BaseAgent):
  accumulated_tool_calls: Dict[str, Any],
  tool_call_records: List[ToolCallingRecord],
  ) -> Generator[ChatAgentResponse, None, None]:
- r"""Execute multiple tools synchronously with
- proper content accumulation, using threads+queue for
- non-blocking status streaming."""
-
- def tool_worker(result_queue, tool_call_data):
- try:
- tool_call_record = self._execute_tool_from_stream_data(
- tool_call_data
- )
- result_queue.put(tool_call_record)
- except Exception as e:
- logger.error(f"Error in threaded tool execution: {e}")
- result_queue.put(None)
+ r"""Execute multiple tools synchronously with proper content
+ accumulation, using ThreadPoolExecutor for better timeout handling."""

  tool_calls_to_execute = []
  for _tool_call_index, tool_call_data in accumulated_tool_calls.items():
  if tool_call_data.get('complete', False):
  tool_calls_to_execute.append(tool_call_data)

- # Phase 2: Execute tools in threads and yield status while waiting
- for tool_call_data in tool_calls_to_execute:
- function_name = tool_call_data['function']['name']
- try:
- args = json.loads(tool_call_data['function']['arguments'])
- except json.JSONDecodeError:
- args = tool_call_data['function']['arguments']
- result_queue: queue.Queue[Optional[ToolCallingRecord]] = (
- queue.Queue()
- )
- thread = threading.Thread(
- target=tool_worker,
- args=(
- self._internal_tools[function_name],
- args,
- result_queue,
- tool_call_data,
- ),
- )
- thread.start()
-
- # Log debug info instead of adding to content
- logger.info(
- f"Calling function: {function_name} with arguments: {args}"
- )
-
- # wait for tool thread to finish with optional timeout
- thread.join(self.tool_execution_timeout)
+ if not tool_calls_to_execute:
+ # No tools to execute, return immediately
+ return
+ yield # Make this a generator
+
+ # Execute tools using ThreadPoolExecutor for proper timeout handling
+ # Use max_workers=len() for parallel execution, with min of 1
+ with concurrent.futures.ThreadPoolExecutor(
+ max_workers=max(1, len(tool_calls_to_execute))
+ ) as executor:
+ # Submit all tools first (parallel execution)
+ futures_map = {}
+ for tool_call_data in tool_calls_to_execute:
+ function_name = tool_call_data['function']['name']
+ try:
+ args = json.loads(tool_call_data['function']['arguments'])
+ except json.JSONDecodeError:
+ args = tool_call_data['function']['arguments']

- # If timeout occurred, mark as error and continue
- if thread.is_alive():
- # Log timeout info instead of adding to content
- logger.warning(
- f"Function '{function_name}' timed out after "
- f"{self.tool_execution_timeout} seconds"
+ # Log debug info
+ logger.info(
+ f"Calling function: {function_name} with arguments: {args}"
  )

- # Detach thread (it may still finish later). Skip recording.
- continue
-
- # Tool finished, get result
- tool_call_record = result_queue.get()
- if tool_call_record:
- tool_call_records.append(tool_call_record)
- raw_result = tool_call_record.result
- result_str = str(raw_result)
+ # Submit tool execution (non-blocking)
+ future = executor.submit(
+ self._execute_tool_from_stream_data, tool_call_data
+ )
+ futures_map[future] = (function_name, tool_call_data)
+
+ # Wait for all futures to complete (or timeout)
+ for future in concurrent.futures.as_completed(
+ futures_map.keys(),
+ timeout=self.tool_execution_timeout
+ if self.tool_execution_timeout
+ else None,
+ ):
+ function_name, tool_call_data = futures_map[future]

- # Log debug info instead of adding to content
- logger.info(f"Function output: {result_str}")
- else:
- # Error already logged
- continue
+ try:
+ tool_call_record = future.result()
+ if tool_call_record:
+ tool_call_records.append(tool_call_record)
+ logger.info(
+ f"Function output: {tool_call_record.result}"
+ )
+ except concurrent.futures.TimeoutError:
+ logger.warning(
+ f"Function '{function_name}' timed out after "
+ f"{self.tool_execution_timeout} seconds"
+ )
+ future.cancel()
+ except Exception as e:
+ logger.error(
+ f"Error executing tool '{function_name}': {e}"
+ )

  # Ensure this function remains a generator (required by type signature)
  return
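The rewritten body above swaps the hand-rolled thread-plus-queue worker for concurrent.futures: all tool calls are submitted up front, then collected with as_completed under a single timeout. A minimal standalone sketch of that stdlib pattern (run_tool, the call list, and the 2-second timeout are illustrative, not taken from the package):

    import concurrent.futures
    import time


    def run_tool(name: str, delay: float) -> str:
        # Stand-in for a real tool invocation.
        time.sleep(delay)
        return f"{name} done"


    calls = [("search", 0.1), ("calculator", 0.2)]
    results = []

    with concurrent.futures.ThreadPoolExecutor(max_workers=len(calls)) as executor:
        futures = {executor.submit(run_tool, n, d): n for n, d in calls}
        try:
            for future in concurrent.futures.as_completed(futures, timeout=2.0):
                name = futures[future]
                try:
                    results.append(future.result())
                except Exception as exc:
                    print(f"tool {name} failed: {exc}")
        except concurrent.futures.TimeoutError:
            # as_completed raises when unfinished futures remain at the deadline.
            print("some tools did not finish before the timeout")

    print(results)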
@@ -2857,10 +4415,19 @@ class ChatAgent(BaseAgent):
  tool = self._internal_tools[function_name]
  try:
  result = tool(**args)
+ # First, create and record the assistant message with tool
+ # call
+ assist_msg = FunctionCallingMessage(
+ role_name=self.role_name,
+ role_type=self.role_type,
+ meta_dict=None,
+ content="",
+ func_name=function_name,
+ args=args,
+ tool_call_id=tool_call_id,
+ )

- # Only record the tool response message, not the assistant
- # message assistant message with tool_calls was already
- # recorded in _record_assistant_tool_calls_message
+ # Then create the tool response message
  func_msg = FunctionCallingMessage(
  role_name=self.role_name,
  role_type=self.role_type,
@@ -2871,21 +4438,39 @@ class ChatAgent(BaseAgent):
  tool_call_id=tool_call_id,
  )

- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
+ # Record both messages with precise timestamps to ensure
+ # correct ordering
+ current_time_ns = time.time_ns()
+ base_timestamp = (
+ current_time_ns / 1_000_000_000
+ ) # Convert to seconds
+
+ self.update_memory(
+ assist_msg,
+ OpenAIBackendRole.ASSISTANT,
+ timestamp=base_timestamp,
+ )
+ self.update_memory(
+ func_msg,
+ OpenAIBackendRole.FUNCTION,
+ timestamp=base_timestamp + 1e-6,
+ )

- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
  tool_name=function_name,
  args=args,
  result=result,
  tool_call_id=tool_call_id,
  )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record

  except Exception as e:
  error_msg = (
  f"Error executing tool '{function_name}': {e!s}"
  )
  result = {"error": error_msg}
- logging.warning(error_msg)
+ logger.warning(error_msg)

  # Record error response
  func_msg = FunctionCallingMessage(
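The hunk above records the assistant tool-call message and the tool result with explicit timestamps one microsecond apart, so a chronological sort of memory records can never interleave them with entries written in the same clock tick. A small illustrative sketch of the idea (the dict-based record and the record() helper are simplified stand-ins, not the package's memory classes):

    import time

    records = []


    def record(role: str, content: str, timestamp: float) -> None:
        records.append({"role": role, "content": content, "timestamp": timestamp})


    base = time.time_ns() / 1_000_000_000  # nanosecond clock, expressed in seconds

    record("assistant", "<tool call: search(query='camel')>", base)
    record("function", "<tool result: ...>", base + 1e-6)  # strictly later

    # A timestamp sort now always keeps the call before its result.
    ordered = sorted(records, key=lambda r: r["timestamp"])
    assert [r["role"] for r in ordered] == ["assistant", "function"]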
@@ -2900,12 +4485,14 @@ class ChatAgent(BaseAgent):

  self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)

- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
  tool_name=function_name,
  args=args,
  result=result,
  tool_call_id=tool_call_id,
  )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record
  else:
  logger.warning(
  f"Tool '{function_name}' not found in internal tools"
@@ -2927,6 +4514,23 @@ class ChatAgent(BaseAgent):
  tool_call_id = tool_call_data['id']

  if function_name in self._internal_tools:
+ # Create the tool call message
+ assist_msg = FunctionCallingMessage(
+ role_name=self.role_name,
+ role_type=self.role_type,
+ meta_dict=None,
+ content="",
+ func_name=function_name,
+ args=args,
+ tool_call_id=tool_call_id,
+ )
+ assist_ts = time.time_ns() / 1_000_000_000
+ self.update_memory(
+ assist_msg,
+ OpenAIBackendRole.ASSISTANT,
+ timestamp=assist_ts,
+ )
+
  tool = self._internal_tools[function_name]
  try:
  # Try different invocation paths in order of preference
@@ -2956,9 +4560,7 @@ class ChatAgent(BaseAgent):
  # Fallback: synchronous call
  result = tool(**args)

- # Only record the tool response message, not the assistant
- # message assistant message with tool_calls was already
- # recorded in _record_assistant_tool_calls_message
+ # Create the tool response message
  func_msg = FunctionCallingMessage(
  role_name=self.role_name,
  role_type=self.role_type,
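The async path above tries the tool's preferred invocation route first and only falls back to a plain synchronous call as the last resort. A generic sketch of that kind of dispatch, assuming nothing about the package's FunctionTool internals (the async_call attribute check and the invoke_tool helper are illustrative, not the library's API):

    import asyncio
    import inspect
    from typing import Any, Callable


    async def invoke_tool(tool: Callable[..., Any], **kwargs: Any) -> Any:
        # Prefer an explicit async entry point if the object exposes one.
        async_call = getattr(tool, "async_call", None)
        if async_call is not None and inspect.iscoroutinefunction(async_call):
            return await async_call(**kwargs)
        # Next, await the callable itself if it is a coroutine function.
        if inspect.iscoroutinefunction(tool):
            return await tool(**kwargs)
        # Fallback: run the synchronous callable without blocking the loop.
        return await asyncio.to_thread(tool, **kwargs)


    async def main() -> None:
        def add(a: int, b: int) -> int:
            return a + b

        print(await invoke_tool(add, a=1, b=2))


    asyncio.run(main())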
@@ -2968,22 +4570,28 @@ class ChatAgent(BaseAgent):
  result=result,
  tool_call_id=tool_call_id,
  )
+ func_ts = time.time_ns() / 1_000_000_000
+ self.update_memory(
+ func_msg,
+ OpenAIBackendRole.FUNCTION,
+ timestamp=func_ts,
+ )

- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
-
- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
  tool_name=function_name,
  args=args,
  result=result,
  tool_call_id=tool_call_id,
  )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record

  except Exception as e:
  error_msg = (
  f"Error executing async tool '{function_name}': {e!s}"
  )
  result = {"error": error_msg}
- logging.warning(error_msg)
+ logger.warning(error_msg)

  # Record error response
  func_msg = FunctionCallingMessage(
@@ -2995,15 +4603,21 @@ class ChatAgent(BaseAgent):
  result=result,
  tool_call_id=tool_call_id,
  )
+ func_ts = time.time_ns() / 1_000_000_000
+ self.update_memory(
+ func_msg,
+ OpenAIBackendRole.FUNCTION,
+ timestamp=func_ts,
+ )

- self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
-
- return ToolCallingRecord(
+ tool_record = ToolCallingRecord(
  tool_name=function_name,
  args=args,
  result=result,
  tool_call_id=tool_call_id,
  )
+ self._update_last_tool_call_state(tool_record)
+ return tool_record
  else:
  logger.warning(
  f"Tool '{function_name}' not found in internal tools"
@@ -3093,7 +4707,7 @@ class ChatAgent(BaseAgent):
  # Check termination condition
  if self.stop_event and self.stop_event.is_set():
  logger.info(
- f"Termination triggered at iteration " f"{iteration_count}"
+ f"Termination triggered at iteration {iteration_count}"
  )
  yield self._step_terminate(
  num_tokens, tool_call_records, "termination_triggered"
@@ -3320,18 +4934,13 @@ class ChatAgent(BaseAgent):
  response_format: Optional[Type[BaseModel]] = None,
  ) -> AsyncGenerator[Union[ChatAgentResponse, Tuple[bool, bool]], None]:
  r"""Async version of process streaming chunks with
- content accumulator."""
+ content accumulator.
+ """

  tool_calls_complete = False
  stream_completed = False

  async for chunk in stream:
- # Update token usage if available
- if chunk.usage:
- self._update_token_usage_tracker(
- step_token_usage, safe_model_dump(chunk.usage)
- )
-
  # Process chunk delta
  if chunk.choices and len(chunk.choices) > 0:
  choice = chunk.choices[0]
@@ -3364,13 +4973,6 @@ class ChatAgent(BaseAgent):
  # If we have complete tool calls, execute them with
  # async status updates
  if accumulated_tool_calls:
- # Record assistant message with
- # tool calls first
- self._record_assistant_tool_calls_message(
- accumulated_tool_calls,
- content_accumulator.get_full_content(),
- )
-
  # Execute tools asynchronously with real-time
  # status updates
  async for (
@@ -3405,7 +5007,49 @@ class ChatAgent(BaseAgent):
  )

  self.record_message(final_message)
- break
+ elif chunk.usage and not chunk.choices:
+ # Handle final chunk with usage but empty choices
+ # This happens when stream_options={"include_usage": True}
+ # Update the final usage from this chunk
+ self._update_token_usage_tracker(
+ step_token_usage, safe_model_dump(chunk.usage)
+ )
+
+ # Create final response with final usage
+ final_content = content_accumulator.get_full_content()
+ if final_content.strip():
+ final_message = BaseMessage(
+ role_name=self.role_name,
+ role_type=self.role_type,
+ meta_dict={},
+ content=final_content,
+ )
+
+ if response_format:
+ self._try_format_message(
+ final_message, response_format
+ )
+
+ # Create final response with final usage (not partial)
+ final_response = ChatAgentResponse(
+ msgs=[final_message],
+ terminated=False,
+ info={
+ "id": getattr(chunk, 'id', ''),
+ "usage": step_token_usage.copy(),
+ "finish_reasons": ["stop"],
+ "num_tokens": self._get_token_count(final_content),
+ "tool_calls": tool_call_records or [],
+ "external_tool_requests": None,
+ "streaming": False,
+ "partial": False,
+ },
+ )
+ yield final_response
+ break
+ elif stream_completed:
+ # If we've already seen finish_reason but no usage chunk, exit
+ break

  # Yield the final status as a tuple
  yield (stream_completed, tool_calls_complete)
@@ -3498,15 +5142,18 @@ class ChatAgent(BaseAgent):
  ) -> ChatAgentResponse:
  r"""Create a streaming response using content accumulator."""

- # Add new content to accumulator and get full content
+ # Add new content; only build full content when needed
  accumulator.add_streaming_content(new_content)
- full_content = accumulator.get_full_content()
+ if self.stream_accumulate:
+ message_content = accumulator.get_full_content()
+ else:
+ message_content = new_content

  message = BaseMessage(
  role_name=self.role_name,
  role_type=self.role_type,
  meta_dict={},
- content=full_content,
+ content=message_content,
  )

  return ChatAgentResponse(
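With the new stream_accumulate flag, each streamed ChatAgentResponse carries either the full text accumulated so far (the previous behaviour) or only the latest delta. A toy accumulator showing the difference (the class and the emit() helper are illustrative, not the package's implementation):

    class StreamingAccumulator:
        """Collects streamed text deltas and can report the running total."""

        def __init__(self) -> None:
            self._parts: list = []

        def add_streaming_content(self, delta: str) -> None:
            self._parts.append(delta)

        def get_full_content(self) -> str:
            return "".join(self._parts)


    def emit(acc: StreamingAccumulator, delta: str, accumulate: bool) -> str:
        acc.add_streaming_content(delta)
        # accumulate=True -> cumulative snapshots; False -> raw deltas only.
        return acc.get_full_content() if accumulate else delta


    acc = StreamingAccumulator()
    print(emit(acc, "Hel", accumulate=True))    # "Hel"
    print(emit(acc, "lo!", accumulate=True))    # "Hello!"

    acc2 = StreamingAccumulator()
    print(emit(acc2, "Hel", accumulate=False))  # "Hel"
    print(emit(acc2, "lo!", accumulate=False))  # "lo!"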
@@ -3516,7 +5163,7 @@ class ChatAgent(BaseAgent):
  "id": response_id,
  "usage": step_token_usage.copy(),
  "finish_reasons": ["streaming"],
- "num_tokens": self._get_token_count(full_content),
+ "num_tokens": self._get_token_count(message_content),
  "tool_calls": tool_call_records or [],
  "external_tool_requests": None,
  "streaming": True,
@@ -3572,10 +5219,12 @@ class ChatAgent(BaseAgent):
  configuration.
  """
  # Create a new instance with the same configuration
- # If with_memory is True, set system_message to None
- # If with_memory is False, use the original system message
+ # If with_memory is True, set system_message to None (it will be
+ # copied from memory below, including any workflow context)
+ # If with_memory is False, use the current system message
+ # (which may include appended workflow context)
  # To avoid duplicated system memory.
- system_message = None if with_memory else self._original_system_message
+ system_message = None if with_memory else self._system_message

  # Clone tools and collect toolkits that need registration
  cloned_tools, toolkits_to_register = self._clone_tools()
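For context, a short usage sketch of the cloning behaviour the comments above describe; it assumes a ChatAgent with a default or preconfigured model backend, and the key point is only how with_memory selects the system-message source:

    from camel.agents import ChatAgent

    # Assumes a usable default model backend is configured (e.g. via env vars).
    base_agent = ChatAgent(system_message="You are a helpful assistant.")

    # with_memory=True: the clone starts with system_message=None and then
    # copies the original agent's memory, so the system record (including any
    # appended workflow context) comes over exactly once.
    stateful_clone = base_agent.clone(with_memory=True)

    # with_memory=False: the clone reuses the current system message only,
    # without the conversation history.
    fresh_clone = base_agent.clone(with_memory=False)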
@@ -3589,7 +5238,7 @@ class ChatAgent(BaseAgent):
  self.memory.get_context_creator(), "token_limit", None
  ),
  output_language=self._output_language,
- tools=cloned_tools,
+ tools=cast(List[Union[FunctionTool, Callable]], cloned_tools),
  toolkits_to_register_agent=toolkits_to_register,
  external_tools=[
  schema for schema in self._external_tool_schemas.values()
@@ -3603,6 +5252,7 @@ class ChatAgent(BaseAgent):
  tool_execution_timeout=self.tool_execution_timeout,
  pause_event=self.pause_event,
  prune_tool_calls_from_memory=self.prune_tool_calls_from_memory,
+ stream_accumulate=self.stream_accumulate,
  )

  # Copy memory if requested
@@ -3617,9 +5267,7 @@ class ChatAgent(BaseAgent):

  def _clone_tools(
  self,
- ) -> Tuple[
- List[Union[FunctionTool, Callable]], List[RegisteredAgentToolkit]
- ]:
+ ) -> Tuple[List[FunctionTool], List[RegisteredAgentToolkit]]:
  r"""Clone tools and return toolkits that need agent registration.

  This method handles stateful toolkits by cloning them if they have
@@ -3674,15 +5322,65 @@ class ChatAgent(BaseAgent):
  # Get the method from the cloned (or original) toolkit
  toolkit = cloned_toolkits[toolkit_id]
  method_name = tool.func.__name__
+
+ # Check if toolkit was actually cloned or just reused
+ toolkit_was_cloned = toolkit is not toolkit_instance
+
  if hasattr(toolkit, method_name):
  new_method = getattr(toolkit, method_name)
- cloned_tools.append(new_method)
+
+ # If toolkit wasn't cloned (stateless), preserve the
+ # original function to maintain any enhancements/wrappers
+ if not toolkit_was_cloned:
+ # Toolkit is stateless, safe to reuse original function
+ cloned_tools.append(
+ FunctionTool(
+ func=tool.func,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ )
+ continue
+
+ # Toolkit was cloned, use the new method
+ # Wrap cloned method into a new FunctionTool,
+ # preserving schema
+ try:
+ new_tool = FunctionTool(
+ func=new_method,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ cloned_tools.append(new_tool)
+ except Exception as e:
+ # If wrapping fails, fallback to wrapping the original
+ # function with its schema to maintain consistency
+ logger.warning(
+ f"Failed to wrap cloned toolkit "
+ f"method '{method_name}' "
+ f"with FunctionTool: {e}. Using original "
+ f"function with preserved schema instead."
+ )
+ cloned_tools.append(
+ FunctionTool(
+ func=tool.func,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ )
  else:
- # Fallback to original function
- cloned_tools.append(tool.func)
+ # Fallback to original function wrapped in FunctionTool
+ cloned_tools.append(
+ FunctionTool(
+ func=tool.func,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ )
  else:
- # Not a toolkit method, just use the original function
- cloned_tools.append(tool.func)
+ # Not a toolkit method, preserve FunctionTool schema directly
+ cloned_tools.append(
+ FunctionTool(
+ func=tool.func,
+ openai_tool_schema=tool.get_openai_tool_schema(),
+ )
+ )

  return cloned_tools, toolkits_to_register
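The _clone_tools changes above stop handing raw callables back and instead wrap every function in a FunctionTool while reusing the already-generated OpenAI schema, so cloned agents keep identical tool descriptions. A minimal sketch of that wrapping step, assuming only the FunctionTool constructor arguments visible in the diff (the get_weather tool is illustrative):

    from camel.toolkits import FunctionTool


    def get_weather(city: str) -> str:
        r"""Return a canned weather report for a city.

        Args:
            city (str): Name of the city to report on.
        """
        return f"It is sunny in {city}."


    # Build a tool once; the OpenAI tool schema is derived from the docstring.
    original_tool = FunctionTool(func=get_weather)
    schema = original_tool.get_openai_tool_schema()

    # When cloning, wrap the (possibly rebound) callable in a fresh FunctionTool
    # and pass the existing schema through so descriptions stay identical.
    cloned_tool = FunctionTool(func=get_weather, openai_tool_schema=schema)

    assert cloned_tool.get_openai_tool_schema() == schema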