camel-ai 0.2.73a4__py3-none-any.whl → 0.2.80a2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- camel/__init__.py +1 -1
- camel/agents/_utils.py +38 -0
- camel/agents/chat_agent.py +2217 -519
- camel/agents/mcp_agent.py +30 -27
- camel/configs/__init__.py +15 -0
- camel/configs/aihubmix_config.py +88 -0
- camel/configs/amd_config.py +70 -0
- camel/configs/cometapi_config.py +104 -0
- camel/configs/minimax_config.py +93 -0
- camel/configs/nebius_config.py +103 -0
- camel/data_collectors/alpaca_collector.py +15 -6
- camel/datasets/base_generator.py +39 -10
- camel/environments/single_step.py +28 -3
- camel/environments/tic_tac_toe.py +1 -1
- camel/interpreters/__init__.py +2 -0
- camel/interpreters/docker/Dockerfile +3 -12
- camel/interpreters/e2b_interpreter.py +34 -1
- camel/interpreters/microsandbox_interpreter.py +395 -0
- camel/loaders/__init__.py +11 -2
- camel/loaders/chunkr_reader.py +9 -0
- camel/memories/agent_memories.py +48 -4
- camel/memories/base.py +26 -0
- camel/memories/blocks/chat_history_block.py +122 -4
- camel/memories/context_creators/score_based.py +25 -384
- camel/memories/records.py +88 -8
- camel/messages/base.py +153 -34
- camel/models/__init__.py +10 -0
- camel/models/aihubmix_model.py +83 -0
- camel/models/aiml_model.py +1 -16
- camel/models/amd_model.py +101 -0
- camel/models/anthropic_model.py +6 -19
- camel/models/aws_bedrock_model.py +2 -33
- camel/models/azure_openai_model.py +114 -89
- camel/models/base_audio_model.py +3 -1
- camel/models/base_model.py +32 -14
- camel/models/cohere_model.py +1 -16
- camel/models/cometapi_model.py +83 -0
- camel/models/crynux_model.py +1 -16
- camel/models/deepseek_model.py +1 -16
- camel/models/fish_audio_model.py +6 -0
- camel/models/gemini_model.py +36 -18
- camel/models/groq_model.py +1 -17
- camel/models/internlm_model.py +1 -16
- camel/models/litellm_model.py +1 -16
- camel/models/lmstudio_model.py +1 -17
- camel/models/minimax_model.py +83 -0
- camel/models/mistral_model.py +1 -16
- camel/models/model_factory.py +27 -1
- camel/models/modelscope_model.py +1 -16
- camel/models/moonshot_model.py +105 -24
- camel/models/nebius_model.py +83 -0
- camel/models/nemotron_model.py +0 -5
- camel/models/netmind_model.py +1 -16
- camel/models/novita_model.py +1 -16
- camel/models/nvidia_model.py +1 -16
- camel/models/ollama_model.py +4 -19
- camel/models/openai_compatible_model.py +62 -41
- camel/models/openai_model.py +62 -57
- camel/models/openrouter_model.py +1 -17
- camel/models/ppio_model.py +1 -16
- camel/models/qianfan_model.py +1 -16
- camel/models/qwen_model.py +1 -16
- camel/models/reka_model.py +1 -16
- camel/models/samba_model.py +34 -47
- camel/models/sglang_model.py +64 -31
- camel/models/siliconflow_model.py +1 -16
- camel/models/stub_model.py +0 -4
- camel/models/togetherai_model.py +1 -16
- camel/models/vllm_model.py +1 -16
- camel/models/volcano_model.py +0 -17
- camel/models/watsonx_model.py +1 -16
- camel/models/yi_model.py +1 -16
- camel/models/zhipuai_model.py +60 -16
- camel/parsers/__init__.py +18 -0
- camel/parsers/mcp_tool_call_parser.py +176 -0
- camel/retrievers/auto_retriever.py +1 -0
- camel/runtimes/daytona_runtime.py +11 -12
- camel/societies/__init__.py +2 -0
- camel/societies/workforce/__init__.py +2 -0
- camel/societies/workforce/events.py +122 -0
- camel/societies/workforce/prompts.py +146 -66
- camel/societies/workforce/role_playing_worker.py +15 -11
- camel/societies/workforce/single_agent_worker.py +302 -65
- camel/societies/workforce/structured_output_handler.py +30 -18
- camel/societies/workforce/task_channel.py +163 -27
- camel/societies/workforce/utils.py +107 -13
- camel/societies/workforce/workflow_memory_manager.py +772 -0
- camel/societies/workforce/workforce.py +1949 -579
- camel/societies/workforce/workforce_callback.py +74 -0
- camel/societies/workforce/workforce_logger.py +168 -145
- camel/societies/workforce/workforce_metrics.py +33 -0
- camel/storages/key_value_storages/json.py +15 -2
- camel/storages/key_value_storages/mem0_cloud.py +48 -47
- camel/storages/object_storages/google_cloud.py +1 -1
- camel/storages/vectordb_storages/oceanbase.py +13 -13
- camel/storages/vectordb_storages/qdrant.py +3 -3
- camel/storages/vectordb_storages/tidb.py +8 -6
- camel/tasks/task.py +4 -3
- camel/toolkits/__init__.py +20 -7
- camel/toolkits/aci_toolkit.py +45 -0
- camel/toolkits/base.py +6 -4
- camel/toolkits/code_execution.py +28 -1
- camel/toolkits/context_summarizer_toolkit.py +684 -0
- camel/toolkits/dappier_toolkit.py +5 -1
- camel/toolkits/dingtalk.py +1135 -0
- camel/toolkits/edgeone_pages_mcp_toolkit.py +11 -31
- camel/toolkits/excel_toolkit.py +1 -1
- camel/toolkits/{file_write_toolkit.py → file_toolkit.py} +430 -36
- camel/toolkits/function_tool.py +13 -3
- camel/toolkits/github_toolkit.py +104 -17
- camel/toolkits/gmail_toolkit.py +1839 -0
- camel/toolkits/google_calendar_toolkit.py +38 -4
- camel/toolkits/google_drive_mcp_toolkit.py +12 -31
- camel/toolkits/hybrid_browser_toolkit/config_loader.py +15 -0
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit.py +77 -8
- camel/toolkits/hybrid_browser_toolkit/hybrid_browser_toolkit_ts.py +884 -88
- camel/toolkits/hybrid_browser_toolkit/installer.py +203 -0
- camel/toolkits/hybrid_browser_toolkit/ts/package-lock.json +5 -612
- camel/toolkits/hybrid_browser_toolkit/ts/package.json +0 -1
- camel/toolkits/hybrid_browser_toolkit/ts/src/browser-session.ts +959 -89
- camel/toolkits/hybrid_browser_toolkit/ts/src/config-loader.ts +9 -2
- camel/toolkits/hybrid_browser_toolkit/ts/src/hybrid-browser-toolkit.ts +281 -213
- camel/toolkits/hybrid_browser_toolkit/ts/src/parent-child-filter.ts +226 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/snapshot-parser.ts +219 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/som-screenshot-injected.ts +543 -0
- camel/toolkits/hybrid_browser_toolkit/ts/src/types.ts +23 -3
- camel/toolkits/hybrid_browser_toolkit/ts/websocket-server.js +72 -7
- camel/toolkits/hybrid_browser_toolkit/ws_wrapper.py +582 -132
- camel/toolkits/hybrid_browser_toolkit_py/actions.py +158 -0
- camel/toolkits/hybrid_browser_toolkit_py/browser_session.py +55 -8
- camel/toolkits/hybrid_browser_toolkit_py/config_loader.py +43 -0
- camel/toolkits/hybrid_browser_toolkit_py/hybrid_browser_toolkit.py +321 -8
- camel/toolkits/hybrid_browser_toolkit_py/snapshot.py +10 -4
- camel/toolkits/hybrid_browser_toolkit_py/unified_analyzer.js +45 -4
- camel/toolkits/{openai_image_toolkit.py → image_generation_toolkit.py} +151 -53
- camel/toolkits/klavis_toolkit.py +5 -1
- camel/toolkits/markitdown_toolkit.py +27 -1
- camel/toolkits/math_toolkit.py +64 -10
- camel/toolkits/mcp_toolkit.py +366 -71
- camel/toolkits/memory_toolkit.py +5 -1
- camel/toolkits/message_integration.py +18 -13
- camel/toolkits/minimax_mcp_toolkit.py +195 -0
- camel/toolkits/note_taking_toolkit.py +19 -10
- camel/toolkits/notion_mcp_toolkit.py +16 -26
- camel/toolkits/openbb_toolkit.py +5 -1
- camel/toolkits/origene_mcp_toolkit.py +8 -49
- camel/toolkits/playwright_mcp_toolkit.py +12 -31
- camel/toolkits/resend_toolkit.py +168 -0
- camel/toolkits/search_toolkit.py +264 -91
- camel/toolkits/slack_toolkit.py +64 -10
- camel/toolkits/terminal_toolkit/__init__.py +18 -0
- camel/toolkits/terminal_toolkit/terminal_toolkit.py +957 -0
- camel/toolkits/terminal_toolkit/utils.py +532 -0
- camel/toolkits/vertex_ai_veo_toolkit.py +590 -0
- camel/toolkits/video_analysis_toolkit.py +17 -11
- camel/toolkits/wechat_official_toolkit.py +483 -0
- camel/toolkits/zapier_toolkit.py +5 -1
- camel/types/__init__.py +2 -2
- camel/types/enums.py +274 -7
- camel/types/openai_types.py +2 -2
- camel/types/unified_model_type.py +15 -0
- camel/utils/commons.py +36 -5
- camel/utils/constants.py +3 -0
- camel/utils/context_utils.py +1003 -0
- camel/utils/mcp.py +138 -4
- camel/utils/token_counting.py +43 -20
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/METADATA +223 -83
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/RECORD +170 -141
- camel/loaders/pandas_reader.py +0 -368
- camel/toolkits/openai_agent_toolkit.py +0 -135
- camel/toolkits/terminal_toolkit.py +0 -1550
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/WHEEL +0 -0
- {camel_ai-0.2.73a4.dist-info → camel_ai-0.2.80a2.dist-info}/licenses/LICENSE +0 -0
camel/agents/chat_agent.py
CHANGED

@@ -14,13 +14,23 @@
 from __future__ import annotations
 
 import asyncio
+import atexit
+import base64
+import concurrent.futures
+import hashlib
+import inspect
 import json
-import
-import
+import os
+import random
+import re
+import tempfile
 import textwrap
 import threading
 import time
 import uuid
+import warnings
+from dataclasses import dataclass
+from datetime import datetime
 from pathlib import Path
 from typing import (
     TYPE_CHECKING,
@@ -40,12 +50,14 @@ from typing import (
 
 from openai import (
     AsyncStream,
+    RateLimitError,
     Stream,
 )
 from pydantic import BaseModel, ValidationError
 
 from camel.agents._types import ModelResponse, ToolCallRequest
 from camel.agents._utils import (
+    build_default_summary_prompt,
     convert_to_function_tool,
     convert_to_schema,
     get_info_dict,
@@ -57,6 +69,7 @@ from camel.logger import get_logger
 from camel.memories import (
     AgentMemory,
     ChatHistoryMemory,
+    ContextRecord,
     MemoryRecord,
     ScoreBasedContextCreator,
 )
@@ -85,20 +98,46 @@ from camel.types import (
 )
 from camel.types.agents import ToolCallingRecord
 from camel.utils import (
+    Constants,
     get_model_encoding,
     model_from_json_schema,
 )
 from camel.utils.commons import dependencies_required
+from camel.utils.context_utils import ContextUtility
+
+TOKEN_LIMIT_ERROR_MARKERS = (
+    "context_length_exceeded",
+    "prompt is too long",
+    "exceeded your current quota",
+    "tokens must be reduced",
+    "context length",
+    "token count",
+    "context limit",
+)
 
 if TYPE_CHECKING:
     from camel.terminators import ResponseTerminator
 
 logger = get_logger(__name__)
 
+# Cleanup temp files on exit
+_temp_files: Set[str] = set()
+_temp_files_lock = threading.Lock()
+
+
+def _cleanup_temp_files():
+    with _temp_files_lock:
+        for path in _temp_files:
+            try:
+                os.unlink(path)
+            except Exception:
+                pass
+
+
+atexit.register(_cleanup_temp_files)
+
 # AgentOps decorator setting
 try:
-    import os
-
     if os.getenv("AGENTOPS_API_KEY") is not None:
         from agentops import track_agent
     else:
@@ -132,13 +171,23 @@ SIMPLE_FORMAT_PROMPT = TextPrompt(
 )
 
 
+@dataclass
+class _ToolOutputHistoryEntry:
+    tool_name: str
+    tool_call_id: str
+    result_text: str
+    record_uuids: List[str]
+    record_timestamps: List[float]
+    cached: bool = False
+
+
 class StreamContentAccumulator:
     r"""Manages content accumulation across streaming responses to ensure
     all responses contain complete cumulative content."""
 
     def __init__(self):
         self.base_content = ""  # Content before tool calls
-        self.current_content =
+        self.current_content = []  # Accumulated streaming fragments
         self.tool_status_messages = []  # Accumulated tool status messages
 
     def set_base_content(self, content: str):
@@ -147,7 +196,7 @@ class StreamContentAccumulator:
 
     def add_streaming_content(self, new_content: str):
         r"""Add new streaming content."""
-        self.current_content
+        self.current_content.append(new_content)
 
     def add_tool_status(self, status_message: str):
         r"""Add a tool status message."""
@@ -156,16 +205,18 @@ class StreamContentAccumulator:
     def get_full_content(self) -> str:
         r"""Get the complete accumulated content."""
         tool_messages = "".join(self.tool_status_messages)
-
+        current = "".join(self.current_content)
+        return self.base_content + tool_messages + current
 
     def get_content_with_new_status(self, status_message: str) -> str:
         r"""Get content with a new status message appended."""
         tool_messages = "".join([*self.tool_status_messages, status_message])
-
+        current = "".join(self.current_content)
+        return self.base_content + tool_messages + current
 
     def reset_streaming_content(self):
         r"""Reset only the streaming content, keep base and tool status."""
-        self.current_content =
+        self.current_content = []
 
 
 class StreamingChatAgentResponse:
@@ -186,13 +237,10 @@ class StreamingChatAgentResponse:
     def _ensure_latest_response(self):
         r"""Ensure we have the latest response by consuming the generator."""
         if not self._consumed:
-
-
-
-
-                self._consumed = True
-            except StopIteration:
-                self._consumed = True
+            for response in self._generator:
+                self._responses.append(response)
+                self._current_response = response
+            self._consumed = True
 
     @property
     def msgs(self) -> List[BaseMessage]:
@@ -230,17 +278,14 @@ class StreamingChatAgentResponse:
         r"""Make this object iterable."""
         if self._consumed:
             # If already consumed, iterate over stored responses
-
+            yield from self._responses
         else:
            # If not consumed, consume and yield
-
-
-
-
-
-                self._consumed = True
-            except StopIteration:
-                self._consumed = True
+            for response in self._generator:
+                self._responses.append(response)
+                self._current_response = response
+                yield response
+            self._consumed = True
 
     def __getattr__(self, name):
         r"""Forward any other attribute access to the latest response."""
@@ -271,13 +316,10 @@ class AsyncStreamingChatAgentResponse:
    async def _ensure_latest_response(self):
         r"""Ensure the latest response by consuming the async generator."""
         if not self._consumed:
-
-
-
-
-                self._consumed = True
-            except StopAsyncIteration:
-                self._consumed = True
+            async for response in self._async_generator:
+                self._responses.append(response)
+                self._current_response = response
+            self._consumed = True
 
     async def _get_final_response(self) -> ChatAgentResponse:
         r"""Get the final response after consuming the entire stream."""
@@ -303,14 +345,11 @@ class AsyncStreamingChatAgentResponse:
         else:
             # If not consumed, consume and yield
             async def _consume_and_yield():
-
-
-
-
-
-                    self._consumed = True
-                except StopAsyncIteration:
-                    self._consumed = True
+                async for response in self._async_generator:
+                    self._responses.append(response)
+                    self._current_response = response
+                    yield response
+                self._consumed = True
 
             return _consume_and_yield()
 
@@ -338,9 +377,9 @@ class ChatAgent(BaseAgent):
         message_window_size (int, optional): The maximum number of previous
             messages to include in the context window. If `None`, no windowing
            is performed. (default: :obj:`None`)
-
-
-
+        summarize_threshold (int, optional): The percentage of the context
+            window that triggers summarization. If `None`, will trigger
+            summarization when the context window is full.
            (default: :obj:`None`)
         output_language (str, optional): The language to be output by the
             agent. (default: :obj:`None`)
@@ -378,14 +417,35 @@ class ChatAgent(BaseAgent):
            for individual tool execution. If None, wait indefinitely.
         mask_tool_output (Optional[bool]): Whether to return a sanitized
             placeholder instead of the raw tool output. (default: :obj:`False`)
-        pause_event (Optional[asyncio.Event]): Event to
-            agent's operation. When clear, the agent will
-
+        pause_event (Optional[Union[threading.Event, asyncio.Event]]): Event to
+            signal pause of the agent's operation. When clear, the agent will
+            pause its execution. Use threading.Event for sync operations or
+            asyncio.Event for async operations. (default: :obj:`None`)
         prune_tool_calls_from_memory (bool): Whether to clean tool
            call messages from memory after response generation to save token
            usage. When enabled, removes FUNCTION/TOOL role messages and
            ASSISTANT messages with tool_calls after each step.
            (default: :obj:`False`)
+        enable_snapshot_clean (bool, optional): Whether to clean snapshot
+            markers and references from historical tool outputs in memory.
+            This removes verbose DOM markers (like [ref=...]) from older tool
+            results while keeping the latest output intact for immediate use.
+            (default: :obj:`False`)
+        retry_attempts (int, optional): Maximum number of retry attempts for
+            rate limit errors. (default: :obj:`3`)
+        retry_delay (float, optional): Initial delay in seconds between
+            retries. Uses exponential backoff. (default: :obj:`1.0`)
+        step_timeout (Optional[float], optional): Timeout in seconds for the
+            entire step operation. If None, no timeout is applied.
+            (default: :obj:`None`)
+        stream_accumulate (bool, optional): When True, partial streaming
+            updates return accumulated content (current behavior). When False,
+            partial updates return only the incremental delta. (default:
+            :obj:`True`)
+        summary_window_ratio (float, optional): Maximum fraction of the total
+            context window that can be occupied by summary information. Used
+            to limit how much of the model's context is reserved for
+            summarization results. (default: :obj:`0.6`)
     """
 
     def __init__(
@@ -408,6 +468,7 @@ class ChatAgent(BaseAgent):
         ] = None,
         memory: Optional[AgentMemory] = None,
         message_window_size: Optional[int] = None,
+        summarize_threshold: Optional[int] = 50,
         token_limit: Optional[int] = None,
         output_language: Optional[str] = None,
         tools: Optional[List[Union[FunctionTool, Callable]]] = None,
@@ -422,10 +483,16 @@ class ChatAgent(BaseAgent):
         max_iteration: Optional[int] = None,
         agent_id: Optional[str] = None,
         stop_event: Optional[threading.Event] = None,
-        tool_execution_timeout: Optional[float] =
+        tool_execution_timeout: Optional[float] = Constants.TIMEOUT_THRESHOLD,
         mask_tool_output: bool = False,
-        pause_event: Optional[asyncio.Event] = None,
+        pause_event: Optional[Union[threading.Event, asyncio.Event]] = None,
         prune_tool_calls_from_memory: bool = False,
+        enable_snapshot_clean: bool = False,
+        retry_attempts: int = 3,
+        retry_delay: float = 1.0,
+        step_timeout: Optional[float] = Constants.TIMEOUT_THRESHOLD,
+        stream_accumulate: bool = True,
+        summary_window_ratio: float = 0.6,
     ) -> None:
         if isinstance(model, ModelManager):
             self.model_backend = model
@@ -441,10 +508,13 @@ class ChatAgent(BaseAgent):
         # Assign unique ID
         self.agent_id = agent_id if agent_id else str(uuid.uuid4())
 
+        self._enable_snapshot_clean = enable_snapshot_clean
+        self._tool_output_history: List[_ToolOutputHistoryEntry] = []
+
         # Set up memory
         context_creator = ScoreBasedContextCreator(
             self.model_backend.token_counter,
-
+            self.model_backend.token_limit,
         )
 
         self._memory: AgentMemory = memory or ChatHistoryMemory(
@@ -459,9 +529,7 @@ class ChatAgent(BaseAgent):
 
         # Set up system message and initialize messages
         self._original_system_message = (
-            BaseMessage.
-                role_name="Assistant", content=system_message
-            )
+            BaseMessage.make_system_message(system_message)
             if isinstance(system_message, str)
             else system_message
         )
@@ -471,6 +539,21 @@ class ChatAgent(BaseAgent):
         )
         self.init_messages()
 
+        # Set up summarize threshold with validation
+        if summarize_threshold is not None:
+            if not (0 < summarize_threshold <= 100):
+                raise ValueError(
+                    f"summarize_threshold must be between 0 and 100, "
+                    f"got {summarize_threshold}"
+                )
+            logger.info(
+                f"Automatic context compression is enabled. Will trigger "
+                f"summarization when context window exceeds "
+                f"{summarize_threshold}% of the total token limit."
+            )
+        self.summarize_threshold = summarize_threshold
+        self._reset_summary_state()
+
         # Set up role name and role type
         self.role_name: str = (
             getattr(self.system_message, "role_name", None) or "assistant"
@@ -509,13 +592,25 @@ class ChatAgent(BaseAgent):
         self.tool_execution_timeout = tool_execution_timeout
         self.mask_tool_output = mask_tool_output
         self._secure_result_store: Dict[str, Any] = {}
+        self._secure_result_store_lock = threading.Lock()
         self.pause_event = pause_event
         self.prune_tool_calls_from_memory = prune_tool_calls_from_memory
+        self.retry_attempts = max(1, retry_attempts)
+        self.retry_delay = max(0.0, retry_delay)
+        self.step_timeout = step_timeout
+        self._context_utility: Optional[ContextUtility] = None
+        self._context_summary_agent: Optional["ChatAgent"] = None
+        self.stream_accumulate = stream_accumulate
+        self._last_tool_call_record: Optional[ToolCallingRecord] = None
+        self._last_tool_call_signature: Optional[str] = None
+        self._last_token_limit_tool_signature: Optional[str] = None
+        self.summary_window_ratio = summary_window_ratio
 
     def reset(self):
         r"""Resets the :obj:`ChatAgent` to its initial state."""
         self.terminated = False
         self.init_messages()
+        self._reset_summary_state()
         for terminator in self.response_terminators:
             terminator.reset()
 
@@ -699,6 +794,20 @@ class ChatAgent(BaseAgent):
         # Ensure the new memory has the system message
         self.init_messages()
 
+    def set_context_utility(
+        self, context_utility: Optional[ContextUtility]
+    ) -> None:
+        r"""Set the context utility for the agent.
+
+        This allows external components (like SingleAgentWorker) to provide
+        a shared context utility instance for workflow management.
+
+        Args:
+            context_utility (ContextUtility, optional): The context utility
+                to use. If None, the agent will create its own when needed.
+        """
+        self._context_utility = context_utility
+
     def _get_full_tool_schemas(self) -> List[Dict[str, Any]]:
         r"""Returns a list of tool schemas of all tools, including internal
         and external tools.
@@ -708,6 +817,329 @@ class ChatAgent(BaseAgent):
             for func_tool in self._internal_tools.values()
         ]
 
+    @staticmethod
+    def _is_token_limit_error(error: Exception) -> bool:
+        r"""Return True when the exception message indicates a token limit."""
+        error_message = str(error).lower()
+        return any(
+            marker in error_message for marker in TOKEN_LIMIT_ERROR_MARKERS
+        )
+
+    @staticmethod
+    def _is_tool_related_record(record: MemoryRecord) -> bool:
+        r"""Determine whether the given memory record
+        belongs to a tool call."""
+        if record.role_at_backend in {
+            OpenAIBackendRole.TOOL,
+            OpenAIBackendRole.FUNCTION,
+        }:
+            return True
+
+        if (
+            record.role_at_backend == OpenAIBackendRole.ASSISTANT
+            and isinstance(record.message, FunctionCallingMessage)
+        ):
+            return True
+
+        return False
+
+    def _find_indices_to_remove_for_last_tool_pair(
+        self, recent_records: List[ContextRecord]
+    ) -> List[int]:
+        """Find indices of records that should be removed to clean up the most
+        recent incomplete tool interaction pair.
+
+        This method identifies tool call/result pairs by tool_call_id and
+        returns the exact indices to remove, allowing non-contiguous deletions.
+
+        Logic:
+        - If the last record is a tool result (TOOL/FUNCTION) with a
+          tool_call_id, find the matching assistant call anywhere in history
+          and return both indices.
+        - If the last record is an assistant tool call without a result yet,
+          return just that index.
+        - For normal messages (non tool-related): remove just the last one.
+        - Fallback: If no tool_call_id is available, use heuristic (last 2 if
+          tool-related, otherwise last 1).
+
+        Returns:
+            List[int]: Indices to remove (may be non-contiguous).
+        """
+        if not recent_records:
+            return []
+
+        last_idx = len(recent_records) - 1
+        last_record = recent_records[last_idx].memory_record
+
+        # Case A: Last is an ASSISTANT tool call with no result yet
+        if (
+            last_record.role_at_backend == OpenAIBackendRole.ASSISTANT
+            and isinstance(last_record.message, FunctionCallingMessage)
+            and last_record.message.result is None
+        ):
+            return [last_idx]
+
+        # Case B: Last is TOOL/FUNCTION result, try id-based pairing
+        if last_record.role_at_backend in {
+            OpenAIBackendRole.TOOL,
+            OpenAIBackendRole.FUNCTION,
+        }:
+            tool_id = None
+            if isinstance(last_record.message, FunctionCallingMessage):
+                tool_id = last_record.message.tool_call_id
+
+            if tool_id:
+                for idx in range(len(recent_records) - 2, -1, -1):
+                    rec = recent_records[idx].memory_record
+                    if rec.role_at_backend != OpenAIBackendRole.ASSISTANT:
+                        continue
+
+                    # Check if this assistant message contains the tool_call_id
+                    matched = False
+
+                    # Case 1: FunctionCallingMessage (single tool call)
+                    if isinstance(rec.message, FunctionCallingMessage):
+                        if rec.message.tool_call_id == tool_id:
+                            matched = True
+
+                    # Case 2: BaseMessage with multiple tool_calls in meta_dict
+                    elif (
+                        hasattr(rec.message, "meta_dict")
+                        and rec.message.meta_dict
+                    ):
+                        tool_calls_list = rec.message.meta_dict.get(
+                            "tool_calls", []
+                        )
+                        if isinstance(tool_calls_list, list):
+                            for tc in tool_calls_list:
+                                if (
+                                    isinstance(tc, dict)
+                                    and tc.get("id") == tool_id
+                                ):
+                                    matched = True
+                                    break
+
+                    if matched:
+                        # Return both assistant call and tool result indices
+                        return [idx, last_idx]
+
+            # Fallback: no tool_call_id, use heuristic
+            if self._is_tool_related_record(last_record):
+                # Remove last 2 (assume they are paired)
+                return [last_idx - 1, last_idx] if last_idx > 0 else [last_idx]
+            else:
+                return [last_idx]
+
+        # Default: non tool-related tail => remove last one
+        return [last_idx]
+
+    @staticmethod
+    def _serialize_tool_args(args: Dict[str, Any]) -> str:
+        try:
+            return json.dumps(args, ensure_ascii=False, sort_keys=True)
+        except TypeError:
+            return str(args)
+
+    @classmethod
+    def _build_tool_signature(
+        cls, func_name: str, args: Dict[str, Any]
+    ) -> str:
+        args_repr = cls._serialize_tool_args(args)
+        return f"{func_name}:{args_repr}"
+
+    def _describe_tool_call(
+        self, record: Optional[ToolCallingRecord]
+    ) -> Optional[str]:
+        if record is None:
+            return None
+        args_repr = self._serialize_tool_args(record.args)
+        return f"Tool `{record.tool_name}` invoked with arguments {args_repr}."
+
+    def _update_last_tool_call_state(
+        self, record: Optional[ToolCallingRecord]
+    ) -> None:
+        """Track the most recent tool call and its identifying signature."""
+        self._last_tool_call_record = record
+        if record is None:
+            self._last_tool_call_signature = None
+            return
+
+        args = (
+            record.args
+            if isinstance(record.args, dict)
+            else {"_raw": record.args}
+        )
+        try:
+            signature = self._build_tool_signature(record.tool_name, args)
+        except Exception:  # pragma: no cover - defensive guard
+            signature = None
+        self._last_tool_call_signature = signature
+
+    def _format_tool_limit_notice(self) -> Optional[str]:
+        record = self._last_tool_call_record
+        description = self._describe_tool_call(record)
+        if description is None:
+            return None
+        notice_lines = [
+            "[Tool Call Causing Token Limit]",
+            description,
+        ]
+
+        if record is not None:
+            result = record.result
+            if isinstance(result, bytes):
+                result_repr = result.decode(errors="replace")
+            elif isinstance(result, str):
+                result_repr = result
+            else:
+                try:
+                    result_repr = json.dumps(
+                        result, ensure_ascii=False, sort_keys=True
+                    )
+                except (TypeError, ValueError):
+                    result_repr = str(result)
+
+            result_length = len(result_repr)
+            notice_lines.append(f"Tool result length: {result_length}")
+        if self.model_backend.token_limit != 999999999:
+            notice_lines.append(
+                f"Token limit: {self.model_backend.token_limit}"
+            )
+
+        return "\n".join(notice_lines)
+
+    @staticmethod
+    def _append_user_messages_section(
+        summary_content: str, user_messages: List[str]
+    ) -> str:
+        section_title = "- **All User Messages**:"
+        sanitized_messages: List[str] = []
+        for msg in user_messages:
+            if not isinstance(msg, str):
+                msg = str(msg)
+            cleaned = " ".join(msg.strip().splitlines())
+            if cleaned:
+                sanitized_messages.append(cleaned)
+
+        bullet_block = (
+            "\n".join(f"- {m}" for m in sanitized_messages)
+            if sanitized_messages
+            else "- None noted"
+        )
+        user_section = f"{section_title}\n{bullet_block}"
+
+        summary_clean = summary_content.rstrip()
+        separator = "\n\n" if summary_clean else ""
+        return f"{summary_clean}{separator}{user_section}"
+
+    def _reset_summary_state(self) -> None:
+        self._summary_token_count = 0  # Total tokens in summary messages
+
+    def _calculate_next_summary_threshold(self) -> int:
+        r"""Calculate the next token threshold that should trigger
+        summarization.
+
+        The threshold calculation follows a progressive strategy:
+        - First time: token_limit * (summarize_threshold / 100)
+        - Subsequent times: (limit - summary_token) / 2 + summary_token
+
+        This ensures that as summaries accumulate, the threshold adapts
+        to maintain a reasonable balance between context and summaries.
+
+        Returns:
+            int: The token count threshold for next summarization.
+        """
+        token_limit = self.model_backend.token_limit
+        summary_token_count = self._summary_token_count
+
+        # First summarization: use the percentage threshold
+        if summary_token_count == 0:
+            threshold = int(token_limit * self.summarize_threshold / 100)
+        else:
+            # Subsequent summarizations: adaptive threshold
+            threshold = int(
+                (token_limit - summary_token_count)
+                * self.summarize_threshold
+                / 100
+                + summary_token_count
+            )
+
+        return threshold
+
+    def _update_memory_with_summary(
+        self, summary: str, include_summaries: bool = False
+    ) -> None:
+        r"""Update memory with summary result.
+
+        This method handles memory clearing and restoration of summaries based
+        on whether it's a progressive or full compression.
+        """
+
+        summary_content: str = summary
+
+        existing_summaries = []
+        if not include_summaries:
+            messages, _ = self.memory.get_context()
+            for msg in messages:
+                content = msg.get('content', '')
+                if isinstance(content, str) and content.startswith(
+                    '[CONTEXT_SUMMARY]'
+                ):
+                    existing_summaries.append(msg)
+
+        # Clear memory
+        self.clear_memory()
+
+        # Restore old summaries (for progressive compression)
+        for old_summary in existing_summaries:
+            content = old_summary.get('content', '')
+            if not isinstance(content, str):
+                content = str(content)
+            summary_msg = BaseMessage.make_assistant_message(
+                role_name="assistant", content=content
+            )
+            self.update_memory(summary_msg, OpenAIBackendRole.ASSISTANT)
+
+        # Add new summary
+        new_summary_msg = BaseMessage.make_assistant_message(
+            role_name="assistant", content=summary_content
+        )
+        self.update_memory(new_summary_msg, OpenAIBackendRole.ASSISTANT)
+        input_message = BaseMessage.make_assistant_message(
+            role_name="assistant",
+            content=(
+                "Please continue the conversation from "
+                "where we left it off without asking the user any further "
+                "questions. Continue with the last task that you were "
+                "asked to work on."
+            ),
+        )
+        self.update_memory(input_message, OpenAIBackendRole.ASSISTANT)
+        # Update token count
+        try:
+            summary_tokens = (
+                self.model_backend.token_counter.count_tokens_from_messages(
+                    [{"role": "assistant", "content": summary_content}]
+                )
+            )
+
+            if include_summaries:  # Full compression - reset count
+                self._summary_token_count = summary_tokens
+                logger.info(
+                    f"Full compression: Summary with {summary_tokens} tokens. "
+                    f"Total summary tokens reset to: {summary_tokens}"
+                )
+            else:  # Progressive compression - accumulate
+                self._summary_token_count += summary_tokens
+                logger.info(
+                    f"Progressive compression: New summary "
+                    f"with {summary_tokens} tokens. "
+                    f"Total summary tokens: "
+                    f"{self._summary_token_count}"
+                )
+        except Exception as e:
+            logger.warning(f"Failed to count summary tokens: {e}")
+
     def _get_external_tool_names(self) -> Set[str]:
         r"""Returns a set of external tool names."""
         return set(self._external_tool_schemas.keys())
@@ -722,6 +1154,282 @@ class ChatAgent(BaseAgent):
         for tool in tools:
             self.add_tool(tool)
 
+    def _serialize_tool_result(self, result: Any) -> str:
+        if isinstance(result, str):
+            return result
+        try:
+            return json.dumps(result, ensure_ascii=False)
+        except (TypeError, ValueError):
+            return str(result)
+
+    def _clean_snapshot_line(self, line: str) -> str:
+        r"""Clean a single snapshot line by removing prefixes and references.
+
+        This method handles snapshot lines in the format:
+        - [prefix] "quoted text" [attributes] [ref=...]: description
+
+        It preserves:
+        - Quoted text content (including brackets inside quotes)
+        - Description text after the colon
+
+        It removes:
+        - Line prefixes (e.g., "- button", "- tooltip", "generic:")
+        - Attribute markers (e.g., [disabled], [ref=e47])
+        - Lines with only element types
+        - All indentation
+
+        Args:
+            line: The original line content.
+
+        Returns:
+            The cleaned line content, or empty string if line should be
+            removed.
+        """
+        original = line.strip()
+        if not original:
+            return ''
+
+        # Check if line is just an element type marker
+        # (e.g., "- generic:", "button:")
+        if re.match(r'^(?:-\s+)?\w+\s*:?\s*$', original):
+            return ''
+
+        # Remove element type prefix
+        line = re.sub(r'^(?:-\s+)?\w+[\s:]+', '', original)
+
+        # Remove bracket markers while preserving quoted text
+        quoted_parts = []
+
+        def save_quoted(match):
+            quoted_parts.append(match.group(0))
+            return f'__QUOTED_{len(quoted_parts)-1}__'
+
+        line = re.sub(r'"[^"]*"', save_quoted, line)
+        line = re.sub(r'\s*\[[^\]]+\]\s*', ' ', line)
+
+        for i, quoted in enumerate(quoted_parts):
+            line = line.replace(f'__QUOTED_{i}__', quoted)
+
+        # Clean up formatting
+        line = re.sub(r'\s+', ' ', line).strip()
+        line = re.sub(r'\s*:\s*', ': ', line)
+        line = line.lstrip(': ').strip()
+
+        return '' if not line else line
+
+    def _clean_snapshot_content(self, content: str) -> str:
+        r"""Clean snapshot content by removing prefixes, references, and
+        deduplicating lines.
+
+        This method identifies snapshot lines (containing element keywords or
+        references) and cleans them while preserving non-snapshot content.
+        It also handles JSON-formatted tool outputs with snapshot fields.
+
+        Args:
+            content: The original snapshot content.
+
+        Returns:
+            The cleaned content with deduplicated lines.
+        """
+        try:
+            import json
+
+            data = json.loads(content)
+            modified = False
+
+            def clean_json_value(obj):
+                nonlocal modified
+                if isinstance(obj, dict):
+                    result = {}
+                    for key, value in obj.items():
+                        if key == 'snapshot' and isinstance(value, str):
+                            try:
+                                decoded_value = value.encode().decode(
+                                    'unicode_escape'
+                                )
+                            except (UnicodeDecodeError, AttributeError):
+                                decoded_value = value
+
+                            needs_cleaning = (
+                                '- ' in decoded_value
+                                or '[ref=' in decoded_value
+                                or any(
+                                    elem + ':' in decoded_value
+                                    for elem in [
+                                        'generic',
+                                        'img',
+                                        'banner',
+                                        'list',
+                                        'listitem',
+                                        'search',
+                                        'navigation',
+                                    ]
+                                )
+                            )
+
+                            if needs_cleaning:
+                                cleaned_snapshot = self._clean_text_snapshot(
+                                    decoded_value
+                                )
+                                result[key] = cleaned_snapshot
+                                modified = True
+                            else:
+                                result[key] = value
+                        else:
+                            result[key] = clean_json_value(value)
+                    return result
+                elif isinstance(obj, list):
+                    return [clean_json_value(item) for item in obj]
+                else:
+                    return obj
+
+            cleaned_data = clean_json_value(data)
+
+            if modified:
+                return json.dumps(cleaned_data, ensure_ascii=False, indent=4)
+            else:
+                return content
+
+        except (json.JSONDecodeError, TypeError):
+            return self._clean_text_snapshot(content)
+
+    def _clean_text_snapshot(self, content: str) -> str:
+        r"""Clean plain text snapshot content.
+
+        This method:
+        - Removes all indentation
+        - Deletes empty lines
+        - Deduplicates all lines
+        - Cleans snapshot-specific markers
+
+        Args:
+            content: The original snapshot text.
+
+        Returns:
+            The cleaned content with deduplicated lines, no indentation,
+            and no empty lines.
+        """
+        lines = content.split('\n')
+        cleaned_lines = []
+        seen = set()
+
+        for line in lines:
+            stripped_line = line.strip()
+
+            if not stripped_line:
+                continue
+
+            # Skip metadata lines (like "- /url:", "- /ref:")
+            if re.match(r'^-?\s*/\w+\s*:', stripped_line):
+                continue
+
+            is_snapshot_line = '[ref=' in stripped_line or re.match(
+                r'^(?:-\s+)?\w+(?:[\s:]|$)', stripped_line
+            )
+
+            if is_snapshot_line:
+                cleaned = self._clean_snapshot_line(stripped_line)
+                if cleaned and cleaned not in seen:
+                    cleaned_lines.append(cleaned)
+                    seen.add(cleaned)
+            else:
+                if stripped_line not in seen:
+                    cleaned_lines.append(stripped_line)
+                    seen.add(stripped_line)
+
+        return '\n'.join(cleaned_lines)
+
+    def _register_tool_output_for_cache(
+        self,
+        func_name: str,
+        tool_call_id: str,
+        result_text: str,
+        records: List[MemoryRecord],
+    ) -> None:
+        if not records:
+            return
+
+        entry = _ToolOutputHistoryEntry(
+            tool_name=func_name,
+            tool_call_id=tool_call_id,
+            result_text=result_text,
+            record_uuids=[str(record.uuid) for record in records],
+            record_timestamps=[record.timestamp for record in records],
+        )
+        self._tool_output_history.append(entry)
+        self._process_tool_output_cache()
+
+    def _process_tool_output_cache(self) -> None:
+        if not self._enable_snapshot_clean or not self._tool_output_history:
+            return
+
+        # Only clean older results; keep the latest expanded for immediate use.
+        for entry in self._tool_output_history[:-1]:
+            if entry.cached:
+                continue
+            self._clean_snapshot_in_memory(entry)
+
+    def _clean_snapshot_in_memory(
+        self, entry: _ToolOutputHistoryEntry
+    ) -> None:
+        if not entry.record_uuids:
+            return
+
+        # Clean snapshot markers and references from historical tool output
+        result_text = entry.result_text
+        if '- ' in result_text and '[ref=' in result_text:
+            cleaned_result = self._clean_snapshot_content(result_text)
+
+            # Update the message in memory storage
+            timestamp = (
+                entry.record_timestamps[0]
+                if entry.record_timestamps
+                else time.time_ns() / 1_000_000_000
+            )
+            cleaned_message = FunctionCallingMessage(
+                role_name=self.role_name,
+                role_type=self.role_type,
+                meta_dict={},
+                content="",
+                func_name=entry.tool_name,
+                result=cleaned_result,
+                tool_call_id=entry.tool_call_id,
+            )
+
+            chat_history_block = getattr(
+                self.memory, "_chat_history_block", None
+            )
+            storage = getattr(chat_history_block, "storage", None)
+            if storage is None:
+                return
+
+            existing_records = storage.load()
+            updated_records = [
+                record
+                for record in existing_records
+                if record["uuid"] not in entry.record_uuids
+            ]
+            new_record = MemoryRecord(
+                message=cleaned_message,
+                role_at_backend=OpenAIBackendRole.FUNCTION,
+                timestamp=timestamp,
+                agent_id=self.agent_id,
+            )
+            updated_records.append(new_record.to_dict())
+            updated_records.sort(key=lambda record: record["timestamp"])
+            storage.clear()
+            storage.save(updated_records)
+
+            logger.info(
+                "Cleaned snapshot in memory for tool output '%s' (%s)",
+                entry.tool_name,
+                entry.tool_call_id,
+            )
+
+            entry.cached = True
+            entry.record_uuids = [str(new_record.uuid)]
+            entry.record_timestamps = [timestamp]
+
     def add_external_tool(
         self, tool: Union[FunctionTool, Callable, Dict[str, Any]]
     ) -> None:
@@ -766,19 +1474,10 @@ class ChatAgent(BaseAgent):
         message: BaseMessage,
         role: OpenAIBackendRole,
         timestamp: Optional[float] = None,
-
+        return_records: bool = False,
+    ) -> Optional[List[MemoryRecord]]:
         r"""Updates the agent memory with a new message.
 
-        If the single *message* exceeds the model's context window, it will
-        be **automatically split into multiple smaller chunks** before being
-        written into memory. This prevents later failures in
-        `ScoreBasedContextCreator` where an over-sized message cannot fit
-        into the available token budget at all.
-
-        This slicing logic handles both regular text messages (in the
-        `content` field) and long tool call results (in the `result` field of
-        a `FunctionCallingMessage`).
-
         Args:
             message (BaseMessage): The new message to add to the stored
                 messages.
@@ -786,168 +1485,41 @@ class ChatAgent(BaseAgent):
             timestamp (Optional[float], optional): Custom timestamp for the
                 memory record. If `None`, the current time will be used.
                 (default: :obj:`None`)
-
-
-
-        import time
-        import uuid as _uuid
-
-        # 1. Helper to write a record to memory
-        def _write_single_record(
-            message: BaseMessage, role: OpenAIBackendRole, timestamp: float
-        ):
-            self.memory.write_record(
-                MemoryRecord(
-                    message=message,
-                    role_at_backend=role,
-                    timestamp=timestamp,
-                    agent_id=self.agent_id,
-                )
-            )
+            return_records (bool, optional): When ``True`` the method returns
+                the list of MemoryRecord objects written to memory.
+                (default: :obj:`False`)
 
-
-
+        Returns:
+            Optional[List[MemoryRecord]]: The records that were written when
+                ``return_records`` is ``True``; otherwise ``None``.
+        """
+        record = MemoryRecord(
+            message=message,
+            role_at_backend=role,
+            timestamp=timestamp
            if timestamp is not None
-            else time.time_ns() / 1_000_000_000
+            else time.time_ns() / 1_000_000_000,  # Nanosecond precision
+            agent_id=self.agent_id,
        )
+        self.memory.write_record(record)
 
-
-
-
-            token_counter = context_creator.token_counter
-            token_limit = context_creator.token_limit
-        except AttributeError:
-            _write_single_record(message, role, base_ts)
-            return
+        if return_records:
+            return [record]
+        return None
 
-
-
-            current_tokens = token_counter.count_tokens_from_messages(
-                [message.to_openai_message(role)]
-            )
-            _, ctx_tokens = self.memory.get_context()
-            remaining_budget = max(0, token_limit - ctx_tokens)
+    def load_memory(self, memory: AgentMemory) -> None:
+        r"""Load the provided memory into the agent.
 
-
-
-                return
-        except Exception as e:
-            logger.warning(
-                f"Token calculation failed before chunking, "
-                f"writing message as-is. Error: {e}"
-            )
-            _write_single_record(message, role, base_ts)
-            return
+        Args:
+            memory (AgentMemory): The memory to load into the agent.
 
-
-
-
-            f"of {remaining_budget}. Slicing into smaller chunks."
-        )
+        Returns:
+            None
+        """
 
-
-
-
-        if isinstance(message, FunctionCallingMessage) and isinstance(
-            message.result, str
-        ):
-            text_to_chunk = message.result
-            is_function_result = True
-        elif isinstance(message.content, str):
-            text_to_chunk = message.content
-
-        if not text_to_chunk or not text_to_chunk.strip():
-            _write_single_record(message, role, base_ts)
-            return
-        # Encode the entire text to get a list of all token IDs
-        try:
-            all_token_ids = token_counter.encode(text_to_chunk)
-        except Exception as e:
-            logger.error(f"Failed to encode text for chunking: {e}")
-            _write_single_record(message, role, base_ts)  # Fallback
-            return
-
-        if not all_token_ids:
-            _write_single_record(message, role, base_ts)  # Nothing to chunk
-            return
-
-        # 1. Base chunk size: one-tenth of the smaller of (a) total token
-        #    limit and (b) current remaining budget. This prevents us from
-        #    creating chunks that are guaranteed to overflow the
-        #    immediate context window.
-        base_chunk_size = max(1, remaining_budget) // 10
-
-        # 2. Each chunk gets a textual prefix such as:
-        #    "[chunk 3/12 of a long message]\n"
-        #    The prefix itself consumes tokens, so if we do not subtract its
-        #    length the *total* tokens of the outgoing message (prefix + body)
-        #    can exceed the intended bound. We estimate the prefix length
-        #    with a representative example that is safely long enough for the
-        #    vast majority of cases (three-digit indices).
-        sample_prefix = "[chunk 1/1000 of a long message]\n"
-        prefix_token_len = len(token_counter.encode(sample_prefix))
-
-        # 3. The real capacity for the message body is therefore the base
-        #    chunk size minus the prefix length. Fallback to at least one
-        #    token to avoid zero or negative sizes.
-        chunk_body_limit = max(1, base_chunk_size - prefix_token_len)
-
-        # 4. Calculate how many chunks we will need with this body size.
-        num_chunks = math.ceil(len(all_token_ids) / chunk_body_limit)
-        group_id = str(_uuid.uuid4())
-
-        for i in range(num_chunks):
-            start_idx = i * chunk_body_limit
-            end_idx = start_idx + chunk_body_limit
-            chunk_token_ids = all_token_ids[start_idx:end_idx]
-
-            chunk_body = token_counter.decode(chunk_token_ids)
-
-            prefix = f"[chunk {i + 1}/{num_chunks} of a long message]\n"
-            new_body = prefix + chunk_body
-
-            if is_function_result and isinstance(
-                message, FunctionCallingMessage
-            ):
-                new_msg: BaseMessage = FunctionCallingMessage(
-                    role_name=message.role_name,
-                    role_type=message.role_type,
-                    meta_dict=message.meta_dict,
-                    content=message.content,
-                    func_name=message.func_name,
-                    args=message.args,
-                    result=new_body,
-                    tool_call_id=message.tool_call_id,
-                )
-            else:
-                new_msg = message.create_new_instance(new_body)
-
-            meta = (new_msg.meta_dict or {}).copy()
-            meta.update(
-                {
-                    "chunk_idx": i + 1,
-                    "chunk_total": num_chunks,
-                    "chunk_group_id": group_id,
-                }
-            )
-            new_msg.meta_dict = meta
-
-            # Increment timestamp slightly to maintain order
-            _write_single_record(new_msg, role, base_ts + i * 1e-6)
-
-    def load_memory(self, memory: AgentMemory) -> None:
-        r"""Load the provided memory into the agent.
-
-        Args:
-            memory (AgentMemory): The memory to load into the agent.
-
-        Returns:
-            None
-        """
-
-        for context_record in memory.retrieve():
-            self.memory.write_record(context_record.memory_record)
-        logger.info(f"Memory loaded from {memory}")
+        for context_record in memory.retrieve():
+            self.memory.write_record(context_record.memory_record)
+        logger.info(f"Memory loaded from {memory}")
 
     def load_memory_from_path(self, path: str) -> None:
         r"""Loads memory records from a JSON file filtered by this agent's ID.
@@ -1012,6 +1584,583 @@ class ChatAgent(BaseAgent):
         json_store.save(to_save)
         logger.info(f"Memory saved to {path}")
 
+    def summarize(
+        self,
+        filename: Optional[str] = None,
+        summary_prompt: Optional[str] = None,
+        response_format: Optional[Type[BaseModel]] = None,
+        working_directory: Optional[Union[str, Path]] = None,
+        include_summaries: bool = False,
+        add_user_messages: bool = True,
+    ) -> Dict[str, Any]:
+        r"""Summarize the agent's current conversation context and persist it
+        to a markdown file.
+
+        .. deprecated:: 0.2.80
+            Use :meth:`asummarize` for async/await support and better
+            performance in parallel summarization workflows.
+
+        Args:
+            filename (Optional[str]): The base filename (without extension) to
+                use for the markdown file. Defaults to a timestamped name when
+                not provided.
+            summary_prompt (Optional[str]): Custom prompt for the summarizer.
+                When omitted, a default prompt highlighting key decisions,
+                action items, and open questions is used.
+            response_format (Optional[Type[BaseModel]]): A Pydantic model
+                defining the expected structure of the response. If provided,
+                the summary will be generated as structured output and included
+                in the result.
+            include_summaries (bool): Whether to include previously generated
+                summaries in the content to be summarized. If False (default),
+                only non-summary messages will be summarized. If True, all
+                messages including previous summaries will be summarized
+                (full compression). (default: :obj:`False`)
+            working_directory (Optional[str|Path]): Optional directory to save
+                the markdown summary file. If provided, overrides the default
+                directory used by ContextUtility.
+            add_user_messages (bool): Whether add user messages to summary.
+                (default: :obj:`True`)
+        Returns:
+            Dict[str, Any]: A dictionary containing the summary text, file
+                path, status message, and optionally structured_summary if
+                response_format was provided.
+
+        See Also:
+            :meth:`asummarize`: Async version for non-blocking LLM calls.
1630
|
+
:meth:`asummarize`: Async version for non-blocking LLM calls.
|
|
1631
|
+
"""
|
|
1632
|
+
|
|
1633
|
+
warnings.warn(
|
|
1634
|
+
"summarize() is synchronous. Consider using asummarize() "
|
|
1635
|
+
"for async/await support and better performance.",
|
|
1636
|
+
DeprecationWarning,
|
|
1637
|
+
stacklevel=2,
|
|
1638
|
+
)
|
|
1639
|
+
|
|
1640
|
+
result: Dict[str, Any] = {
|
|
1641
|
+
"summary": "",
|
|
1642
|
+
"file_path": None,
|
|
1643
|
+
"status": "",
|
|
1644
|
+
}
|
|
1645
|
+
|
|
1646
|
+
try:
|
|
1647
|
+
# Use external context if set, otherwise create local one
|
|
1648
|
+
if self._context_utility is None:
|
|
1649
|
+
if working_directory is not None:
|
|
1650
|
+
self._context_utility = ContextUtility(
|
|
1651
|
+
working_directory=str(working_directory)
|
|
1652
|
+
)
|
|
1653
|
+
else:
|
|
1654
|
+
self._context_utility = ContextUtility()
|
|
1655
|
+
context_util = self._context_utility
|
|
1656
|
+
|
|
1657
|
+
# Get conversation directly from agent's memory
|
|
1658
|
+
messages, _ = self.memory.get_context()
|
|
1659
|
+
|
|
1660
|
+
if not messages:
|
|
1661
|
+
status_message = (
|
|
1662
|
+
"No conversation context available to summarize."
|
|
1663
|
+
)
|
|
1664
|
+
result["status"] = status_message
|
|
1665
|
+
return result
|
|
1666
|
+
|
|
1667
|
+
# Convert messages to conversation text
|
|
1668
|
+
conversation_lines = []
|
|
1669
|
+
user_messages: List[str] = []
|
|
1670
|
+
for message in messages:
|
|
1671
|
+
role = message.get('role', 'unknown')
|
|
1672
|
+
content = message.get('content', '')
|
|
1673
|
+
|
|
1674
|
+
# Skip summary messages if include_summaries is False
|
|
1675
|
+
if not include_summaries and isinstance(content, str):
|
|
1676
|
+
# Check if this is a summary message by looking for marker
|
|
1677
|
+
if content.startswith('[CONTEXT_SUMMARY]'):
|
|
1678
|
+
continue
|
|
1679
|
+
|
|
1680
|
+
# Handle tool call messages (assistant calling tools)
|
|
1681
|
+
tool_calls = message.get('tool_calls')
|
|
1682
|
+
if tool_calls and isinstance(tool_calls, (list, tuple)):
|
|
1683
|
+
for tool_call in tool_calls:
|
|
1684
|
+
# Handle both dict and object formats
|
|
1685
|
+
if isinstance(tool_call, dict):
|
|
1686
|
+
func_name = tool_call.get('function', {}).get(
|
|
1687
|
+
'name', 'unknown_tool'
|
|
1688
|
+
)
|
|
1689
|
+
func_args_str = tool_call.get('function', {}).get(
|
|
1690
|
+
'arguments', '{}'
|
|
1691
|
+
)
|
|
1692
|
+
else:
|
|
1693
|
+
# Handle object format (Pydantic or similar)
|
|
1694
|
+
func_name = getattr(
|
|
1695
|
+
getattr(tool_call, 'function', None),
|
|
1696
|
+
'name',
|
|
1697
|
+
'unknown_tool',
|
|
1698
|
+
)
|
|
1699
|
+
func_args_str = getattr(
|
|
1700
|
+
getattr(tool_call, 'function', None),
|
|
1701
|
+
'arguments',
|
|
1702
|
+
'{}',
|
|
1703
|
+
)
|
|
1704
|
+
|
|
1705
|
+
# Parse and format arguments for readability
|
|
1706
|
+
try:
|
|
1707
|
+
import json
|
|
1708
|
+
|
|
1709
|
+
args_dict = json.loads(func_args_str)
|
|
1710
|
+
args_formatted = ', '.join(
|
|
1711
|
+
f"{k}={v}" for k, v in args_dict.items()
|
|
1712
|
+
)
|
|
1713
|
+
except (json.JSONDecodeError, ValueError, TypeError):
|
|
1714
|
+
args_formatted = func_args_str
|
|
1715
|
+
|
|
1716
|
+
conversation_lines.append(
|
|
1717
|
+
f"[TOOL CALL] {func_name}({args_formatted})"
|
|
1718
|
+
)
|
|
1719
|
+
|
|
1720
|
+
# Handle tool response messages
|
|
1721
|
+
elif role == 'tool':
|
|
1722
|
+
tool_name = message.get('name', 'unknown_tool')
|
|
1723
|
+
if not content:
|
|
1724
|
+
content = str(message.get('content', ''))
|
|
1725
|
+
conversation_lines.append(
|
|
1726
|
+
f"[TOOL RESULT] {tool_name} → {content}"
|
|
1727
|
+
)
|
|
1728
|
+
|
|
1729
|
+
# Handle regular content messages (user/assistant/system)
|
|
1730
|
+
elif content:
|
|
1731
|
+
content = str(content)
|
|
1732
|
+
if role == 'user':
|
|
1733
|
+
user_messages.append(content)
|
|
1734
|
+
conversation_lines.append(f"{role}: {content}")
|
|
1735
|
+
|
|
1736
|
+
conversation_text = "\n".join(conversation_lines).strip()
|
|
1737
|
+
|
|
1738
|
+
if not conversation_text:
|
|
1739
|
+
status_message = (
|
|
1740
|
+
"Conversation context is empty; skipping summary."
|
|
1741
|
+
)
|
|
1742
|
+
result["status"] = status_message
|
|
1743
|
+
return result
|
|
1744
|
+
|
|
1745
|
+
if self._context_summary_agent is None:
|
|
1746
|
+
self._context_summary_agent = ChatAgent(
|
|
1747
|
+
system_message=(
|
|
1748
|
+
"You are a helpful assistant that summarizes "
|
|
1749
|
+
"conversations"
|
|
1750
|
+
),
|
|
1751
|
+
model=self.model_backend,
|
|
1752
|
+
agent_id=f"{self.agent_id}_context_summarizer",
|
|
1753
|
+
summarize_threshold=None,
|
|
1754
|
+
)
|
|
1755
|
+
else:
|
|
1756
|
+
self._context_summary_agent.reset()
|
|
1757
|
+
|
|
1758
|
+
if summary_prompt:
|
|
1759
|
+
prompt_text = (
|
|
1760
|
+
f"{summary_prompt.rstrip()}\n\n"
|
|
1761
|
+
f"AGENT CONVERSATION TO BE SUMMARIZED:\n"
|
|
1762
|
+
f"{conversation_text}"
|
|
1763
|
+
)
|
|
1764
|
+
else:
|
|
1765
|
+
prompt_text = build_default_summary_prompt(conversation_text)
|
|
1766
|
+
|
|
1767
|
+
try:
|
|
1768
|
+
# Use structured output if response_format is provided
|
|
1769
|
+
if response_format:
|
|
1770
|
+
response = self._context_summary_agent.step(
|
|
1771
|
+
prompt_text, response_format=response_format
|
|
1772
|
+
)
|
|
1773
|
+
else:
|
|
1774
|
+
response = self._context_summary_agent.step(prompt_text)
|
|
1775
|
+
except Exception as step_exc:
|
|
1776
|
+
error_message = (
|
|
1777
|
+
f"Failed to generate summary using model: {step_exc}"
|
|
1778
|
+
)
|
|
1779
|
+
logger.error(error_message)
|
|
1780
|
+
result["status"] = error_message
|
|
1781
|
+
return result
|
|
1782
|
+
|
|
1783
|
+
if not response.msgs:
|
|
1784
|
+
status_message = (
|
|
1785
|
+
"Failed to generate summary from model response."
|
|
1786
|
+
)
|
|
1787
|
+
result["status"] = status_message
|
|
1788
|
+
return result
|
|
1789
|
+
|
|
1790
|
+
summary_content = response.msgs[-1].content.strip()
|
|
1791
|
+
if not summary_content:
|
|
1792
|
+
status_message = "Generated summary is empty."
|
|
1793
|
+
result["status"] = status_message
|
|
1794
|
+
return result
|
|
1795
|
+
|
|
1796
|
+
# handle structured output if response_format was provided
|
|
1797
|
+
structured_output = None
|
|
1798
|
+
if response_format and response.msgs[-1].parsed:
|
|
1799
|
+
structured_output = response.msgs[-1].parsed
|
|
1800
|
+
|
|
1801
|
+
# determine filename: use provided filename, or extract from
|
|
1802
|
+
# structured output, or generate timestamp
|
|
1803
|
+
if filename:
|
|
1804
|
+
base_filename = filename
|
|
1805
|
+
elif structured_output and hasattr(
|
|
1806
|
+
structured_output, 'task_title'
|
|
1807
|
+
):
|
|
1808
|
+
# use task_title from structured output for filename
|
|
1809
|
+
task_title = structured_output.task_title
|
|
1810
|
+
clean_title = ContextUtility.sanitize_workflow_filename(
|
|
1811
|
+
task_title
|
|
1812
|
+
)
|
|
1813
|
+
base_filename = (
|
|
1814
|
+
f"{clean_title}_workflow" if clean_title else "workflow"
|
|
1815
|
+
)
|
|
1816
|
+
else:
|
|
1817
|
+
base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
|
|
1818
|
+
|
|
1819
|
+
base_filename = Path(base_filename).with_suffix("").name
|
|
1820
|
+
|
|
1821
|
+
metadata = context_util.get_session_metadata()
|
|
1822
|
+
metadata.update(
|
|
1823
|
+
{
|
|
1824
|
+
"agent_id": self.agent_id,
|
|
1825
|
+
"message_count": len(messages),
|
|
1826
|
+
}
|
|
1827
|
+
)
|
|
1828
|
+
|
|
1829
|
+
# convert structured output to custom markdown if present
|
|
1830
|
+
if structured_output:
|
|
1831
|
+
# convert structured output to custom markdown
|
|
1832
|
+
summary_content = context_util.structured_output_to_markdown(
|
|
1833
|
+
structured_data=structured_output, metadata=metadata
|
|
1834
|
+
)
|
|
1835
|
+
if add_user_messages:
|
|
1836
|
+
summary_content = self._append_user_messages_section(
|
|
1837
|
+
summary_content, user_messages
|
|
1838
|
+
)
|
|
1839
|
+
|
|
1840
|
+
# Save the markdown (either custom structured or default)
|
|
1841
|
+
save_status = context_util.save_markdown_file(
|
|
1842
|
+
base_filename,
|
|
1843
|
+
summary_content,
|
|
1844
|
+
title="Conversation Summary"
|
|
1845
|
+
if not structured_output
|
|
1846
|
+
else None,
|
|
1847
|
+
metadata=metadata if not structured_output else None,
|
|
1848
|
+
)
|
|
1849
|
+
|
|
1850
|
+
file_path = (
|
|
1851
|
+
context_util.get_working_directory() / f"{base_filename}.md"
|
|
1852
|
+
)
|
|
1853
|
+
summary_content = (
|
|
1854
|
+
f"[CONTEXT_SUMMARY] The following is a summary of our "
|
|
1855
|
+
f"conversation from a previous session: {summary_content}"
|
|
1856
|
+
)
|
|
1857
|
+
# Prepare result dictionary
|
|
1858
|
+
result_dict = {
|
|
1859
|
+
"summary": summary_content,
|
|
1860
|
+
"file_path": str(file_path),
|
|
1861
|
+
"status": save_status,
|
|
1862
|
+
"structured_summary": structured_output,
|
|
1863
|
+
}
|
|
1864
|
+
|
|
1865
|
+
result.update(result_dict)
|
|
1866
|
+
logger.info("Conversation summary saved to %s", file_path)
|
|
1867
|
+
return result
|
|
1868
|
+
|
|
1869
|
+
except Exception as exc:
|
|
1870
|
+
error_message = f"Failed to summarize conversation context: {exc}"
|
|
1871
|
+
logger.error(error_message)
|
|
1872
|
+
result["status"] = error_message
|
|
1873
|
+
return result
|
|
1874
|
+
|
|
1875
|
+
async def asummarize(
|
|
1876
|
+
self,
|
|
1877
|
+
filename: Optional[str] = None,
|
|
1878
|
+
summary_prompt: Optional[str] = None,
|
|
1879
|
+
response_format: Optional[Type[BaseModel]] = None,
|
|
1880
|
+
working_directory: Optional[Union[str, Path]] = None,
|
|
1881
|
+
include_summaries: bool = False,
|
|
1882
|
+
add_user_messages: bool = True,
|
|
1883
|
+
) -> Dict[str, Any]:
|
|
1884
|
+
r"""Asynchronously summarize the agent's current conversation context
|
|
1885
|
+
and persist it to a markdown file.
|
|
1886
|
+
|
|
1887
|
+
This is the async version of summarize() that uses astep() for
|
|
1888
|
+
non-blocking LLM calls, enabling parallel summarization of multiple
|
|
1889
|
+
agents.
|
|
1890
|
+
|
|
1891
|
+
Args:
|
|
1892
|
+
filename (Optional[str]): The base filename (without extension) to
|
|
1893
|
+
use for the markdown file. Defaults to a timestamped name when
|
|
1894
|
+
not provided.
|
|
1895
|
+
summary_prompt (Optional[str]): Custom prompt for the summarizer.
|
|
1896
|
+
When omitted, a default prompt highlighting key decisions,
|
|
1897
|
+
action items, and open questions is used.
|
|
1898
|
+
response_format (Optional[Type[BaseModel]]): A Pydantic model
|
|
1899
|
+
defining the expected structure of the response. If provided,
|
|
1900
|
+
the summary will be generated as structured output and included
|
|
1901
|
+
in the result.
|
|
1902
|
+
working_directory (Optional[str|Path]): Optional directory to save
|
|
1903
|
+
the markdown summary file. If provided, overrides the default
|
|
1904
|
+
directory used by ContextUtility.
|
|
1905
|
+
include_summaries (bool): Whether to include previously generated
|
|
1906
|
+
summaries in the content to be summarized. If False (default),
|
|
1907
|
+
only non-summary messages will be summarized. If True, all
|
|
1908
|
+
messages including previous summaries will be summarized
|
|
1909
|
+
(full compression). (default: :obj:`False`)
|
|
1910
|
+
add_user_messages (bool): Whether add user messages to summary.
|
|
1911
|
+
(default: :obj:`True`)
|
|
1912
|
+
Returns:
|
|
1913
|
+
Dict[str, Any]: A dictionary containing the summary text, file
|
|
1914
|
+
path, status message, and optionally structured_summary if
|
|
1915
|
+
response_format was provided.
|
|
1916
|
+
"""
|
|
1917
|
+
|
|
1918
|
+
result: Dict[str, Any] = {
|
|
1919
|
+
"summary": "",
|
|
1920
|
+
"file_path": None,
|
|
1921
|
+
"status": "",
|
|
1922
|
+
}
|
|
1923
|
+
|
|
1924
|
+
try:
|
|
1925
|
+
# Use external context if set, otherwise create local one
|
|
1926
|
+
if self._context_utility is None:
|
|
1927
|
+
if working_directory is not None:
|
|
1928
|
+
self._context_utility = ContextUtility(
|
|
1929
|
+
working_directory=str(working_directory)
|
|
1930
|
+
)
|
|
1931
|
+
else:
|
|
1932
|
+
self._context_utility = ContextUtility()
|
|
1933
|
+
context_util = self._context_utility
|
|
1934
|
+
|
|
1935
|
+
# Get conversation directly from agent's memory
|
|
1936
|
+
messages, _ = self.memory.get_context()
|
|
1937
|
+
|
|
1938
|
+
if not messages:
|
|
1939
|
+
status_message = (
|
|
1940
|
+
"No conversation context available to summarize."
|
|
1941
|
+
)
|
|
1942
|
+
result["status"] = status_message
|
|
1943
|
+
return result
|
|
1944
|
+
|
|
1945
|
+
# Convert messages to conversation text
|
|
1946
|
+
conversation_lines = []
|
|
1947
|
+
user_messages: List[str] = []
|
|
1948
|
+
for message in messages:
|
|
1949
|
+
role = message.get('role', 'unknown')
|
|
1950
|
+
content = message.get('content', '')
|
|
1951
|
+
|
|
1952
|
+
# Skip summary messages if include_summaries is False
|
|
1953
|
+
if not include_summaries and isinstance(content, str):
|
|
1954
|
+
# Check if this is a summary message by looking for marker
|
|
1955
|
+
if content.startswith('[CONTEXT_SUMMARY]'):
|
|
1956
|
+
continue
|
|
1957
|
+
|
|
1958
|
+
# Handle tool call messages (assistant calling tools)
|
|
1959
|
+
tool_calls = message.get('tool_calls')
|
|
1960
|
+
if tool_calls and isinstance(tool_calls, (list, tuple)):
|
|
1961
|
+
for tool_call in tool_calls:
|
|
1962
|
+
# Handle both dict and object formats
|
|
1963
|
+
if isinstance(tool_call, dict):
|
|
1964
|
+
func_name = tool_call.get('function', {}).get(
|
|
1965
|
+
'name', 'unknown_tool'
|
|
1966
|
+
)
|
|
1967
|
+
func_args_str = tool_call.get('function', {}).get(
|
|
1968
|
+
'arguments', '{}'
|
|
1969
|
+
)
|
|
1970
|
+
else:
|
|
1971
|
+
# Handle object format (Pydantic or similar)
|
|
1972
|
+
func_name = getattr(
|
|
1973
|
+
getattr(tool_call, 'function', None),
|
|
1974
|
+
'name',
|
|
1975
|
+
'unknown_tool',
|
|
1976
|
+
)
|
|
1977
|
+
func_args_str = getattr(
|
|
1978
|
+
getattr(tool_call, 'function', None),
|
|
1979
|
+
'arguments',
|
|
1980
|
+
'{}',
|
|
1981
|
+
)
|
|
1982
|
+
|
|
1983
|
+
# Parse and format arguments for readability
|
|
1984
|
+
try:
|
|
1985
|
+
import json
|
|
1986
|
+
|
|
1987
|
+
args_dict = json.loads(func_args_str)
|
|
1988
|
+
args_formatted = ', '.join(
|
|
1989
|
+
f"{k}={v}" for k, v in args_dict.items()
|
|
1990
|
+
)
|
|
1991
|
+
except (json.JSONDecodeError, ValueError, TypeError):
|
|
1992
|
+
args_formatted = func_args_str
|
|
1993
|
+
|
|
1994
|
+
conversation_lines.append(
|
|
1995
|
+
f"[TOOL CALL] {func_name}({args_formatted})"
|
|
1996
|
+
)
|
|
1997
|
+
|
|
1998
|
+
# Handle tool response messages
|
|
1999
|
+
elif role == 'tool':
|
|
2000
|
+
tool_name = message.get('name', 'unknown_tool')
|
|
2001
|
+
if not content:
|
|
2002
|
+
content = str(message.get('content', ''))
|
|
2003
|
+
conversation_lines.append(
|
|
2004
|
+
f"[TOOL RESULT] {tool_name} → {content}"
|
|
2005
|
+
)
|
|
2006
|
+
|
|
2007
|
+
# Handle regular content messages (user/assistant/system)
|
|
2008
|
+
elif content:
|
|
2009
|
+
content = str(content)
|
|
2010
|
+
if role == 'user':
|
|
2011
|
+
user_messages.append(content)
|
|
2012
|
+
conversation_lines.append(f"{role}: {content}")
|
|
2013
|
+
|
|
2014
|
+
conversation_text = "\n".join(conversation_lines).strip()
|
|
2015
|
+
|
|
2016
|
+
if not conversation_text:
|
|
2017
|
+
status_message = (
|
|
2018
|
+
"Conversation context is empty; skipping summary."
|
|
2019
|
+
)
|
|
2020
|
+
result["status"] = status_message
|
|
2021
|
+
return result
|
|
2022
|
+
|
|
2023
|
+
if self._context_summary_agent is None:
|
|
2024
|
+
self._context_summary_agent = ChatAgent(
|
|
2025
|
+
system_message=(
|
|
2026
|
+
"You are a helpful assistant that summarizes "
|
|
2027
|
+
"conversations"
|
|
2028
|
+
),
|
|
2029
|
+
model=self.model_backend,
|
|
2030
|
+
agent_id=f"{self.agent_id}_context_summarizer",
|
|
2031
|
+
summarize_threshold=None,
|
|
2032
|
+
)
|
|
2033
|
+
else:
|
|
2034
|
+
self._context_summary_agent.reset()
|
|
2035
|
+
|
|
2036
|
+
if summary_prompt:
|
|
2037
|
+
prompt_text = (
|
|
2038
|
+
f"{summary_prompt.rstrip()}\n\n"
|
|
2039
|
+
f"AGENT CONVERSATION TO BE SUMMARIZED:\n"
|
|
2040
|
+
f"{conversation_text}"
|
|
2041
|
+
)
|
|
2042
|
+
else:
|
|
2043
|
+
prompt_text = build_default_summary_prompt(conversation_text)
|
|
2044
|
+
|
|
2045
|
+
try:
|
|
2046
|
+
# Use structured output if response_format is provided
|
|
2047
|
+
if response_format:
|
|
2048
|
+
response = await self._context_summary_agent.astep(
|
|
2049
|
+
prompt_text, response_format=response_format
|
|
2050
|
+
)
|
|
2051
|
+
else:
|
|
2052
|
+
response = await self._context_summary_agent.astep(
|
|
2053
|
+
prompt_text
|
|
2054
|
+
)
|
|
2055
|
+
|
|
2056
|
+
# Handle streaming response
|
|
2057
|
+
if isinstance(response, AsyncStreamingChatAgentResponse):
|
|
2058
|
+
# Collect final response
|
|
2059
|
+
final_response = await response
|
|
2060
|
+
response = final_response
|
|
2061
|
+
|
|
2062
|
+
except Exception as step_exc:
|
|
2063
|
+
error_message = (
|
|
2064
|
+
f"Failed to generate summary using model: {step_exc}"
|
|
2065
|
+
)
|
|
2066
|
+
logger.error(error_message)
|
|
2067
|
+
result["status"] = error_message
|
|
2068
|
+
return result
|
|
2069
|
+
|
|
2070
|
+
if not response.msgs:
|
|
2071
|
+
status_message = (
|
|
2072
|
+
"Failed to generate summary from model response."
|
|
2073
|
+
)
|
|
2074
|
+
result["status"] = status_message
|
|
2075
|
+
return result
|
|
2076
|
+
|
|
2077
|
+
summary_content = response.msgs[-1].content.strip()
|
|
2078
|
+
if not summary_content:
|
|
2079
|
+
status_message = "Generated summary is empty."
|
|
2080
|
+
result["status"] = status_message
|
|
2081
|
+
return result
|
|
2082
|
+
|
|
2083
|
+
# handle structured output if response_format was provided
|
|
2084
|
+
structured_output = None
|
|
2085
|
+
if response_format and response.msgs[-1].parsed:
|
|
2086
|
+
structured_output = response.msgs[-1].parsed
|
|
2087
|
+
|
|
2088
|
+
# determine filename: use provided filename, or extract from
|
|
2089
|
+
# structured output, or generate timestamp
|
|
2090
|
+
if filename:
|
|
2091
|
+
base_filename = filename
|
|
2092
|
+
elif structured_output and hasattr(
|
|
2093
|
+
structured_output, 'task_title'
|
|
2094
|
+
):
|
|
2095
|
+
# use task_title from structured output for filename
|
|
2096
|
+
task_title = structured_output.task_title
|
|
2097
|
+
clean_title = ContextUtility.sanitize_workflow_filename(
|
|
2098
|
+
task_title
|
|
2099
|
+
)
|
|
2100
|
+
base_filename = (
|
|
2101
|
+
f"{clean_title}_workflow" if clean_title else "workflow"
|
|
2102
|
+
)
|
|
2103
|
+
else:
|
|
2104
|
+
base_filename = f"context_summary_{datetime.now().strftime('%Y%m%d_%H%M%S')}" # noqa: E501
|
|
2105
|
+
|
|
2106
|
+
base_filename = Path(base_filename).with_suffix("").name
|
|
2107
|
+
|
|
2108
|
+
metadata = context_util.get_session_metadata()
|
|
2109
|
+
metadata.update(
|
|
2110
|
+
{
|
|
2111
|
+
"agent_id": self.agent_id,
|
|
2112
|
+
"message_count": len(messages),
|
|
2113
|
+
}
|
|
2114
|
+
)
|
|
2115
|
+
|
|
2116
|
+
# convert structured output to custom markdown if present
|
|
2117
|
+
if structured_output:
|
|
2118
|
+
# convert structured output to custom markdown
|
|
2119
|
+
summary_content = context_util.structured_output_to_markdown(
|
|
2120
|
+
structured_data=structured_output, metadata=metadata
|
|
2121
|
+
)
|
|
2122
|
+
if add_user_messages:
|
|
2123
|
+
summary_content = self._append_user_messages_section(
|
|
2124
|
+
summary_content, user_messages
|
|
2125
|
+
)
|
|
2126
|
+
|
|
2127
|
+
# Save the markdown (either custom structured or default)
|
|
2128
|
+
save_status = context_util.save_markdown_file(
|
|
2129
|
+
base_filename,
|
|
2130
|
+
summary_content,
|
|
2131
|
+
title="Conversation Summary"
|
|
2132
|
+
if not structured_output
|
|
2133
|
+
else None,
|
|
2134
|
+
metadata=metadata if not structured_output else None,
|
|
2135
|
+
)
|
|
2136
|
+
|
|
2137
|
+
file_path = (
|
|
2138
|
+
context_util.get_working_directory() / f"{base_filename}.md"
|
|
2139
|
+
)
|
|
2140
|
+
|
|
2141
|
+
summary_content = (
|
|
2142
|
+
f"[CONTEXT_SUMMARY] The following is a summary of our "
|
|
2143
|
+
f"conversation from a previous session: {summary_content}"
|
|
2144
|
+
)
|
|
2145
|
+
|
|
2146
|
+
# Prepare result dictionary
|
|
2147
|
+
result_dict = {
|
|
2148
|
+
"summary": summary_content,
|
|
2149
|
+
"file_path": str(file_path),
|
|
2150
|
+
"status": save_status,
|
|
2151
|
+
"structured_summary": structured_output,
|
|
2152
|
+
}
|
|
2153
|
+
|
|
2154
|
+
result.update(result_dict)
|
|
2155
|
+
logger.info("Conversation summary saved to %s", file_path)
|
|
2156
|
+
return result
|
|
2157
|
+
|
|
2158
|
+
except Exception as exc:
|
|
2159
|
+
error_message = f"Failed to summarize conversation context: {exc}"
|
|
2160
|
+
logger.error(error_message)
|
|
2161
|
+
result["status"] = error_message
|
|
2162
|
+
return result
|
|
2163
|
+
|
|
1015
2164
|
def clear_memory(self) -> None:
|
|
1016
2165
|
r"""Clear the agent's memory and reset to initial state.
|
|
1017
2166
|
|
|
@@ -1019,8 +2168,16 @@ class ChatAgent(BaseAgent):
|
|
|
1019
2168
|
None
|
|
1020
2169
|
"""
|
|
1021
2170
|
self.memory.clear()
|
|
2171
|
+
|
|
1022
2172
|
if self.system_message is not None:
|
|
1023
|
-
self.
|
|
2173
|
+
self.memory.write_record(
|
|
2174
|
+
MemoryRecord(
|
|
2175
|
+
message=self.system_message,
|
|
2176
|
+
role_at_backend=OpenAIBackendRole.SYSTEM,
|
|
2177
|
+
timestamp=time.time_ns() / 1_000_000_000,
|
|
2178
|
+
agent_id=self.agent_id,
|
|
2179
|
+
)
|
|
2180
|
+
)
|
|
1024
2181
|
|
|
1025
2182
|
def _generate_system_message_for_output_language(
|
|
1026
2183
|
self,
|
|
@@ -1045,28 +2202,81 @@ class ChatAgent(BaseAgent):
|
|
|
1045
2202
|
content = self._original_system_message.content + language_prompt
|
|
1046
2203
|
return self._original_system_message.create_new_instance(content)
|
|
1047
2204
|
else:
|
|
1048
|
-
return BaseMessage.
|
|
1049
|
-
role_name="Assistant",
|
|
1050
|
-
content=language_prompt,
|
|
1051
|
-
)
|
|
2205
|
+
return BaseMessage.make_system_message(language_prompt)
|
|
1052
2206
|
|
|
1053
2207
|
def init_messages(self) -> None:
|
|
1054
2208
|
r"""Initializes the stored messages list with the current system
|
|
1055
2209
|
message.
|
|
1056
2210
|
"""
|
|
1057
|
-
|
|
2211
|
+
self._reset_summary_state()
|
|
2212
|
+
self.clear_memory()
|
|
1058
2213
|
|
|
1059
|
-
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
|
|
2214
|
+
def update_system_message(
|
|
2215
|
+
self,
|
|
2216
|
+
system_message: Union[BaseMessage, str],
|
|
2217
|
+
reset_memory: bool = True,
|
|
2218
|
+
) -> None:
|
|
2219
|
+
r"""Update the system message.
|
|
2220
|
+
It will reset conversation with new system message.
|
|
2221
|
+
|
|
2222
|
+
Args:
|
|
2223
|
+
system_message (Union[BaseMessage, str]): The new system message.
|
|
2224
|
+
Can be either a BaseMessage object or a string.
|
|
2225
|
+
If a string is provided, it will be converted
|
|
2226
|
+
into a BaseMessage object.
|
|
2227
|
+
reset_memory (bool):
|
|
2228
|
+
Whether to reinitialize conversation messages after updating
|
|
2229
|
+
the system message. Defaults to True.
|
|
2230
|
+
"""
|
|
2231
|
+
if system_message is None:
|
|
2232
|
+
raise ValueError("system_message is required and cannot be None. ")
|
|
2233
|
+
self._original_system_message = (
|
|
2234
|
+
BaseMessage.make_system_message(system_message)
|
|
2235
|
+
if isinstance(system_message, str)
|
|
2236
|
+
else system_message
|
|
2237
|
+
)
|
|
2238
|
+
self._system_message = (
|
|
2239
|
+
self._generate_system_message_for_output_language()
|
|
2240
|
+
)
|
|
2241
|
+
if reset_memory:
|
|
2242
|
+
self.init_messages()
|
|
2243
|
+
|
|
2244
|
+
def append_to_system_message(
|
|
2245
|
+
self, content: str, reset_memory: bool = True
|
|
2246
|
+
) -> None:
|
|
2247
|
+
"""Append additional context to existing system message.
|
|
2248
|
+
|
|
2249
|
+
Args:
|
|
2250
|
+
content (str): The additional system message.
|
|
2251
|
+
reset_memory (bool):
|
|
2252
|
+
Whether to reinitialize conversation messages after appending
|
|
2253
|
+
additional context. Defaults to True.
|
|
2254
|
+
"""
|
|
2255
|
+
original_content = (
|
|
2256
|
+
self._original_system_message.content
|
|
2257
|
+
if self._original_system_message
|
|
2258
|
+
else ""
|
|
2259
|
+
)
|
|
2260
|
+
new_system_message = original_content + '\n' + content
|
|
2261
|
+
self._original_system_message = BaseMessage.make_system_message(
|
|
2262
|
+
new_system_message
|
|
2263
|
+
)
|
|
2264
|
+
self._system_message = (
|
|
2265
|
+
self._generate_system_message_for_output_language()
|
|
2266
|
+
)
|
|
2267
|
+
if reset_memory:
|
|
2268
|
+
self.init_messages()
|
|
2269
|
+
|
|
2270
|
+
def reset_to_original_system_message(self) -> None:
|
|
2271
|
+
r"""Reset system message to original, removing any appended context.
|
|
2272
|
+
|
|
2273
|
+
This method reverts the agent's system message back to its original
|
|
2274
|
+
state, removing any workflow context or other modifications that may
|
|
2275
|
+
have been appended. Useful for resetting agent state in multi-turn
|
|
2276
|
+
scenarios.
|
|
2277
|
+
"""
|
|
2278
|
+
self._system_message = self._original_system_message
|
|
2279
|
+
self.init_messages()
|
|
1070
2280
|
|
|
1071
2281
|
def record_message(self, message: BaseMessage) -> None:
|
|
1072
2282
|
r"""Records the externally provided message into the agent memory as if
|
|
@@ -1129,7 +2339,7 @@ class ChatAgent(BaseAgent):
|
|
|
1129
2339
|
|
|
1130
2340
|
# Create a prompt based on the schema
|
|
1131
2341
|
format_instruction = (
|
|
1132
|
-
"\n\nPlease respond in the following JSON format:\n
|
|
2342
|
+
"\n\nPlease respond in the following JSON format:\n{\n"
|
|
1133
2343
|
)
|
|
1134
2344
|
|
|
1135
2345
|
properties = schema.get("properties", {})
|
|
@@ -1216,6 +2426,33 @@ class ChatAgent(BaseAgent):
|
|
|
1216
2426
|
# and True to indicate we used prompt formatting
|
|
1217
2427
|
return modified_message, None, True
|
|
1218
2428
|
|
|
2429
|
+
def _is_called_from_registered_toolkit(self) -> bool:
|
|
2430
|
+
r"""Check if current step/astep call originates from a
|
|
2431
|
+
RegisteredAgentToolkit.
|
|
2432
|
+
|
|
2433
|
+
This method uses stack inspection to detect if the current call
|
|
2434
|
+
is originating from a toolkit that inherits from
|
|
2435
|
+
RegisteredAgentToolkit. When detected, tools should be disabled to
|
|
2436
|
+
prevent recursive calls.
|
|
2437
|
+
|
|
2438
|
+
Returns:
|
|
2439
|
+
bool: True if called from a RegisteredAgentToolkit, False otherwise
|
|
2440
|
+
"""
|
|
2441
|
+
from camel.toolkits.base import RegisteredAgentToolkit
|
|
2442
|
+
|
|
2443
|
+
try:
|
|
2444
|
+
for frame_info in inspect.stack():
|
|
2445
|
+
frame_locals = frame_info.frame.f_locals
|
|
2446
|
+
if 'self' in frame_locals:
|
|
2447
|
+
caller_self = frame_locals['self']
|
|
2448
|
+
if isinstance(caller_self, RegisteredAgentToolkit):
|
|
2449
|
+
return True
|
|
2450
|
+
|
|
2451
|
+
except Exception:
|
|
2452
|
+
return False
|
|
2453
|
+
|
|
2454
|
+
return False
|
|
2455
|
+
|
|
1219
2456
|
def _apply_prompt_based_parsing(
|
|
1220
2457
|
self,
|
|
1221
2458
|
response: ModelResponse,
|
|
@@ -1232,7 +2469,6 @@ class ChatAgent(BaseAgent):
|
|
|
1232
2469
|
try:
|
|
1233
2470
|
# Try to extract JSON from the response content
|
|
1234
2471
|
import json
|
|
1235
|
-
import re
|
|
1236
2472
|
|
|
1237
2473
|
from pydantic import ValidationError
|
|
1238
2474
|
|
|
@@ -1271,8 +2507,7 @@ class ChatAgent(BaseAgent):
|
|
|
1271
2507
|
|
|
1272
2508
|
if not message.parsed:
|
|
1273
2509
|
logger.warning(
|
|
1274
|
-
f"Failed to parse JSON from response: "
|
|
1275
|
-
f"{content}"
|
|
2510
|
+
f"Failed to parse JSON from response: {content}"
|
|
1276
2511
|
)
|
|
1277
2512
|
|
|
1278
2513
|
except Exception as e:
|
|
@@ -1365,6 +2600,9 @@ class ChatAgent(BaseAgent):
|
|
|
1365
2600
|
a StreamingChatAgentResponse that behaves like
|
|
1366
2601
|
ChatAgentResponse but can also be iterated for
|
|
1367
2602
|
streaming updates.
|
|
2603
|
+
|
|
2604
|
+
Raises:
|
|
2605
|
+
TimeoutError: If the step operation exceeds the configured timeout.
|
|
1368
2606
|
"""
|
|
1369
2607
|
|
|
1370
2608
|
stream = self.model_backend.model_config_dict.get("stream", False)
|
|
@@ -1374,6 +2612,30 @@ class ChatAgent(BaseAgent):
|
|
|
1374
2612
|
generator = self._stream(input_message, response_format)
|
|
1375
2613
|
return StreamingChatAgentResponse(generator)
|
|
1376
2614
|
|
|
2615
|
+
# Execute with timeout if configured
|
|
2616
|
+
if self.step_timeout is not None:
|
|
2617
|
+
with concurrent.futures.ThreadPoolExecutor(
|
|
2618
|
+
max_workers=1
|
|
2619
|
+
) as executor:
|
|
2620
|
+
future = executor.submit(
|
|
2621
|
+
self._step_impl, input_message, response_format
|
|
2622
|
+
)
|
|
2623
|
+
try:
|
|
2624
|
+
return future.result(timeout=self.step_timeout)
|
|
2625
|
+
except concurrent.futures.TimeoutError:
|
|
2626
|
+
future.cancel()
|
|
2627
|
+
raise TimeoutError(
|
|
2628
|
+
f"Step timed out after {self.step_timeout}s"
|
|
2629
|
+
)
|
|
2630
|
+
else:
|
|
2631
|
+
return self._step_impl(input_message, response_format)
|
|
2632
|
+
|
|
2633
|
+
def _step_impl(
|
|
2634
|
+
self,
|
|
2635
|
+
input_message: Union[BaseMessage, str],
|
|
2636
|
+
response_format: Optional[Type[BaseModel]] = None,
|
|
2637
|
+
) -> ChatAgentResponse:
|
|
2638
|
+
r"""Implementation of non-streaming step logic."""
|
|
1377
2639
|
# Set Langfuse session_id using agent_id for trace grouping
|
|
1378
2640
|
try:
|
|
1379
2641
|
from camel.utils.langfuse import set_current_agent_session_id
|
|
@@ -1382,6 +2644,10 @@ class ChatAgent(BaseAgent):
|
|
|
1382
2644
|
except ImportError:
|
|
1383
2645
|
pass # Langfuse not available
|
|
1384
2646
|
|
|
2647
|
+
# Check if this call is from a RegisteredAgentToolkit to prevent tool
|
|
2648
|
+
# use
|
|
2649
|
+
disable_tools = self._is_called_from_registered_toolkit()
|
|
2650
|
+
|
|
1385
2651
|
# Handle response format compatibility with non-strict tools
|
|
1386
2652
|
original_response_format = response_format
|
|
1387
2653
|
input_message, response_format, used_prompt_formatting = (
|
|
@@ -1390,48 +2656,155 @@ class ChatAgent(BaseAgent):
|
|
|
1390
2656
|
)
|
|
1391
2657
|
)
|
|
1392
2658
|
|
|
1393
|
-
# Convert input message to BaseMessage if necessary
|
|
1394
|
-
if isinstance(input_message, str):
|
|
1395
|
-
input_message = BaseMessage.make_user_message(
|
|
1396
|
-
role_name="User", content=input_message
|
|
1397
|
-
)
|
|
2659
|
+
# Convert input message to BaseMessage if necessary
|
|
2660
|
+
if isinstance(input_message, str):
|
|
2661
|
+
input_message = BaseMessage.make_user_message(
|
|
2662
|
+
role_name="User", content=input_message
|
|
2663
|
+
)
|
|
2664
|
+
|
|
2665
|
+
# Add user input to memory
|
|
2666
|
+
self.update_memory(input_message, OpenAIBackendRole.USER)
|
|
2667
|
+
|
|
2668
|
+
tool_call_records: List[ToolCallingRecord] = []
|
|
2669
|
+
external_tool_call_requests: Optional[List[ToolCallRequest]] = None
|
|
2670
|
+
|
|
2671
|
+
accumulated_context_tokens = (
|
|
2672
|
+
0 # This tracks cumulative context tokens, not API usage tokens
|
|
2673
|
+
)
|
|
2674
|
+
|
|
2675
|
+
# Initialize token usage tracker
|
|
2676
|
+
step_token_usage = self._create_token_usage_tracker()
|
|
2677
|
+
iteration_count: int = 0
|
|
2678
|
+
prev_num_openai_messages: int = 0
|
|
2679
|
+
|
|
2680
|
+
while True:
|
|
2681
|
+
if self.pause_event is not None and not self.pause_event.is_set():
|
|
2682
|
+
# Use efficient blocking wait for threading.Event
|
|
2683
|
+
if isinstance(self.pause_event, threading.Event):
|
|
2684
|
+
self.pause_event.wait()
|
|
2685
|
+
else:
|
|
2686
|
+
# Fallback for asyncio.Event in sync context
|
|
2687
|
+
while not self.pause_event.is_set():
|
|
2688
|
+
time.sleep(0.001)
|
|
2689
|
+
|
|
2690
|
+
try:
|
|
2691
|
+
openai_messages, num_tokens = self.memory.get_context()
|
|
2692
|
+
if self.summarize_threshold is not None:
|
|
2693
|
+
threshold = self._calculate_next_summary_threshold()
|
|
2694
|
+
summary_token_count = self._summary_token_count
|
|
2695
|
+
token_limit = self.model_backend.token_limit
|
|
2696
|
+
|
|
2697
|
+
if num_tokens <= token_limit:
|
|
2698
|
+
if (
|
|
2699
|
+
summary_token_count
|
|
2700
|
+
> token_limit * self.summary_window_ratio
|
|
2701
|
+
):
|
|
2702
|
+
logger.info(
|
|
2703
|
+
f"Summary tokens ({summary_token_count}) "
|
|
2704
|
+
f"exceed limit, full compression."
|
|
2705
|
+
)
|
|
2706
|
+
# Summarize everything (including summaries)
|
|
2707
|
+
summary = self.summarize(include_summaries=True)
|
|
2708
|
+
self._update_memory_with_summary(
|
|
2709
|
+
summary.get("summary", ""),
|
|
2710
|
+
include_summaries=True,
|
|
2711
|
+
)
|
|
2712
|
+
elif num_tokens > threshold:
|
|
2713
|
+
logger.info(
|
|
2714
|
+
f"Token count ({num_tokens}) exceed threshold "
|
|
2715
|
+
f"({threshold}). Triggering summarization."
|
|
2716
|
+
)
|
|
2717
|
+
# Only summarize non-summary content
|
|
2718
|
+
summary = self.summarize(include_summaries=False)
|
|
2719
|
+
self._update_memory_with_summary(
|
|
2720
|
+
summary.get("summary", ""),
|
|
2721
|
+
include_summaries=False,
|
|
2722
|
+
)
|
|
2723
|
+
accumulated_context_tokens += num_tokens
|
|
2724
|
+
except RuntimeError as e:
|
|
2725
|
+
return self._step_terminate(
|
|
2726
|
+
e.args[1], tool_call_records, "max_tokens_exceeded"
|
|
2727
|
+
)
|
|
2728
|
+
# Get response from model backend with token limit error handling
|
|
2729
|
+
try:
|
|
2730
|
+
response = self._get_model_response(
|
|
2731
|
+
openai_messages,
|
|
2732
|
+
num_tokens=num_tokens,
|
|
2733
|
+
current_iteration=iteration_count,
|
|
2734
|
+
response_format=response_format,
|
|
2735
|
+
tool_schemas=[]
|
|
2736
|
+
if disable_tools
|
|
2737
|
+
else self._get_full_tool_schemas(),
|
|
2738
|
+
prev_num_openai_messages=prev_num_openai_messages,
|
|
2739
|
+
)
|
|
2740
|
+
except Exception as exc:
|
|
2741
|
+
logger.exception("Model error: %s", exc)
|
|
2742
|
+
|
|
2743
|
+
if self._is_token_limit_error(exc):
|
|
2744
|
+
tool_signature = self._last_tool_call_signature
|
|
2745
|
+
if (
|
|
2746
|
+
tool_signature is not None
|
|
2747
|
+
and tool_signature
|
|
2748
|
+
== self._last_token_limit_tool_signature
|
|
2749
|
+
):
|
|
2750
|
+
description = self._describe_tool_call(
|
|
2751
|
+
self._last_tool_call_record
|
|
2752
|
+
)
|
|
2753
|
+
repeated_msg = (
|
|
2754
|
+
"Context exceeded again by the same tool call."
|
|
2755
|
+
)
|
|
2756
|
+
if description:
|
|
2757
|
+
repeated_msg += f" {description}"
|
|
2758
|
+
raise RuntimeError(repeated_msg) from exc
|
|
2759
|
+
|
|
2760
|
+
user_message_count = sum(
|
|
2761
|
+
1
|
|
2762
|
+
for msg in openai_messages
|
|
2763
|
+
if getattr(msg, "role", None) == "user"
|
|
2764
|
+
)
|
|
2765
|
+
if (
|
|
2766
|
+
user_message_count == 1
|
|
2767
|
+
and getattr(openai_messages[-1], "role", None)
|
|
2768
|
+
== "user"
|
|
2769
|
+
):
|
|
2770
|
+
raise RuntimeError(
|
|
2771
|
+
"The provided user input alone exceeds the "
|
|
2772
|
+
"context window. Please shorten the input."
|
|
2773
|
+
) from exc
|
|
2774
|
+
|
|
2775
|
+
logger.warning(
|
|
2776
|
+
"Token limit exceeded error detected. "
|
|
2777
|
+
"Summarizing context."
|
|
2778
|
+
)
|
|
2779
|
+
|
|
2780
|
+
recent_records: List[ContextRecord]
|
|
2781
|
+
try:
|
|
2782
|
+
recent_records = self.memory.retrieve()
|
|
2783
|
+
except Exception: # pragma: no cover - defensive guard
|
|
2784
|
+
recent_records = []
|
|
1398
2785
|
|
|
1399
|
-
|
|
1400
|
-
|
|
2786
|
+
indices_to_remove = (
|
|
2787
|
+
self._find_indices_to_remove_for_last_tool_pair(
|
|
2788
|
+
recent_records
|
|
2789
|
+
)
|
|
2790
|
+
)
|
|
2791
|
+
self.memory.remove_records_by_indices(indices_to_remove)
|
|
1401
2792
|
|
|
1402
|
-
|
|
1403
|
-
|
|
2793
|
+
summary = self.summarize(include_summaries=False)
|
|
2794
|
+
tool_notice = self._format_tool_limit_notice()
|
|
2795
|
+
summary_messages = summary.get("summary", "")
|
|
1404
2796
|
|
|
1405
|
-
|
|
1406
|
-
|
|
1407
|
-
)
|
|
2797
|
+
if tool_notice:
|
|
2798
|
+
summary_messages += "\n\n" + tool_notice
|
|
1408
2799
|
|
|
1409
|
-
|
|
1410
|
-
|
|
1411
|
-
|
|
1412
|
-
|
|
2800
|
+
self._update_memory_with_summary(
|
|
2801
|
+
summary_messages, include_summaries=False
|
|
2802
|
+
)
|
|
2803
|
+
self._last_token_limit_tool_signature = tool_signature
|
|
2804
|
+
return self._step_impl(input_message, response_format)
|
|
1413
2805
|
|
|
1414
|
-
|
|
1415
|
-
if self.pause_event is not None and not self.pause_event.is_set():
|
|
1416
|
-
while not self.pause_event.is_set():
|
|
1417
|
-
time.sleep(0.001)
|
|
2806
|
+
raise
|
|
1418
2807
|
|
|
1419
|
-
try:
|
|
1420
|
-
openai_messages, num_tokens = self.memory.get_context()
|
|
1421
|
-
accumulated_context_tokens += num_tokens
|
|
1422
|
-
except RuntimeError as e:
|
|
1423
|
-
return self._step_terminate(
|
|
1424
|
-
e.args[1], tool_call_records, "max_tokens_exceeded"
|
|
1425
|
-
)
|
|
1426
|
-
# Get response from model backend
|
|
1427
|
-
response = self._get_model_response(
|
|
1428
|
-
openai_messages,
|
|
1429
|
-
num_tokens=num_tokens,
|
|
1430
|
-
current_iteration=iteration_count,
|
|
1431
|
-
response_format=response_format,
|
|
1432
|
-
tool_schemas=self._get_full_tool_schemas(),
|
|
1433
|
-
prev_num_openai_messages=prev_num_openai_messages,
|
|
1434
|
-
)
|
|
1435
2808
|
prev_num_openai_messages = len(openai_messages)
|
|
1436
2809
|
iteration_count += 1
|
|
1437
2810
|
|
|
@@ -1444,7 +2817,7 @@ class ChatAgent(BaseAgent):
|
|
|
1444
2817
|
if self.stop_event and self.stop_event.is_set():
|
|
1445
2818
|
# Use the _step_terminate to terminate the agent with reason
|
|
1446
2819
|
logger.info(
|
|
1447
|
-
f"Termination triggered at iteration
|
|
2820
|
+
f"Termination triggered at iteration {iteration_count}"
|
|
1448
2821
|
)
|
|
1449
2822
|
return self._step_terminate(
|
|
1450
2823
|
accumulated_context_tokens,
|
|
@@ -1467,8 +2840,11 @@ class ChatAgent(BaseAgent):
|
|
|
1467
2840
|
self.pause_event is not None
|
|
1468
2841
|
and not self.pause_event.is_set()
|
|
1469
2842
|
):
|
|
1470
|
-
|
|
1471
|
-
|
|
2843
|
+
if isinstance(self.pause_event, threading.Event):
|
|
2844
|
+
self.pause_event.wait()
|
|
2845
|
+
else:
|
|
2846
|
+
while not self.pause_event.is_set():
|
|
2847
|
+
time.sleep(0.001)
|
|
1472
2848
|
result = self._execute_tool(tool_call_request)
|
|
1473
2849
|
tool_call_records.append(result)
|
|
1474
2850
|
|
|
@@ -1544,6 +2920,10 @@ class ChatAgent(BaseAgent):
|
|
|
1544
2920
|
True, returns an AsyncStreamingChatAgentResponse that can be
|
|
1545
2921
|
awaited for the final result or async iterated for streaming
|
|
1546
2922
|
updates.
|
|
2923
|
+
|
|
2924
|
+
Raises:
|
|
2925
|
+
asyncio.TimeoutError: If the step operation exceeds the configured
|
|
2926
|
+
timeout.
|
|
1547
2927
|
"""
|
|
1548
2928
|
|
|
1549
2929
|
try:
|
|
@@ -1559,9 +2939,22 @@ class ChatAgent(BaseAgent):
|
|
|
1559
2939
|
async_generator = self._astream(input_message, response_format)
|
|
1560
2940
|
return AsyncStreamingChatAgentResponse(async_generator)
|
|
1561
2941
|
else:
|
|
1562
|
-
|
|
1563
|
-
|
|
1564
|
-
|
|
2942
|
+
if self.step_timeout is not None:
|
|
2943
|
+
try:
|
|
2944
|
+
return await asyncio.wait_for(
|
|
2945
|
+
self._astep_non_streaming_task(
|
|
2946
|
+
input_message, response_format
|
|
2947
|
+
),
|
|
2948
|
+
timeout=self.step_timeout,
|
|
2949
|
+
)
|
|
2950
|
+
except asyncio.TimeoutError:
|
|
2951
|
+
raise asyncio.TimeoutError(
|
|
2952
|
+
f"Async step timed out after {self.step_timeout}s"
|
|
2953
|
+
)
|
|
2954
|
+
else:
|
|
2955
|
+
return await self._astep_non_streaming_task(
|
|
2956
|
+
input_message, response_format
|
|
2957
|
+
)
|
|
1565
2958
|
|
|
1566
2959
|
async def _astep_non_streaming_task(
|
|
1567
2960
|
self,
|
|
@@ -1577,6 +2970,10 @@ class ChatAgent(BaseAgent):
|
|
|
1577
2970
|
except ImportError:
|
|
1578
2971
|
pass # Langfuse not available
|
|
1579
2972
|
|
|
2973
|
+
# Check if this call is from a RegisteredAgentToolkit to prevent tool
|
|
2974
|
+
# use
|
|
2975
|
+
disable_tools = self._is_called_from_registered_toolkit()
|
|
2976
|
+
|
|
1580
2977
|
# Handle response format compatibility with non-strict tools
|
|
1581
2978
|
original_response_format = response_format
|
|
1582
2979
|
input_message, response_format, used_prompt_formatting = (
|
|
@@ -1602,25 +2999,139 @@ class ChatAgent(BaseAgent):
|
|
|
1602
2999
|
step_token_usage = self._create_token_usage_tracker()
|
|
1603
3000
|
iteration_count: int = 0
|
|
1604
3001
|
prev_num_openai_messages: int = 0
|
|
3002
|
+
|
|
1605
3003
|
while True:
|
|
1606
3004
|
if self.pause_event is not None and not self.pause_event.is_set():
|
|
1607
|
-
|
|
3005
|
+
if isinstance(self.pause_event, asyncio.Event):
|
|
3006
|
+
await self.pause_event.wait()
|
|
3007
|
+
elif isinstance(self.pause_event, threading.Event):
|
|
3008
|
+
# For threading.Event in async context, run in executor
|
|
3009
|
+
loop = asyncio.get_event_loop()
|
|
3010
|
+
await loop.run_in_executor(None, self.pause_event.wait)
|
|
1608
3011
|
try:
|
|
1609
3012
|
openai_messages, num_tokens = self.memory.get_context()
|
|
3013
|
+
if self.summarize_threshold is not None:
|
|
3014
|
+
threshold = self._calculate_next_summary_threshold()
|
|
3015
|
+
summary_token_count = self._summary_token_count
|
|
3016
|
+
token_limit = self.model_backend.token_limit
|
|
3017
|
+
|
|
3018
|
+
if num_tokens <= token_limit:
|
|
3019
|
+
if (
|
|
3020
|
+
summary_token_count
|
|
3021
|
+
> token_limit * self.summary_window_ratio
|
|
3022
|
+
):
|
|
3023
|
+
logger.info(
|
|
3024
|
+
f"Summary tokens ({summary_token_count}) "
|
|
3025
|
+
f"exceed limit, full compression."
|
|
3026
|
+
)
|
|
3027
|
+
# Summarize everything (including summaries)
|
|
3028
|
+
summary = await self.asummarize(
|
|
3029
|
+
include_summaries=True
|
|
3030
|
+
)
|
|
3031
|
+
self._update_memory_with_summary(
|
|
3032
|
+
summary.get("summary", ""),
|
|
3033
|
+
include_summaries=True,
|
|
3034
|
+
)
|
|
3035
|
+
elif num_tokens > threshold:
|
|
3036
|
+
logger.info(
|
|
3037
|
+
f"Token count ({num_tokens}) exceed threshold "
|
|
3038
|
+
"({threshold}). Triggering summarization."
|
|
3039
|
+
)
|
|
3040
|
+
# Only summarize non-summary content
|
|
3041
|
+
summary = await self.asummarize(
|
|
3042
|
+
include_summaries=False
|
|
3043
|
+
)
|
|
3044
|
+
self._update_memory_with_summary(
|
|
3045
|
+
summary.get("summary", ""),
|
|
3046
|
+
include_summaries=False,
|
|
3047
|
+
)
|
|
1610
3048
|
accumulated_context_tokens += num_tokens
|
|
1611
3049
|
except RuntimeError as e:
|
|
1612
3050
|
return self._step_terminate(
|
|
1613
3051
|
e.args[1], tool_call_records, "max_tokens_exceeded"
|
|
1614
3052
|
)
|
|
3053
|
+
# Get response from model backend with token limit error handling
|
|
3054
|
+
try:
|
|
3055
|
+
response = await self._aget_model_response(
|
|
3056
|
+
openai_messages,
|
|
3057
|
+
num_tokens=num_tokens,
|
|
3058
|
+
current_iteration=iteration_count,
|
|
3059
|
+
response_format=response_format,
|
|
3060
|
+
tool_schemas=[]
|
|
3061
|
+
if disable_tools
|
|
3062
|
+
else self._get_full_tool_schemas(),
|
|
3063
|
+
prev_num_openai_messages=prev_num_openai_messages,
|
|
3064
|
+
)
|
|
3065
|
+
except Exception as exc:
|
|
3066
|
+
logger.exception("Model error: %s", exc)
|
|
3067
|
+
|
|
3068
|
+
if self._is_token_limit_error(exc):
|
|
3069
|
+
tool_signature = self._last_tool_call_signature
|
|
3070
|
+
if (
|
|
3071
|
+
tool_signature is not None
|
|
3072
|
+
and tool_signature
|
|
3073
|
+
== self._last_token_limit_tool_signature
|
|
3074
|
+
):
|
|
3075
|
+
description = self._describe_tool_call(
|
|
3076
|
+
self._last_tool_call_record
|
|
3077
|
+
)
|
|
3078
|
+
repeated_msg = (
|
|
3079
|
+
"Context exceeded again by the same tool call."
|
|
3080
|
+
)
|
|
3081
|
+
if description:
|
|
3082
|
+
repeated_msg += f" {description}"
|
|
3083
|
+
raise RuntimeError(repeated_msg) from exc
|
|
3084
|
+
|
|
3085
|
+
user_message_count = sum(
|
|
3086
|
+
1
|
|
3087
|
+
for msg in openai_messages
|
|
3088
|
+
if getattr(msg, "role", None) == "user"
|
|
3089
|
+
)
|
|
3090
|
+
if (
|
|
3091
|
+
user_message_count == 1
|
|
3092
|
+
and getattr(openai_messages[-1], "role", None)
|
|
3093
|
+
== "user"
|
|
3094
|
+
):
|
|
3095
|
+
raise RuntimeError(
|
|
3096
|
+
"The provided user input alone exceeds the"
|
|
3097
|
+
"context window. Please shorten the input."
|
|
3098
|
+
) from exc
|
|
3099
|
+
|
|
3100
|
+
logger.warning(
|
|
3101
|
+
"Token limit exceeded error detected. "
|
|
3102
|
+
"Summarizing context."
|
|
3103
|
+
)
|
|
3104
|
+
|
|
3105
|
+
recent_records: List[ContextRecord]
|
|
3106
|
+
try:
|
|
3107
|
+
recent_records = self.memory.retrieve()
|
|
3108
|
+
except Exception: # pragma: no cover - defensive guard
|
|
3109
|
+
recent_records = []
|
|
3110
|
+
|
|
3111
|
+
indices_to_remove = (
|
|
3112
|
+
self._find_indices_to_remove_for_last_tool_pair(
|
|
3113
|
+
recent_records
|
|
3114
|
+
)
|
|
3115
|
+
)
|
|
3116
|
+
self.memory.remove_records_by_indices(indices_to_remove)
|
|
3117
|
+
|
|
3118
|
+
summary = await self.asummarize()
|
|
3119
|
+
|
|
3120
|
+
tool_notice = self._format_tool_limit_notice()
|
|
3121
|
+
summary_messages = summary.get("summary", "")
|
|
3122
|
+
|
|
3123
|
+
if tool_notice:
|
|
3124
|
+
summary_messages += "\n\n" + tool_notice
|
|
3125
|
+
self._update_memory_with_summary(
|
|
3126
|
+
summary_messages, include_summaries=False
|
|
3127
|
+
)
|
|
3128
|
+
self._last_token_limit_tool_signature = tool_signature
|
|
3129
|
+
return await self._astep_non_streaming_task(
|
|
3130
|
+
input_message, response_format
|
|
3131
|
+
)
|
|
3132
|
+
|
|
3133
|
+
raise
|
|
1615
3134
|
|
|
1616
|
-
response = await self._aget_model_response(
|
|
1617
|
-
openai_messages,
|
|
1618
|
-
num_tokens=num_tokens,
|
|
1619
|
-
current_iteration=iteration_count,
|
|
1620
|
-
response_format=response_format,
|
|
1621
|
-
tool_schemas=self._get_full_tool_schemas(),
|
|
1622
|
-
prev_num_openai_messages=prev_num_openai_messages,
|
|
1623
|
-
)
|
|
1624
3135
|
prev_num_openai_messages = len(openai_messages)
|
|
1625
3136
|
iteration_count += 1
|
|
1626
3137
|
|
|
@@ -1633,7 +3144,7 @@ class ChatAgent(BaseAgent):
|
|
|
1633
3144
|
if self.stop_event and self.stop_event.is_set():
|
|
1634
3145
|
# Use the _step_terminate to terminate the agent with reason
|
|
1635
3146
|
logger.info(
|
|
1636
|
-
f"Termination triggered at iteration
|
|
3147
|
+
f"Termination triggered at iteration {iteration_count}"
|
|
1637
3148
|
)
|
|
1638
3149
|
return self._step_terminate(
|
|
1639
3150
|
accumulated_context_tokens,
|
|
@@ -1656,7 +3167,13 @@ class ChatAgent(BaseAgent):
|
|
|
1656
3167
|
self.pause_event is not None
|
|
1657
3168
|
and not self.pause_event.is_set()
|
|
1658
3169
|
):
|
|
1659
|
-
|
|
3170
|
+
if isinstance(self.pause_event, asyncio.Event):
|
|
3171
|
+
await self.pause_event.wait()
|
|
3172
|
+
elif isinstance(self.pause_event, threading.Event):
|
|
3173
|
+
loop = asyncio.get_event_loop()
|
|
3174
|
+
await loop.run_in_executor(
|
|
3175
|
+
None, self.pause_event.wait
|
|
3176
|
+
)
|
|
1660
3177
|
tool_call_record = await self._aexecute_tool(
|
|
1661
3178
|
tool_call_request
|
|
1662
3179
|
)
|
|
@@ -1691,6 +3208,8 @@ class ChatAgent(BaseAgent):
|
|
|
1691
3208
|
if self.prune_tool_calls_from_memory and tool_call_records:
|
|
1692
3209
|
self.memory.clean_tool_calls()
|
|
1693
3210
|
|
|
3211
|
+
self._last_token_limit_user_signature = None
|
|
3212
|
+
|
|
1694
3213
|
return self._convert_to_chatagent_response(
|
|
1695
3214
|
response,
|
|
1696
3215
|
tool_call_records,
|
|
@@ -1776,64 +3295,62 @@ class ChatAgent(BaseAgent):
|
|
|
1776
3295
|
tool_schemas: Optional[List[Dict[str, Any]]] = None,
|
|
1777
3296
|
prev_num_openai_messages: int = 0,
|
|
1778
3297
|
) -> ModelResponse:
|
|
1779
|
-
r"""Internal function for agent step model response.
|
|
1780
|
-
|
|
1781
|
-
openai_messages (List[OpenAIMessage]): The OpenAI
|
|
1782
|
-
messages to process.
|
|
1783
|
-
num_tokens (int): The number of tokens in the context.
|
|
1784
|
-
current_iteration (int): The current iteration of the step.
|
|
1785
|
-
response_format (Optional[Type[BaseModel]]): The response
|
|
1786
|
-
format to use.
|
|
1787
|
-
tool_schemas (Optional[List[Dict[str, Any]]]): The tool
|
|
1788
|
-
schemas to use.
|
|
1789
|
-
prev_num_openai_messages (int): The number of openai messages
|
|
1790
|
-
logged in the previous iteration.
|
|
1791
|
-
|
|
1792
|
-
Returns:
|
|
1793
|
-
ModelResponse: The model response.
|
|
1794
|
-
"""
|
|
3298
|
+
r"""Internal function for agent step model response."""
|
|
3299
|
+
last_error = None
|
|
1795
3300
|
|
|
1796
|
-
|
|
1797
|
-
|
|
1798
|
-
|
|
1799
|
-
|
|
1800
|
-
|
|
1801
|
-
|
|
1802
|
-
|
|
1803
|
-
|
|
1804
|
-
|
|
1805
|
-
|
|
1806
|
-
|
|
1807
|
-
|
|
1808
|
-
|
|
1809
|
-
|
|
1810
|
-
|
|
1811
|
-
|
|
1812
|
-
|
|
1813
|
-
|
|
1814
|
-
|
|
1815
|
-
|
|
3301
|
+
for attempt in range(self.retry_attempts):
|
|
3302
|
+
try:
|
|
3303
|
+
response = self.model_backend.run(
|
|
3304
|
+
openai_messages, response_format, tool_schemas or None
|
|
3305
|
+
)
|
|
3306
|
+
if response:
|
|
3307
|
+
break
|
|
3308
|
+
except RateLimitError as e:
|
|
3309
|
+
if self._is_token_limit_error(e):
|
|
3310
|
+
raise
|
|
3311
|
+
last_error = e
|
|
3312
|
+
if attempt < self.retry_attempts - 1:
|
|
3313
|
+
delay = min(self.retry_delay * (2**attempt), 60.0)
|
|
3314
|
+
delay = random.uniform(0, delay) # Add jitter
|
|
+                    logger.warning(
+                        f"Rate limit hit (attempt {attempt + 1}"
+                        f"/{self.retry_attempts}). Retrying in {delay:.1f}s"
+                    )
+                    time.sleep(delay)
+                else:
+                    logger.error(
+                        f"Rate limit exhausted after "
+                        f"{self.retry_attempts} attempts"
+                    )
+            except Exception:
+                logger.error(
+                    f"Model error: {self.model_backend.model_type}",
+                )
+                raise
+        else:
+            # Loop completed without success
             raise ModelProcessingError(
-                f"Unable to process messages:
-                f"
+                f"Unable to process messages: "
+                f"{str(last_error) if last_error else 'Unknown error'}"
             )
 
-
+        # Log success
+        sanitized = self._sanitize_messages_for_logging(
             openai_messages, prev_num_openai_messages
         )
         logger.info(
-            f"Model {self.model_backend.model_type}
-            f"
-            f"iteration {current_iteration}, "
-            f"processed these messages: {sanitized_messages}"
+            f"Model {self.model_backend.model_type} "
+            f"[{current_iteration}]: {sanitized}"
         )
+
         if not isinstance(response, ChatCompletion):
             raise TypeError(
-                f"Expected
-                f"got {type(response).__name__} instead."
+                f"Expected ChatCompletion, got {type(response).__name__}"
             )
+
         return self._handle_batch_response(response)
 
+    @observe()
     async def _aget_model_response(
         self,
         openai_messages: List[OpenAIMessage],
@@ -1843,62 +3360,61 @@ class ChatAgent(BaseAgent):
         tool_schemas: Optional[List[Dict[str, Any]]] = None,
         prev_num_openai_messages: int = 0,
     ) -> ModelResponse:
-        r"""Internal function for agent async step model response.
-
-            openai_messages (List[OpenAIMessage]): The OpenAI messages
-                to process.
-            num_tokens (int): The number of tokens in the context.
-            current_iteration (int): The current iteration of the step.
-            response_format (Optional[Type[BaseModel]]): The response
-                format to use.
-            tool_schemas (Optional[List[Dict[str, Any]]]): The tool schemas
-                to use.
-            prev_num_openai_messages (int): The number of openai messages
-                logged in the previous iteration.
-
-        Returns:
-            ModelResponse: The model response.
-        """
-
-        response = None
-        try:
-            response = await self.model_backend.arun(
-                openai_messages, response_format, tool_schemas or None
-            )
-        except Exception as exc:
-            logger.error(
-                f"An error occurred while running model "
-                f"{self.model_backend.model_type}, "
-                f"index: {self.model_backend.current_model_index}",
-                exc_info=exc,
-            )
-            error_info = str(exc)
+        r"""Internal function for agent async step model response."""
+        last_error = None
 
-
-
-
-
-
-
+        for attempt in range(self.retry_attempts):
+            try:
+                response = await self.model_backend.arun(
+                    openai_messages, response_format, tool_schemas or None
+                )
+                if response:
+                    break
+            except RateLimitError as e:
+                if self._is_token_limit_error(e):
+                    raise
+                last_error = e
+                if attempt < self.retry_attempts - 1:
+                    delay = min(self.retry_delay * (2**attempt), 60.0)
+                    delay = random.uniform(0, delay)  # Add jitter
+                    logger.warning(
+                        f"Rate limit hit (attempt {attempt + 1}"
+                        f"/{self.retry_attempts}). "
+                        f"Retrying in {delay:.1f}s"
+                    )
+                    await asyncio.sleep(delay)
+                else:
+                    logger.error(
+                        f"Rate limit exhausted after "
+                        f"{self.retry_attempts} attempts"
+                    )
+            except Exception:
+                logger.error(
+                    f"Model error: {self.model_backend.model_type}",
+                    exc_info=True,
+                )
+                raise
+        else:
+            # Loop completed without success
             raise ModelProcessingError(
-                f"Unable to process messages:
-                f"
+                f"Unable to process messages: "
+                f"{str(last_error) if last_error else 'Unknown error'}"
             )
 
-
+        # Log success
+        sanitized = self._sanitize_messages_for_logging(
             openai_messages, prev_num_openai_messages
         )
         logger.info(
-            f"Model {self.model_backend.model_type}
-            f"
-            f"iteration {current_iteration}, "
-            f"processed these messages: {sanitized_messages}"
+            f"Model {self.model_backend.model_type} "
+            f"[{current_iteration}]: {sanitized}"
         )
+
         if not isinstance(response, ChatCompletion):
             raise TypeError(
-                f"Expected
-                f"got {type(response).__name__} instead."
+                f"Expected ChatCompletion, got {type(response).__name__}"
             )
+
         return self._handle_batch_response(response)
 
     def _sanitize_messages_for_logging(
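The new retry loops above use capped exponential backoff with full jitter: the cap doubles each attempt up to 60 s, and the actual sleep is drawn uniformly from [0, cap]. A minimal standalone sketch of that policy (the `RateLimitError` class and the default attempt/delay values are illustrative stand-ins, not the agent's own configuration):

```python
import random
import time


class RateLimitError(Exception):
    """Stand-in for the provider's rate-limit exception."""


def call_with_backoff(fn, retry_attempts: int = 3, retry_delay: float = 1.0):
    # Capped exponential backoff with full jitter, mirroring the diff:
    # cap = min(retry_delay * 2**attempt, 60), sleep a random value in [0, cap].
    last_error = None
    for attempt in range(retry_attempts):
        try:
            return fn()
        except RateLimitError as e:
            last_error = e
            if attempt < retry_attempts - 1:
                delay = random.uniform(0, min(retry_delay * (2**attempt), 60.0))
                time.sleep(delay)
    raise RuntimeError(f"Unable to process after retries: {last_error}")
```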
@@ -1915,11 +3431,6 @@ class ChatAgent(BaseAgent):
         Returns:
             List[OpenAIMessage]: The sanitized OpenAI messages.
         """
-        import hashlib
-        import os
-        import re
-        import tempfile
-
         # Create a copy of messages for logging to avoid modifying the
         # original messages
         sanitized_messages = []
@@ -1960,7 +3471,14 @@ class ChatAgent(BaseAgent):
 
                             # Save image to temp directory for viewing
                             try:
-
+                                # Sanitize img_format to prevent path
+                                # traversal
+                                safe_format = re.sub(
+                                    r'[^a-zA-Z0-9]', '', img_format
+                                )[:10]
+                                img_filename = (
+                                    f"image_{img_hash}.{safe_format}"
+                                )
 
                                 temp_dir = tempfile.gettempdir()
                                 img_path = os.path.join(
@@ -1975,6 +3493,9 @@ class ChatAgent(BaseAgent):
                                         base64_data
                                     )
                                 )
+                                # Register for cleanup
+                                with _temp_files_lock:
+                                    _temp_files.add(img_path)
 
                                 # Create a file:// URL that can be
                                 # opened
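The `safe_format` change strips every character outside `[a-zA-Z0-9]` from the reported image format and caps it at ten characters before it becomes part of a temp-file name, so a value such as `'png/../../etc'` cannot traverse out of the temp directory. A small sketch of the same idea; deriving `img_hash` from an MD5 of the image bytes is an assumption for illustration only:

```python
import hashlib
import os
import re
import tempfile


def temp_image_path(image_bytes: bytes, img_format: str) -> str:
    # Keep only alphanumeric characters and cap the length, as in the diff.
    safe_format = re.sub(r'[^a-zA-Z0-9]', '', img_format)[:10]
    # Hypothetical hash of the image content, used only to build a stable name.
    img_hash = hashlib.md5(image_bytes).hexdigest()[:16]
    return os.path.join(tempfile.gettempdir(), f"image_{img_hash}.{safe_format}")


print(temp_image_path(b"...", "png/../../etc"))  # stays inside the temp directory
```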
@@ -2148,9 +3669,9 @@ class ChatAgent(BaseAgent):
         if tool_calls := response.choices[0].message.tool_calls:
             tool_call_requests = []
             for tool_call in tool_calls:
-                tool_name = tool_call.function.name
+                tool_name = tool_call.function.name  # type: ignore[union-attr]
                 tool_call_id = tool_call.id
-                args = json.loads(tool_call.function.arguments)
+                args = json.loads(tool_call.function.arguments)  # type: ignore[union-attr]
                 tool_call_request = ToolCallRequest(
                     tool_name=tool_name, args=args, tool_call_id=tool_call_id
                 )
@@ -2227,7 +3748,8 @@ class ChatAgent(BaseAgent):
         try:
             raw_result = tool(**args)
             if self.mask_tool_output:
-                self.
+                with self._secure_result_store_lock:
+                    self._secure_result_store[tool_call_id] = raw_result
                 result = (
                     "[The tool has been executed successfully, but the output"
                     " from the tool is masked. You can move forward]"
@@ -2285,7 +3807,7 @@ class ChatAgent(BaseAgent):
             # Capture the error message to prevent framework crash
             error_msg = f"Error executing async tool '{func_name}': {e!s}"
             result = f"Tool execution failed: {error_msg}"
-
+            logger.warning(error_msg)
         return self._record_tool_calling(func_name, args, result, tool_call_id)
 
     def _record_tool_calling(
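Masked tool output is now stashed in `_secure_result_store` under a dedicated lock, so two tools finishing at the same time cannot interleave their writes. A minimal sketch of that pattern with a plain dict guarded by `threading.Lock` (the class and method names here are illustrative, not the agent's actual attributes):

```python
import threading
from typing import Any, Dict, Optional


class MaskedResultStore:
    """Lock-protected mapping from tool_call_id to the unmasked result."""

    def __init__(self) -> None:
        self._lock = threading.Lock()
        self._results: Dict[str, Any] = {}

    def put(self, tool_call_id: str, raw_result: Any) -> str:
        with self._lock:
            self._results[tool_call_id] = raw_result
        # The model only ever sees this placeholder string.
        return "[Tool executed successfully, but the output is masked.]"

    def reveal(self, tool_call_id: str) -> Optional[Any]:
        with self._lock:
            return self._results.get(tool_call_id)
```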
@@ -2336,22 +3858,34 @@ class ChatAgent(BaseAgent):
         # This ensures the assistant message (tool call) always appears before
         # the function message (tool result) in the conversation context
         # Use time.time_ns() for nanosecond precision to avoid collisions
-        import time
-
         current_time_ns = time.time_ns()
         base_timestamp = current_time_ns / 1_000_000_000  # Convert to seconds
 
         self.update_memory(
-            assist_msg,
+            assist_msg,
+            OpenAIBackendRole.ASSISTANT,
+            timestamp=base_timestamp,
+            return_records=self._enable_snapshot_clean,
         )
 
         # Add minimal increment to ensure function message comes after
-        self.update_memory(
+        func_records = self.update_memory(
             func_msg,
             OpenAIBackendRole.FUNCTION,
             timestamp=base_timestamp + 1e-6,
+            return_records=self._enable_snapshot_clean,
         )
 
+        # Register tool output for snapshot cleaning if enabled
+        if self._enable_snapshot_clean and not mask_output and func_records:
+            serialized_result = self._serialize_tool_result(result)
+            self._register_tool_output_for_cache(
+                func_name,
+                tool_call_id,
+                serialized_result,
+                cast(List[MemoryRecord], func_records),
+            )
+
         # Record information about this tool call
         tool_record = ToolCallingRecord(
             tool_name=func_name,
@@ -2360,6 +3894,7 @@ class ChatAgent(BaseAgent):
             tool_call_id=tool_call_id,
         )
 
+        self._update_last_tool_call_state(tool_record)
         return tool_record
 
     def _stream(
@@ -2428,7 +3963,7 @@ class ChatAgent(BaseAgent):
             # Check termination condition
             if self.stop_event and self.stop_event.is_set():
                 logger.info(
-                    f"Termination triggered at iteration
+                    f"Termination triggered at iteration {iteration_count}"
                 )
                 yield self._step_terminate(
                     num_tokens, tool_call_records, "termination_triggered"
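`_record_tool_calling` now takes a single `time.time_ns()` reading and writes the assistant (tool call) record at that timestamp and the function (tool result) record one microsecond later, so sorting memory by timestamp always keeps the call before its result even when both are recorded within the same clock tick. A tiny sketch of that ordering guarantee:

```python
import time


def paired_timestamps() -> tuple:
    # One nanosecond-precision reading shared by both records.
    base = time.time_ns() / 1_000_000_000
    assist_ts = base          # assistant message: the tool call
    func_ts = base + 1e-6     # function message: the tool result
    return assist_ts, func_ts


assist_ts, func_ts = paired_timestamps()
assert func_ts > assist_ts  # stable ordering regardless of timer resolution
```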
@@ -2611,12 +4146,6 @@ class ChatAgent(BaseAgent):
         stream_completed = False
 
         for chunk in stream:
-            # Update token usage if available
-            if chunk.usage:
-                self._update_token_usage_tracker(
-                    step_token_usage, safe_model_dump(chunk.usage)
-                )
-
             # Process chunk delta
             if chunk.choices and len(chunk.choices) > 0:
                 choice = chunk.choices[0]
@@ -2649,12 +4178,6 @@ class ChatAgent(BaseAgent):
                         # If we have complete tool calls, execute them with
                         # sync status updates
                         if accumulated_tool_calls:
-                            # Record assistant message with tool calls first
-                            self._record_assistant_tool_calls_message(
-                                accumulated_tool_calls,
-                                content_accumulator.get_full_content(),
-                            )
-
                             # Execute tools synchronously with
                             # optimized status updates
                             for (
@@ -2687,7 +4210,49 @@ class ChatAgent(BaseAgent):
                             )
 
                             self.record_message(final_message)
-
+            elif chunk.usage and not chunk.choices:
+                # Handle final chunk with usage but empty choices
+                # This happens when stream_options={"include_usage": True}
+                # Update the final usage from this chunk
+                self._update_token_usage_tracker(
+                    step_token_usage, safe_model_dump(chunk.usage)
+                )
+
+                # Create final response with final usage
+                final_content = content_accumulator.get_full_content()
+                if final_content.strip():
+                    final_message = BaseMessage(
+                        role_name=self.role_name,
+                        role_type=self.role_type,
+                        meta_dict={},
+                        content=final_content,
+                    )
+
+                    if response_format:
+                        self._try_format_message(
+                            final_message, response_format
+                        )
+
+                    # Create final response with final usage (not partial)
+                    final_response = ChatAgentResponse(
+                        msgs=[final_message],
+                        terminated=False,
+                        info={
+                            "id": getattr(chunk, 'id', ''),
+                            "usage": step_token_usage.copy(),
+                            "finish_reasons": ["stop"],
+                            "num_tokens": self._get_token_count(final_content),
+                            "tool_calls": tool_call_records or [],
+                            "external_tool_requests": None,
+                            "streaming": False,
+                            "partial": False,
+                        },
+                    )
+                    yield final_response
+                break
+            elif stream_completed:
+                # If we've already seen finish_reason but no usage chunk, exit
+                break
 
         return stream_completed, tool_calls_complete
 
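The sync streaming loop no longer adds usage on every delta; instead it waits for the trailing chunk that OpenAI-compatible backends emit when `stream_options={"include_usage": True}` is set, which carries the aggregate `usage` and an empty `choices` list. A hedged sketch of a consumer shaped the same way (the chunk attributes mirror the OpenAI streaming response; everything else is illustrative):

```python
def consume_stream(stream):
    # `stream` yields chunks with `.choices` (possibly empty) and `.usage` (possibly None).
    parts, usage = [], None
    for chunk in stream:
        if chunk.choices:
            delta = chunk.choices[0].delta
            if getattr(delta, "content", None):
                parts.append(delta.content)
        elif chunk.usage:
            # Final usage-only chunk: record totals once and stop reading.
            usage = chunk.usage
            break
    return "".join(parts), usage
```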
@@ -2767,77 +4332,70 @@ class ChatAgent(BaseAgent):
         accumulated_tool_calls: Dict[str, Any],
         tool_call_records: List[ToolCallingRecord],
     ) -> Generator[ChatAgentResponse, None, None]:
-        r"""Execute multiple tools synchronously with
-
-        non-blocking status streaming."""
-
-        def tool_worker(result_queue, tool_call_data):
-            try:
-                tool_call_record = self._execute_tool_from_stream_data(
-                    tool_call_data
-                )
-                result_queue.put(tool_call_record)
-            except Exception as e:
-                logger.error(f"Error in threaded tool execution: {e}")
-                result_queue.put(None)
+        r"""Execute multiple tools synchronously with proper content
+        accumulation, using ThreadPoolExecutor for better timeout handling."""
 
         tool_calls_to_execute = []
         for _tool_call_index, tool_call_data in accumulated_tool_calls.items():
             if tool_call_data.get('complete', False):
                 tool_calls_to_execute.append(tool_call_data)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            )
-            thread.start()
-
-            # Log debug info instead of adding to content
-            logger.info(
-                f"Calling function: {function_name} with arguments: {args}"
-            )
-
-            # wait for tool thread to finish with optional timeout
-            thread.join(self.tool_execution_timeout)
+        if not tool_calls_to_execute:
+            # No tools to execute, return immediately
+            return
+            yield  # Make this a generator
+
+        # Execute tools using ThreadPoolExecutor for proper timeout handling
+        # Use max_workers=len() for parallel execution, with min of 1
+        with concurrent.futures.ThreadPoolExecutor(
+            max_workers=max(1, len(tool_calls_to_execute))
+        ) as executor:
+            # Submit all tools first (parallel execution)
+            futures_map = {}
+            for tool_call_data in tool_calls_to_execute:
+                function_name = tool_call_data['function']['name']
+                try:
+                    args = json.loads(tool_call_data['function']['arguments'])
+                except json.JSONDecodeError:
+                    args = tool_call_data['function']['arguments']
 
-
-
-
-            logger.warning(
-                f"Function '{function_name}' timed out after "
-                f"{self.tool_execution_timeout} seconds"
+                # Log debug info
+                logger.info(
+                    f"Calling function: {function_name} with arguments: {args}"
                 )
 
-            #
-
-
-
-
-
-
-
-
+                # Submit tool execution (non-blocking)
+                future = executor.submit(
+                    self._execute_tool_from_stream_data, tool_call_data
+                )
+                futures_map[future] = (function_name, tool_call_data)
+
+            # Wait for all futures to complete (or timeout)
+            for future in concurrent.futures.as_completed(
+                futures_map.keys(),
+                timeout=self.tool_execution_timeout
+                if self.tool_execution_timeout
+                else None,
+            ):
+                function_name, tool_call_data = futures_map[future]
 
-
-
-
-
-
+                try:
+                    tool_call_record = future.result()
+                    if tool_call_record:
+                        tool_call_records.append(tool_call_record)
+                        logger.info(
+                            f"Function output: {tool_call_record.result}"
+                        )
+                except concurrent.futures.TimeoutError:
+                    logger.warning(
+                        f"Function '{function_name}' timed out after "
+                        f"{self.tool_execution_timeout} seconds"
+                    )
+                    future.cancel()
+                except Exception as e:
+                    logger.error(
+                        f"Error executing tool '{function_name}': {e}"
+                    )
 
         # Ensure this function remains a generator (required by type signature)
         return
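The rewritten tool runner submits every completed tool call to a `ThreadPoolExecutor` and then drains them with `concurrent.futures.as_completed(..., timeout=...)`, which turns the old silent `thread.join(timeout)` into an explicit `TimeoutError` once the batch deadline passes. A self-contained sketch of that structure (the callables and the 0.5 s deadline are made up for the example):

```python
import concurrent.futures
import time


def run_tools(tools, timeout=None):
    # `tools` is a list of zero-argument callables; results are collected as they finish.
    results, errors = [], []
    with concurrent.futures.ThreadPoolExecutor(
        max_workers=max(1, len(tools))
    ) as executor:
        futures = {executor.submit(fn): fn for fn in tools}
        try:
            for future in concurrent.futures.as_completed(futures, timeout=timeout):
                try:
                    results.append(future.result())
                except Exception as exc:
                    errors.append(exc)
        except concurrent.futures.TimeoutError:
            # Overall deadline expired; give up on whatever has not finished yet.
            for future in futures:
                future.cancel()
    return results, errors


fast = lambda: "ok"
slow = lambda: (time.sleep(2), "late")[1]
print(run_tools([fast, slow], timeout=0.5))  # (['ok'], []) once the deadline hits
```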
@@ -2857,10 +4415,19 @@ class ChatAgent(BaseAgent):
             tool = self._internal_tools[function_name]
             try:
                 result = tool(**args)
+                # First, create and record the assistant message with tool
+                # call
+                assist_msg = FunctionCallingMessage(
+                    role_name=self.role_name,
+                    role_type=self.role_type,
+                    meta_dict=None,
+                    content="",
+                    func_name=function_name,
+                    args=args,
+                    tool_call_id=tool_call_id,
+                )
 
-                #
-                # message assistant message with tool_calls was already
-                # recorded in _record_assistant_tool_calls_message
+                # Then create the tool response message
                 func_msg = FunctionCallingMessage(
                     role_name=self.role_name,
                     role_type=self.role_type,
@@ -2871,21 +4438,39 @@ class ChatAgent(BaseAgent):
                     tool_call_id=tool_call_id,
                 )
 
-
+                # Record both messages with precise timestamps to ensure
+                # correct ordering
+                current_time_ns = time.time_ns()
+                base_timestamp = (
+                    current_time_ns / 1_000_000_000
+                )  # Convert to seconds
+
+                self.update_memory(
+                    assist_msg,
+                    OpenAIBackendRole.ASSISTANT,
+                    timestamp=base_timestamp,
+                )
+                self.update_memory(
+                    func_msg,
+                    OpenAIBackendRole.FUNCTION,
+                    timestamp=base_timestamp + 1e-6,
+                )
 
-
+                tool_record = ToolCallingRecord(
                     tool_name=function_name,
                     args=args,
                     result=result,
                     tool_call_id=tool_call_id,
                 )
+                self._update_last_tool_call_state(tool_record)
+                return tool_record
 
             except Exception as e:
                 error_msg = (
                     f"Error executing tool '{function_name}': {e!s}"
                 )
                 result = {"error": error_msg}
-
+                logger.warning(error_msg)
 
                 # Record error response
                 func_msg = FunctionCallingMessage(
@@ -2900,12 +4485,14 @@ class ChatAgent(BaseAgent):
 
                 self.update_memory(func_msg, OpenAIBackendRole.FUNCTION)
 
-
+                tool_record = ToolCallingRecord(
                     tool_name=function_name,
                     args=args,
                     result=result,
                     tool_call_id=tool_call_id,
                 )
+                self._update_last_tool_call_state(tool_record)
+                return tool_record
         else:
             logger.warning(
                 f"Tool '{function_name}' not found in internal tools"
@@ -2927,6 +4514,23 @@ class ChatAgent(BaseAgent):
         tool_call_id = tool_call_data['id']
 
         if function_name in self._internal_tools:
+            # Create the tool call message
+            assist_msg = FunctionCallingMessage(
+                role_name=self.role_name,
+                role_type=self.role_type,
+                meta_dict=None,
+                content="",
+                func_name=function_name,
+                args=args,
+                tool_call_id=tool_call_id,
+            )
+            assist_ts = time.time_ns() / 1_000_000_000
+            self.update_memory(
+                assist_msg,
+                OpenAIBackendRole.ASSISTANT,
+                timestamp=assist_ts,
+            )
+
             tool = self._internal_tools[function_name]
             try:
                 # Try different invocation paths in order of preference
@@ -2956,9 +4560,7 @@ class ChatAgent(BaseAgent):
                     # Fallback: synchronous call
                     result = tool(**args)
 
-                #
-                # message assistant message with tool_calls was already
-                # recorded in _record_assistant_tool_calls_message
+                # Create the tool response message
                 func_msg = FunctionCallingMessage(
                     role_name=self.role_name,
                     role_type=self.role_type,
@@ -2968,22 +4570,28 @@ class ChatAgent(BaseAgent):
                     result=result,
                     tool_call_id=tool_call_id,
                 )
+                func_ts = time.time_ns() / 1_000_000_000
+                self.update_memory(
+                    func_msg,
+                    OpenAIBackendRole.FUNCTION,
+                    timestamp=func_ts,
+                )
 
-
-
-                return ToolCallingRecord(
+                tool_record = ToolCallingRecord(
                     tool_name=function_name,
                     args=args,
                     result=result,
                     tool_call_id=tool_call_id,
                 )
+                self._update_last_tool_call_state(tool_record)
+                return tool_record
 
             except Exception as e:
                 error_msg = (
                     f"Error executing async tool '{function_name}': {e!s}"
                 )
                 result = {"error": error_msg}
-
+                logger.warning(error_msg)
 
                 # Record error response
                 func_msg = FunctionCallingMessage(
@@ -2995,15 +4603,21 @@ class ChatAgent(BaseAgent):
                     result=result,
                     tool_call_id=tool_call_id,
                 )
+                func_ts = time.time_ns() / 1_000_000_000
+                self.update_memory(
+                    func_msg,
+                    OpenAIBackendRole.FUNCTION,
+                    timestamp=func_ts,
+                )
 
-
-
-                return ToolCallingRecord(
+                tool_record = ToolCallingRecord(
                     tool_name=function_name,
                     args=args,
                     result=result,
                     tool_call_id=tool_call_id,
                 )
+                self._update_last_tool_call_state(tool_record)
+                return tool_record
         else:
             logger.warning(
                 f"Tool '{function_name}' not found in internal tools"
@@ -3093,7 +4707,7 @@ class ChatAgent(BaseAgent):
             # Check termination condition
             if self.stop_event and self.stop_event.is_set():
                 logger.info(
-                    f"Termination triggered at iteration
+                    f"Termination triggered at iteration {iteration_count}"
                 )
                 yield self._step_terminate(
                     num_tokens, tool_call_records, "termination_triggered"
@@ -3320,18 +4934,13 @@ class ChatAgent(BaseAgent):
         response_format: Optional[Type[BaseModel]] = None,
     ) -> AsyncGenerator[Union[ChatAgentResponse, Tuple[bool, bool]], None]:
         r"""Async version of process streaming chunks with
-        content accumulator.
+        content accumulator.
+        """
 
         tool_calls_complete = False
         stream_completed = False
 
         async for chunk in stream:
-            # Update token usage if available
-            if chunk.usage:
-                self._update_token_usage_tracker(
-                    step_token_usage, safe_model_dump(chunk.usage)
-                )
-
             # Process chunk delta
             if chunk.choices and len(chunk.choices) > 0:
                 choice = chunk.choices[0]
@@ -3364,13 +4973,6 @@ class ChatAgent(BaseAgent):
                         # If we have complete tool calls, execute them with
                         # async status updates
                         if accumulated_tool_calls:
-                            # Record assistant message with
-                            # tool calls first
-                            self._record_assistant_tool_calls_message(
-                                accumulated_tool_calls,
-                                content_accumulator.get_full_content(),
-                            )
-
                             # Execute tools asynchronously with real-time
                             # status updates
                             async for (
@@ -3405,7 +5007,49 @@ class ChatAgent(BaseAgent):
                             )
 
                             self.record_message(final_message)
-
+            elif chunk.usage and not chunk.choices:
+                # Handle final chunk with usage but empty choices
+                # This happens when stream_options={"include_usage": True}
+                # Update the final usage from this chunk
+                self._update_token_usage_tracker(
+                    step_token_usage, safe_model_dump(chunk.usage)
+                )
+
+                # Create final response with final usage
+                final_content = content_accumulator.get_full_content()
+                if final_content.strip():
+                    final_message = BaseMessage(
+                        role_name=self.role_name,
+                        role_type=self.role_type,
+                        meta_dict={},
+                        content=final_content,
+                    )
+
+                    if response_format:
+                        self._try_format_message(
+                            final_message, response_format
+                        )
+
+                    # Create final response with final usage (not partial)
+                    final_response = ChatAgentResponse(
+                        msgs=[final_message],
+                        terminated=False,
+                        info={
+                            "id": getattr(chunk, 'id', ''),
+                            "usage": step_token_usage.copy(),
+                            "finish_reasons": ["stop"],
+                            "num_tokens": self._get_token_count(final_content),
+                            "tool_calls": tool_call_records or [],
+                            "external_tool_requests": None,
+                            "streaming": False,
+                            "partial": False,
+                        },
+                    )
+                    yield final_response
+                break
+            elif stream_completed:
+                # If we've already seen finish_reason but no usage chunk, exit
+                break
 
         # Yield the final status as a tuple
         yield (stream_completed, tool_calls_complete)
@@ -3498,15 +5142,18 @@ class ChatAgent(BaseAgent):
     ) -> ChatAgentResponse:
         r"""Create a streaming response using content accumulator."""
 
-        # Add new content
+        # Add new content; only build full content when needed
         accumulator.add_streaming_content(new_content)
-
+        if self.stream_accumulate:
+            message_content = accumulator.get_full_content()
+        else:
+            message_content = new_content
 
         message = BaseMessage(
             role_name=self.role_name,
             role_type=self.role_type,
             meta_dict={},
-            content=
+            content=message_content,
         )
 
         return ChatAgentResponse(
@@ -3516,7 +5163,7 @@ class ChatAgent(BaseAgent):
                 "id": response_id,
                 "usage": step_token_usage.copy(),
                 "finish_reasons": ["streaming"],
-                "num_tokens": self._get_token_count(
+                "num_tokens": self._get_token_count(message_content),
                 "tool_calls": tool_call_records or [],
                 "external_tool_requests": None,
                 "streaming": True,
@@ -3572,10 +5219,12 @@ class ChatAgent(BaseAgent):
            configuration.
        """
        # Create a new instance with the same configuration
-        # If with_memory is True, set system_message to None
-        #
+        # If with_memory is True, set system_message to None (it will be
+        # copied from memory below, including any workflow context)
+        # If with_memory is False, use the current system message
+        # (which may include appended workflow context)
        # To avoid duplicated system memory.
-        system_message = None if with_memory else self.
+        system_message = None if with_memory else self._system_message
 
        # Clone tools and collect toolkits that need registration
        cloned_tools, toolkits_to_register = self._clone_tools()
@@ -3589,7 +5238,7 @@ class ChatAgent(BaseAgent):
                self.memory.get_context_creator(), "token_limit", None
            ),
            output_language=self._output_language,
-            tools=cloned_tools,
+            tools=cast(List[Union[FunctionTool, Callable]], cloned_tools),
            toolkits_to_register_agent=toolkits_to_register,
            external_tools=[
                schema for schema in self._external_tool_schemas.values()
@@ -3603,6 +5252,7 @@ class ChatAgent(BaseAgent):
            tool_execution_timeout=self.tool_execution_timeout,
            pause_event=self.pause_event,
            prune_tool_calls_from_memory=self.prune_tool_calls_from_memory,
+            stream_accumulate=self.stream_accumulate,
        )
 
        # Copy memory if requested
@@ -3617,9 +5267,7 @@ class ChatAgent(BaseAgent):
 
    def _clone_tools(
        self,
-    ) -> Tuple[
-        List[Union[FunctionTool, Callable]], List[RegisteredAgentToolkit]
-    ]:
+    ) -> Tuple[List[FunctionTool], List[RegisteredAgentToolkit]]:
        r"""Clone tools and return toolkits that need agent registration.
 
        This method handles stateful toolkits by cloning them if they have
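The new `stream_accumulate` flag decides whether each partial `ChatAgentResponse` repeats the full text accumulated so far (the previous behaviour) or carries only the newest delta, and the same choice is forwarded when the agent is cloned. A rough sketch of the two modes, assuming a simple list-backed accumulator:

```python
class ContentAccumulator:
    def __init__(self) -> None:
        self._parts: list[str] = []

    def add_streaming_content(self, piece: str) -> None:
        self._parts.append(piece)

    def get_full_content(self) -> str:
        return "".join(self._parts)


def next_message_content(acc: ContentAccumulator, new_content: str,
                         stream_accumulate: bool = True) -> str:
    acc.add_streaming_content(new_content)
    # Accumulated mode repeats everything so far; delta mode sends only the new piece.
    return acc.get_full_content() if stream_accumulate else new_content
```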
@@ -3674,15 +5322,65 @@ class ChatAgent(BaseAgent):
                 # Get the method from the cloned (or original) toolkit
                 toolkit = cloned_toolkits[toolkit_id]
                 method_name = tool.func.__name__
+
+                # Check if toolkit was actually cloned or just reused
+                toolkit_was_cloned = toolkit is not toolkit_instance
+
                 if hasattr(toolkit, method_name):
                     new_method = getattr(toolkit, method_name)
-
+
+                    # If toolkit wasn't cloned (stateless), preserve the
+                    # original function to maintain any enhancements/wrappers
+                    if not toolkit_was_cloned:
+                        # Toolkit is stateless, safe to reuse original function
+                        cloned_tools.append(
+                            FunctionTool(
+                                func=tool.func,
+                                openai_tool_schema=tool.get_openai_tool_schema(),
+                            )
+                        )
+                        continue
+
+                    # Toolkit was cloned, use the new method
+                    # Wrap cloned method into a new FunctionTool,
+                    # preserving schema
+                    try:
+                        new_tool = FunctionTool(
+                            func=new_method,
+                            openai_tool_schema=tool.get_openai_tool_schema(),
+                        )
+                        cloned_tools.append(new_tool)
+                    except Exception as e:
+                        # If wrapping fails, fallback to wrapping the original
+                        # function with its schema to maintain consistency
+                        logger.warning(
+                            f"Failed to wrap cloned toolkit "
+                            f"method '{method_name}' "
+                            f"with FunctionTool: {e}. Using original "
+                            f"function with preserved schema instead."
+                        )
+                        cloned_tools.append(
+                            FunctionTool(
+                                func=tool.func,
+                                openai_tool_schema=tool.get_openai_tool_schema(),
+                            )
+                        )
                 else:
-                    # Fallback to original function
-                    cloned_tools.append(
+                    # Fallback to original function wrapped in FunctionTool
+                    cloned_tools.append(
+                        FunctionTool(
+                            func=tool.func,
+                            openai_tool_schema=tool.get_openai_tool_schema(),
+                        )
+                    )
             else:
-                # Not a toolkit method,
-                cloned_tools.append(
+                # Not a toolkit method, preserve FunctionTool schema directly
+                cloned_tools.append(
+                    FunctionTool(
+                        func=tool.func,
+                        openai_tool_schema=tool.get_openai_tool_schema(),
+                    )
+                )
 
         return cloned_tools, toolkits_to_register
 
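`_clone_tools` now always hands back `FunctionTool` instances and re-attaches the original OpenAI schema whenever it rebinds a method from a cloned toolkit, so a hand-tuned schema survives cloning. A hedged sketch of that wrap-and-preserve step; it assumes, as the diff itself does, that `FunctionTool` accepts an `openai_tool_schema` argument:

```python
from camel.toolkits import FunctionTool


def rewrap_tool(tool: FunctionTool, new_func=None) -> FunctionTool:
    # Rebuild the tool around `new_func` (for example a method bound to a cloned,
    # stateful toolkit) while keeping the exact schema of the original tool.
    try:
        return FunctionTool(
            func=new_func or tool.func,
            openai_tool_schema=tool.get_openai_tool_schema(),
        )
    except Exception:
        # Fall back to the original callable, still with the preserved schema.
        return FunctionTool(
            func=tool.func,
            openai_tool_schema=tool.get_openai_tool_schema(),
        )
```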