letta-nightly 0.7.30.dev20250603104343__py3-none-any.whl → 0.8.0.dev20250604104349__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +7 -1
- letta/agent.py +14 -7
- letta/agents/base_agent.py +1 -0
- letta/agents/ephemeral_summary_agent.py +104 -0
- letta/agents/helpers.py +35 -3
- letta/agents/letta_agent.py +492 -176
- letta/agents/letta_agent_batch.py +22 -16
- letta/agents/prompts/summary_system_prompt.txt +62 -0
- letta/agents/voice_agent.py +22 -7
- letta/agents/voice_sleeptime_agent.py +13 -8
- letta/constants.py +33 -1
- letta/data_sources/connectors.py +52 -36
- letta/errors.py +4 -0
- letta/functions/ast_parsers.py +13 -30
- letta/functions/function_sets/base.py +3 -1
- letta/functions/functions.py +2 -0
- letta/functions/mcp_client/base_client.py +151 -97
- letta/functions/mcp_client/sse_client.py +49 -31
- letta/functions/mcp_client/stdio_client.py +107 -106
- letta/functions/schema_generator.py +22 -22
- letta/groups/helpers.py +3 -4
- letta/groups/sleeptime_multi_agent.py +4 -4
- letta/groups/sleeptime_multi_agent_v2.py +22 -0
- letta/helpers/composio_helpers.py +16 -0
- letta/helpers/converters.py +20 -0
- letta/helpers/datetime_helpers.py +1 -6
- letta/helpers/tool_rule_solver.py +2 -1
- letta/interfaces/anthropic_streaming_interface.py +17 -2
- letta/interfaces/openai_chat_completions_streaming_interface.py +1 -0
- letta/interfaces/openai_streaming_interface.py +18 -2
- letta/llm_api/anthropic_client.py +24 -3
- letta/llm_api/google_ai_client.py +0 -15
- letta/llm_api/google_vertex_client.py +6 -5
- letta/llm_api/llm_client_base.py +15 -0
- letta/llm_api/openai.py +2 -2
- letta/llm_api/openai_client.py +60 -8
- letta/orm/__init__.py +2 -0
- letta/orm/agent.py +45 -43
- letta/orm/base.py +0 -2
- letta/orm/block.py +1 -0
- letta/orm/custom_columns.py +13 -0
- letta/orm/enums.py +5 -0
- letta/orm/file.py +3 -1
- letta/orm/files_agents.py +68 -0
- letta/orm/mcp_server.py +48 -0
- letta/orm/message.py +1 -0
- letta/orm/organization.py +11 -2
- letta/orm/passage.py +25 -10
- letta/orm/sandbox_config.py +5 -2
- letta/orm/sqlalchemy_base.py +171 -110
- letta/prompts/system/memgpt_base.txt +6 -1
- letta/prompts/system/memgpt_v2_chat.txt +57 -0
- letta/prompts/system/sleeptime.txt +2 -0
- letta/prompts/system/sleeptime_v2.txt +28 -0
- letta/schemas/agent.py +87 -20
- letta/schemas/block.py +7 -1
- letta/schemas/file.py +57 -0
- letta/schemas/mcp.py +74 -0
- letta/schemas/memory.py +5 -2
- letta/schemas/message.py +9 -0
- letta/schemas/openai/openai.py +0 -6
- letta/schemas/providers.py +33 -4
- letta/schemas/tool.py +26 -21
- letta/schemas/tool_execution_result.py +5 -0
- letta/server/db.py +23 -8
- letta/server/rest_api/app.py +73 -56
- letta/server/rest_api/interface.py +4 -4
- letta/server/rest_api/routers/v1/agents.py +132 -47
- letta/server/rest_api/routers/v1/blocks.py +3 -2
- letta/server/rest_api/routers/v1/embeddings.py +3 -3
- letta/server/rest_api/routers/v1/groups.py +3 -3
- letta/server/rest_api/routers/v1/jobs.py +14 -17
- letta/server/rest_api/routers/v1/organizations.py +10 -10
- letta/server/rest_api/routers/v1/providers.py +12 -10
- letta/server/rest_api/routers/v1/runs.py +3 -3
- letta/server/rest_api/routers/v1/sandbox_configs.py +12 -12
- letta/server/rest_api/routers/v1/sources.py +108 -43
- letta/server/rest_api/routers/v1/steps.py +8 -6
- letta/server/rest_api/routers/v1/tools.py +134 -95
- letta/server/rest_api/utils.py +12 -1
- letta/server/server.py +272 -73
- letta/services/agent_manager.py +246 -313
- letta/services/block_manager.py +30 -9
- letta/services/context_window_calculator/__init__.py +0 -0
- letta/services/context_window_calculator/context_window_calculator.py +150 -0
- letta/services/context_window_calculator/token_counter.py +82 -0
- letta/services/file_processor/__init__.py +0 -0
- letta/services/file_processor/chunker/__init__.py +0 -0
- letta/services/file_processor/chunker/llama_index_chunker.py +29 -0
- letta/services/file_processor/embedder/__init__.py +0 -0
- letta/services/file_processor/embedder/openai_embedder.py +84 -0
- letta/services/file_processor/file_processor.py +123 -0
- letta/services/file_processor/parser/__init__.py +0 -0
- letta/services/file_processor/parser/base_parser.py +9 -0
- letta/services/file_processor/parser/mistral_parser.py +54 -0
- letta/services/file_processor/types.py +0 -0
- letta/services/files_agents_manager.py +184 -0
- letta/services/group_manager.py +118 -0
- letta/services/helpers/agent_manager_helper.py +76 -21
- letta/services/helpers/tool_execution_helper.py +3 -0
- letta/services/helpers/tool_parser_helper.py +100 -0
- letta/services/identity_manager.py +44 -42
- letta/services/job_manager.py +21 -10
- letta/services/mcp/base_client.py +5 -2
- letta/services/mcp/sse_client.py +3 -5
- letta/services/mcp/stdio_client.py +3 -5
- letta/services/mcp_manager.py +281 -0
- letta/services/message_manager.py +40 -26
- letta/services/organization_manager.py +55 -19
- letta/services/passage_manager.py +211 -13
- letta/services/provider_manager.py +48 -2
- letta/services/sandbox_config_manager.py +105 -0
- letta/services/source_manager.py +4 -5
- letta/services/step_manager.py +9 -6
- letta/services/summarizer/summarizer.py +50 -23
- letta/services/telemetry_manager.py +7 -0
- letta/services/tool_executor/tool_execution_manager.py +11 -52
- letta/services/tool_executor/tool_execution_sandbox.py +4 -34
- letta/services/tool_executor/tool_executor.py +107 -105
- letta/services/tool_manager.py +56 -17
- letta/services/tool_sandbox/base.py +39 -92
- letta/services/tool_sandbox/e2b_sandbox.py +16 -11
- letta/services/tool_sandbox/local_sandbox.py +51 -23
- letta/services/user_manager.py +36 -3
- letta/settings.py +10 -3
- letta/templates/__init__.py +0 -0
- letta/templates/sandbox_code_file.py.j2 +47 -0
- letta/templates/template_helper.py +16 -0
- letta/tracing.py +30 -1
- letta/types/__init__.py +7 -0
- letta/utils.py +25 -1
- {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/METADATA +7 -2
- {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/RECORD +136 -110
- {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/entry_points.txt +0 -0
letta/agents/letta_agent.py
CHANGED
@@ -1,14 +1,20 @@
 import asyncio
 import json
 import uuid
-from typing import
+from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union
 
 from openai import AsyncStream
-from openai.types import
-from openai.types.chat import ChatCompletion, ChatCompletionChunk
+from openai.types.chat import ChatCompletionChunk
 
 from letta.agents.base_agent import BaseAgent
-from letta.agents.
+from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
+from letta.agents.helpers import (
+    _create_letta_response,
+    _prepare_in_context_messages_async,
+    _prepare_in_context_messages_no_persist_async,
+    generate_step_id,
+)
+from letta.errors import LLMContextWindowExceededError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_timestamp_ns
 from letta.helpers.tool_execution_helper import enable_strict_mode
@@ -21,9 +27,9 @@ from letta.log import get_logger
 from letta.orm.enums import ToolType
 from letta.schemas.agent import AgentState
 from letta.schemas.enums import MessageRole, MessageStreamStatus
-from letta.schemas.letta_message import AssistantMessage
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.letta_response import LettaResponse
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message, MessageCreate
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
 from letta.schemas.provider_trace import ProviderTraceCreate
@@ -32,13 +38,18 @@ from letta.schemas.user import User
 from letta.server.rest_api.utils import create_letta_messages_from_llm_response
 from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
+from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.services.step_manager import NoopStepManager, StepManager
+from letta.services.summarizer.enums import SummarizationMode
+from letta.services.summarizer.summarizer import Summarizer
 from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
 from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
+from letta.settings import model_settings
 from letta.system import package_function_response
 from letta.tracing import log_event, trace_method, tracer
+from letta.utils import log_telemetry, validate_function_response
 
 logger = get_logger(__name__)
 
@@ -55,6 +66,11 @@ class LettaAgent(BaseAgent):
         actor: User,
         step_manager: StepManager = NoopStepManager(),
         telemetry_manager: TelemetryManager = NoopTelemetryManager(),
+        summary_block_label: str = "conversation_summary",
+        message_buffer_limit: int = 60,  # TODO: Make this configurable
+        message_buffer_min: int = 15,  # TODO: Make this configurable
+        enable_summarization: bool = True,  # TODO: Make this configurable
+        max_summarization_retries: int = 3,  # TODO: Make this configurable
     ):
         super().__init__(agent_id=agent_id, openai_client=None, message_manager=message_manager, agent_manager=agent_manager, actor=actor)
 
@@ -69,23 +85,63 @@ class LettaAgent(BaseAgent):
         self.last_function_response = None
 
         # Cached archival memory/message size
-        self.num_messages =
-        self.num_archival_memories =
+        self.num_messages = None
+        self.num_archival_memories = None
+
+        self.summarization_agent = None
+        self.summary_block_label = summary_block_label
+        self.max_summarization_retries = max_summarization_retries
+
+        # TODO: Expand to more
+        if enable_summarization and model_settings.openai_api_key:
+            self.summarization_agent = EphemeralSummaryAgent(
+                target_block_label=self.summary_block_label,
+                agent_id=agent_id,
+                block_manager=self.block_manager,
+                message_manager=self.message_manager,
+                agent_manager=self.agent_manager,
+                actor=self.actor,
+            )
+
+        self.summarizer = Summarizer(
+            mode=SummarizationMode.STATIC_MESSAGE_BUFFER,
+            summarizer_agent=self.summarization_agent,
+            # TODO: Make this configurable
+            message_buffer_limit=message_buffer_limit,
+            message_buffer_min=message_buffer_min,
+        )
 
     @trace_method
-    async def step(
+    async def step(
+        self,
+        input_messages: List[MessageCreate],
+        max_steps: int = 10,
+        use_assistant_message: bool = True,
+        request_start_timestamp_ns: Optional[int] = None,
+    ) -> LettaResponse:
         agent_state = await self.agent_manager.get_agent_by_id_async(
-            agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
+            agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
+        )
+        _, new_in_context_messages, usage = await self._step(
+            agent_state=agent_state,
+            input_messages=input_messages,
+            max_steps=max_steps,
+            request_start_timestamp_ns=request_start_timestamp_ns,
         )
-        _, new_in_context_messages, usage = await self._step(agent_state=agent_state, input_messages=input_messages, max_steps=max_steps)
         return _create_letta_response(
             new_in_context_messages=new_in_context_messages, use_assistant_message=use_assistant_message, usage=usage
         )
 
     @trace_method
-    async def step_stream_no_tokens(
+    async def step_stream_no_tokens(
+        self,
+        input_messages: List[MessageCreate],
+        max_steps: int = 10,
+        use_assistant_message: bool = True,
+        request_start_timestamp_ns: Optional[int] = None,
+    ):
         agent_state = await self.agent_manager.get_agent_by_id_async(
-            agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
+            agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
         )
         current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
             input_messages, agent_state, self.message_manager, self.actor
@@ -97,32 +153,33 @@ class LettaAgent(BaseAgent):
             actor=self.actor,
         )
         usage = LettaUsageStatistics()
+
+        # span for request
+        request_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
+        request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
+
         for _ in range(max_steps):
             step_id = generate_step_id()
+            step_start = get_utc_timestamp_ns()
+            agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+            agent_step_span.set_attributes({"step_id": step_id})
 
-
-                current_in_context_messages
+            request_data, response_data, current_in_context_messages, new_in_context_messages = await self._build_and_request_from_llm(
+                current_in_context_messages,
+                new_in_context_messages,
                 agent_state,
-
-
-            )
-            log_event("agent.stream_no_tokens.messages.refreshed") # [1^]
-
-            request_data = await self._create_llm_request_data_async(
-                llm_client=llm_client,
-                in_context_messages=in_context_messages,
-                agent_state=agent_state,
-                tool_rules_solver=tool_rules_solver,
-                # TODO: pass in reasoning content
+                llm_client,
+                tool_rules_solver,
             )
-
+            in_context_messages = current_in_context_messages + new_in_context_messages
 
-            try:
-                response_data = await llm_client.request_async(request_data, agent_state.llm_config)
-            except Exception as e:
-                raise llm_client.handle_llm_error(e)
             log_event("agent.stream_no_tokens.llm_response.received") # [3^]
 
+            # log llm request time
+            now = get_utc_timestamp_ns()
+            llm_request_ns = now - step_start
+            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
+
             response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
 
             # update usage
@@ -144,16 +201,35 @@ class LettaAgent(BaseAgent):
                     signature=response.choices[0].message.reasoning_content_signature,
                 )
             ]
-
+            elif response.choices[0].message.content:
                 reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+            else:
+                logger.info("No reasoning content found.")
+                reasoning = None
+
+            # log LLM request time
+            now = get_utc_timestamp_ns()
+            llm_request_ns = now - step_start
+            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
 
             persisted_messages, should_continue = await self._handle_ai_response(
-                tool_call,
+                tool_call,
+                agent_state,
+                tool_rules_solver,
+                response.usage,
+                reasoning_content=reasoning,
+                agent_step_span=agent_step_span,
             )
             self.response_messages.extend(persisted_messages)
             new_in_context_messages.extend(persisted_messages)
             log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
 
+            # log step time
+            now = get_utc_timestamp_ns()
+            step_ns = now - step_start
+            agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
+            agent_step_span.end()
+
             # Log LLM Trace
             await self.telemetry_manager.create_provider_trace_async(
                 actor=self.actor,
@@ -179,15 +255,32 @@ class LettaAgent(BaseAgent):
 
         # Extend the in context message ids
         if not agent_state.message_buffer_autoclear:
-
-
+            await self._rebuild_context_window(
+                in_context_messages=current_in_context_messages,
+                new_letta_messages=new_in_context_messages,
+                llm_config=agent_state.llm_config,
+                total_tokens=usage.total_tokens,
+                force=False,
+            )
+
+        # log request time
+        if request_start_timestamp_ns:
+            now = get_utc_timestamp_ns()
+            request_ns = now - request_start_timestamp_ns
+            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": request_ns // 1_000_000})
+            request_span.end()
 
         # Return back usage
         yield f"data: {usage.model_dump_json()}\n\n"
+        yield f"data: {MessageStreamStatus.done.model_dump_json()}\n\n"
 
     async def _step(
-        self,
-
+        self,
+        agent_state: AgentState,
+        input_messages: List[MessageCreate],
+        max_steps: int = 10,
+        request_start_timestamp_ns: Optional[int] = None,
+    ) -> Tuple[List[Message], List[Message], LettaUsageStatistics]:
         """
         Carries out an invocation of the agent loop. In each step, the agent
         1. Rebuilds its memory
@@ -204,35 +297,32 @@ class LettaAgent(BaseAgent):
             put_inner_thoughts_first=True,
             actor=self.actor,
         )
+
+        # span for request
+        request_span = tracer.start_span("time_to_first_token")
+        request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
+
         usage = LettaUsageStatistics()
         for _ in range(max_steps):
             step_id = generate_step_id()
+            step_start = get_utc_timestamp_ns()
+            agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+            agent_step_span.set_attributes({"step_id": step_id})
 
-
-                current_in_context_messages
-                agent_state,
-                num_messages=self.num_messages,
-                num_archival_memories=self.num_archival_memories,
-            )
-            log_event("agent.step.messages.refreshed") # [1^]
-
-            request_data = await self._create_llm_request_data_async(
-                llm_client=llm_client,
-                in_context_messages=in_context_messages,
-                agent_state=agent_state,
-                tool_rules_solver=tool_rules_solver,
-                # TODO: pass in reasoning content
+            request_data, response_data, current_in_context_messages, new_in_context_messages = await self._build_and_request_from_llm(
+                current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver
             )
-
+            in_context_messages = current_in_context_messages + new_in_context_messages
 
-            try:
-                response_data = await llm_client.request_async(request_data, agent_state.llm_config)
-            except Exception as e:
-                raise llm_client.handle_llm_error(e)
             log_event("agent.step.llm_response.received") # [3^]
 
             response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
 
+            # log LLM request time
+            now = get_utc_timestamp_ns()
+            llm_request_ns = now - step_start
+            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
+
             # TODO: add run_id
             usage.step_count += 1
             usage.completion_tokens += response.usage.completion_tokens
@@ -251,16 +341,31 @@ class LettaAgent(BaseAgent):
                     signature=response.choices[0].message.reasoning_content_signature,
                 )
             ]
-
+            elif response.choices[0].message.content:
                 reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+            else:
+                logger.info("No reasoning content found.")
+                reasoning = None
 
             persisted_messages, should_continue = await self._handle_ai_response(
-                tool_call,
+                tool_call,
+                agent_state,
+                tool_rules_solver,
+                response.usage,
+                reasoning_content=reasoning,
+                step_id=step_id,
+                agent_step_span=agent_step_span,
             )
             self.response_messages.extend(persisted_messages)
             new_in_context_messages.extend(persisted_messages)
             log_event("agent.step.llm_response.processed") # [4^]
 
+            # log step time
+            now = get_utc_timestamp_ns()
+            step_ns = now - step_start
+            agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
+            agent_step_span.end()
+
             # Log LLM Trace
             await self.telemetry_manager.create_provider_trace_async(
                 actor=self.actor,
@@ -275,10 +380,22 @@ class LettaAgent(BaseAgent):
             if not should_continue:
                 break
 
+        # log request time
+        if request_start_timestamp_ns:
+            now = get_utc_timestamp_ns()
+            request_ns = now - request_start_timestamp_ns
+            request_span.add_event(name="request_ms", attributes={"duration_ms": request_ns // 1_000_000})
+            request_span.end()
+
         # Extend the in context message ids
         if not agent_state.message_buffer_autoclear:
-
-
+            await self._rebuild_context_window(
+                in_context_messages=current_in_context_messages,
+                new_letta_messages=new_in_context_messages,
+                llm_config=agent_state.llm_config,
+                total_tokens=usage.total_tokens,
+                force=False,
+            )
 
         return current_in_context_messages, new_in_context_messages, usage
 
@@ -299,11 +416,17 @@ class LettaAgent(BaseAgent):
         4. Processes the response
         """
         agent_state = await self.agent_manager.get_agent_by_id_async(
-            agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
+            agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
         )
-        current_in_context_messages, new_in_context_messages = await
+        current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_no_persist_async(
             input_messages, agent_state, self.message_manager, self.actor
         )
+
+        # Special strategy to lower TTFT
+        # Delay persistence of the initial input message as much as possible
+        persisted_input_messages = False
+        initial_messages = new_in_context_messages
+
         tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
         llm_client = LLMClient.create(
             provider_type=agent_state.llm_config.model_endpoint_type,
@@ -311,29 +434,28 @@ class LettaAgent(BaseAgent):
             actor=self.actor,
         )
         usage = LettaUsageStatistics()
+        first_chunk, request_span = True, None
+        if request_start_timestamp_ns:
+            request_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
+            request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
+        provider_request_start_timestamp_ns = None
         for _ in range(max_steps):
             step_id = generate_step_id()
-
-
+            step_start = get_utc_timestamp_ns()
+            agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+            agent_step_span.set_attributes({"step_id": step_id})
+
+            request_data, stream, current_in_context_messages, new_in_context_messages = await self._build_and_request_from_llm_streaming(
+                first_chunk,
+                agent_step_span,
+                request_start_timestamp_ns,
+                current_in_context_messages,
+                new_in_context_messages,
                 agent_state,
-
-
-            )
-            log_event("agent.step.messages.refreshed") # [1^]
-
-            request_data = await self._create_llm_request_data_async(
-                llm_client=llm_client,
-                in_context_messages=in_context_messages,
-                agent_state=agent_state,
-                tool_rules_solver=tool_rules_solver,
+                llm_client,
+                tool_rules_solver,
             )
-            log_event("agent.stream.llm_request.created") # [2^]
-
-            try:
-                stream = await llm_client.stream_async(request_data, agent_state.llm_config)
-            except Exception as e:
-                raise llm_client.handle_llm_error(e)
             log_event("agent.stream.llm_response.received") # [3^]
 
             # TODO: THIS IS INCREDIBLY UGLY
@@ -351,18 +473,14 @@ class LettaAgent(BaseAgent):
             else:
                 raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
 
-
-
-
-            ttft_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
-
-            async for chunk in interface.process(stream):
+            async for chunk in interface.process(
+                stream, ttft_span=request_span, provider_request_start_timestamp_ns=provider_request_start_timestamp_ns
+            ):
                 # Measure time to first token
-                if first_chunk and
+                if first_chunk and request_span is not None:
                     now = get_utc_timestamp_ns()
                     ttft_ns = now - request_start_timestamp_ns
-
-                    ttft_span.end()
+                    request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
                     first_chunk = False
 
                 yield f"data: {chunk.model_dump_json()}\n\n"
@@ -373,6 +491,17 @@ class LettaAgent(BaseAgent):
             usage.prompt_tokens += interface.input_tokens
             usage.total_tokens += interface.input_tokens + interface.output_tokens
 
+            # Persist input messages if not already
+            # Special strategy to lower TTFT
+            if not persisted_input_messages:
+                await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
+                persisted_input_messages = True
+
+            # log LLM request time
+            now = get_utc_timestamp_ns()
+            llm_request_ns = now - step_start
+            agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
+
             # Process resulting stream content
             tool_call = interface.get_tool_call_object()
             reasoning_content = interface.get_reasoning_content()
@@ -389,10 +518,17 @@ class LettaAgent(BaseAgent):
                 pre_computed_assistant_message_id=interface.letta_assistant_message_id,
                 pre_computed_tool_message_id=interface.letta_tool_message_id,
                 step_id=step_id,
+                agent_step_span=agent_step_span,
             )
             self.response_messages.extend(persisted_messages)
             new_in_context_messages.extend(persisted_messages)
 
+            # log total step time
+            now = get_utc_timestamp_ns()
+            step_ns = now - step_start
+            agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
+            agent_step_span.end()
+
             # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
             # log_event("agent.stream.llm_response.processed") # [4^]
 
@@ -429,18 +565,180 @@ class LettaAgent(BaseAgent):
 
         # Extend the in context message ids
         if not agent_state.message_buffer_autoclear:
-
-
+            await self._rebuild_context_window(
+                in_context_messages=current_in_context_messages,
+                new_letta_messages=new_in_context_messages,
+                llm_config=agent_state.llm_config,
+                total_tokens=usage.total_tokens,
+                force=False,
+            )
 
-        #
-
-
-
+        # log time of entire request
+        if request_start_timestamp_ns:
+            now = get_utc_timestamp_ns()
+            request_ns = now - request_start_timestamp_ns
+            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": request_ns // 1_000_000})
+            request_span.end()
 
         # TODO: Also yield out a letta usage stats SSE
         yield f"data: {usage.model_dump_json()}\n\n"
         yield f"data: {MessageStreamStatus.done.model_dump_json()}\n\n"
 
+    async def _build_and_request_from_llm(
+        self,
+        current_in_context_messages: List[Message],
+        new_in_context_messages: List[Message],
+        agent_state: AgentState,
+        llm_client: LLMClientBase,
+        tool_rules_solver: ToolRulesSolver,
+    ) -> Tuple[Dict, Dict, List[Message], List[Message]]:
+        for attempt in range(self.max_summarization_retries + 1):
+            try:
+                log_event("agent.stream_no_tokens.messages.refreshed")
+                # Create LLM request data
+                request_data = await self._create_llm_request_data_async(
+                    llm_client=llm_client,
+                    in_context_messages=current_in_context_messages + new_in_context_messages,
+                    agent_state=agent_state,
+                    tool_rules_solver=tool_rules_solver,
+                )
+                log_event("agent.stream_no_tokens.llm_request.created")
+
+                # Attempt LLM request
+                return (
+                    request_data,
+                    await llm_client.request_async(request_data, agent_state.llm_config),
+                    current_in_context_messages,
+                    new_in_context_messages,
+                )
+
+            except Exception as e:
+                if attempt == self.max_summarization_retries:
+                    raise e
+
+                # Handle the error and prepare for retry
+                current_in_context_messages = await self._handle_llm_error(
+                    e,
+                    llm_client=llm_client,
+                    in_context_messages=current_in_context_messages,
+                    new_letta_messages=new_in_context_messages,
+                    llm_config=agent_state.llm_config,
+                    force=True,
+                )
+                new_in_context_messages = []
+                log_event(f"agent.stream_no_tokens.retry_attempt.{attempt + 1}")
+
+    async def _build_and_request_from_llm_streaming(
+        self,
+        first_chunk: bool,
+        ttft_span: "Span",
+        request_start_timestamp_ns: int,
+        current_in_context_messages: List[Message],
+        new_in_context_messages: List[Message],
+        agent_state: AgentState,
+        llm_client: LLMClientBase,
+        tool_rules_solver: ToolRulesSolver,
+    ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message]]:
+        for attempt in range(self.max_summarization_retries + 1):
+            try:
+                log_event("agent.stream_no_tokens.messages.refreshed")
+                # Create LLM request data
+                request_data = await self._create_llm_request_data_async(
+                    llm_client=llm_client,
+                    in_context_messages=current_in_context_messages + new_in_context_messages,
+                    agent_state=agent_state,
+                    tool_rules_solver=tool_rules_solver,
+                )
+                log_event("agent.stream.llm_request.created") # [2^]
+
+                if first_chunk and ttft_span is not None:
+                    provider_request_start_timestamp_ns = get_utc_timestamp_ns()
+                    provider_req_start_ns = provider_request_start_timestamp_ns - request_start_timestamp_ns
+                    ttft_span.add_event(
+                        name="provider_req_start_ns", attributes={"provider_req_start_ms": provider_req_start_ns // 1_000_000}
+                    )
+
+                # Attempt LLM request
+                return (
+                    request_data,
+                    await llm_client.stream_async(request_data, agent_state.llm_config),
+                    current_in_context_messages,
+                    new_in_context_messages,
+                )
+
+            except Exception as e:
+                if attempt == self.max_summarization_retries:
+                    raise e
+
+                # Handle the error and prepare for retry
+                current_in_context_messages = await self._handle_llm_error(
+                    e,
+                    llm_client=llm_client,
+                    in_context_messages=current_in_context_messages,
+                    new_letta_messages=new_in_context_messages,
+                    llm_config=agent_state.llm_config,
+                    force=True,
+                )
+                new_in_context_messages = []
+                log_event(f"agent.stream_no_tokens.retry_attempt.{attempt + 1}")
+
+    @trace_method
+    async def _handle_llm_error(
+        self,
+        e: Exception,
+        llm_client: LLMClientBase,
+        in_context_messages: List[Message],
+        new_letta_messages: List[Message],
+        llm_config: LLMConfig,
+        force: bool,
+    ) -> List[Message]:
+        if isinstance(e, LLMContextWindowExceededError):
+            return await self._rebuild_context_window(
+                in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, llm_config=llm_config, force=force
+            )
+        else:
+            raise llm_client.handle_llm_error(e)
+
+    @trace_method
+    async def _rebuild_context_window(
+        self,
+        in_context_messages: List[Message],
+        new_letta_messages: List[Message],
+        llm_config: LLMConfig,
+        total_tokens: Optional[int] = None,
+        force: bool = False,
+    ) -> List[Message]:
+        # If total tokens is reached, we truncate down
+        # TODO: This can be broken by bad configs, e.g. lower bound too high, initial messages too fat, etc.
+        if force or (total_tokens and total_tokens > llm_config.context_window):
+            self.logger.warning(
+                f"Total tokens {total_tokens} exceeds configured max tokens {llm_config.context_window}, forcefully clearing message history."
+            )
+            new_in_context_messages, updated = self.summarizer.summarize(
+                in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, force=True, clear=True
+            )
+        else:
+            new_in_context_messages, updated = self.summarizer.summarize(
+                in_context_messages=in_context_messages, new_letta_messages=new_letta_messages
+            )
+        await self.agent_manager.set_in_context_messages_async(
+            agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
+        )
+
+        return new_in_context_messages
+
+    @trace_method
+    async def summarize_conversation_history(self) -> AgentState:
+        agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=self.agent_id, actor=self.actor)
+        message_ids = agent_state.message_ids
+        in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=message_ids, actor=self.actor)
+        new_in_context_messages, updated = self.summarizer.summarize(
+            in_context_messages=in_context_messages, new_letta_messages=[], force=True
+        )
+        return await self.agent_manager.set_in_context_messages_async(
+            agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
+        )
+
     @trace_method
     async def _create_llm_request_data_async(
         self,
@@ -448,10 +746,18 @@ class LettaAgent(BaseAgent):
         in_context_messages: List[Message],
         agent_state: AgentState,
         tool_rules_solver: ToolRulesSolver,
-    ) ->
-        self.num_messages
-
-
+    ) -> dict:
+        self.num_messages, self.num_archival_memories = await asyncio.gather(
+            (
+                self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+                if self.num_messages is None
+                else asyncio.sleep(0, result=self.num_messages)
+            ),
+            (
+                self.passage_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+                if self.num_archival_memories is None
+                else asyncio.sleep(0, result=self.num_archival_memories)
+            ),
         )
         in_context_messages = await self._rebuild_memory_async(
             in_context_messages, agent_state, num_messages=self.num_messages, num_archival_memories=self.num_archival_memories
@@ -469,13 +775,14 @@ class LettaAgent(BaseAgent):
                 ToolType.LETTA_SLEEPTIME_CORE,
                 ToolType.LETTA_VOICE_SLEEPTIME_CORE,
                 ToolType.LETTA_BUILTIN,
+                ToolType.EXTERNAL_COMPOSIO,
+                ToolType.EXTERNAL_MCP,
             }
-            or (t.tool_type == ToolType.EXTERNAL_COMPOSIO)
         ]
 
         # Mirror the sync agent loop: get allowed tools or allow all if none are allowed
         if self.last_function_response is None:
-            self.last_function_response =
+            self.last_function_response = self._load_last_function_response(in_context_messages)
         valid_tool_names = tool_rules_solver.get_allowed_tool_names(
             available_tools=set([t.name for t in tools]),
             last_function_response=self.last_function_response,
@@ -488,6 +795,9 @@ class LettaAgent(BaseAgent):
             force_tool_call = valid_tool_names[0]
 
         allowed_tools = [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]
+        allowed_tools = runtime_override_tool_json_schema(
+            tool_list=allowed_tools, response_format=agent_state.response_format, request_heartbeat=True
+        )
 
         return llm_client.build_request_data(in_context_messages, agent_state.llm_config, allowed_tools, force_tool_call)
 
@@ -502,6 +812,8 @@ class LettaAgent(BaseAgent):
         pre_computed_assistant_message_id: Optional[str] = None,
         pre_computed_tool_message_id: Optional[str] = None,
         step_id: str | None = None,
+        new_in_context_messages: Optional[List[Message]] = None,
+        agent_step_span: Optional["Span"] = None,
     ) -> Tuple[List[Message], bool]:
         """
         Now that streaming is done, handle the final AI response.
@@ -533,12 +845,43 @@ class LettaAgent(BaseAgent):
 
         tool_call_id = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
 
-
+        log_telemetry(
+            self.logger,
+            "_handle_ai_response execute tool start",
+            tool_name=tool_call_name,
+            tool_args=tool_args,
+            tool_call_id=tool_call_id,
+            request_heartbeat=request_heartbeat,
+        )
+
+        tool_execution_result = await self._execute_tool(
             tool_name=tool_call_name,
             tool_args=tool_args,
             agent_state=agent_state,
+            agent_step_span=agent_step_span,
+        )
+        log_telemetry(
+            self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
+        )
+
+        if tool_call_name in ["conversation_search", "conversation_search_date", "archival_memory_search"]:
+            # with certain functions we rely on the paging mechanism to handle overflow
+            truncate = False
+        else:
+            # but by default, we add a truncation safeguard to prevent bad functions from
+            # overflow the agent context window
+            truncate = True
+
+        # get the function response limit
+        target_tool = next((x for x in agent_state.tools if x.name == tool_call_name), None)
+        return_char_limit = target_tool.return_char_limit
+        function_response_string = validate_function_response(
+            tool_execution_result.func_return, return_char_limit=return_char_limit, truncate=truncate
+        )
+        function_response = package_function_response(
+            was_success=tool_execution_result.success_flag,
+            response_string=function_response_string,
         )
-        function_response = package_function_response(tool_result, success_flag)
 
         # 4. Register tool call with tool rule solver
         # Resolve whether or not to continue stepping
@@ -575,9 +918,10 @@ class LettaAgent(BaseAgent):
             model=agent_state.llm_config.model,
             function_name=tool_call_name,
             function_arguments=tool_args,
+            tool_execution_result=tool_execution_result,
             tool_call_id=tool_call_id,
-            function_call_success=success_flag,
-            function_response=
+            function_call_success=tool_execution_result.success_flag,
+            function_response=function_response_string,
             actor=self.actor,
             add_heartbeat_request_system_message=continue_stepping,
             reasoning_content=reasoning_content,
@@ -585,97 +929,69 @@ class LettaAgent(BaseAgent):
             pre_computed_tool_message_id=pre_computed_tool_message_id,
             step_id=logged_step.id if logged_step else None, # TODO (cliandy): eventually move over other agent loops
         )
+
         persisted_messages = await self.message_manager.create_many_messages_async(tool_call_messages, actor=self.actor)
         self.last_function_response = function_response
 
         return persisted_messages, continue_stepping
 
     @trace_method
-    async def _execute_tool(
+    async def _execute_tool(
+        self, tool_name: str, tool_args: dict, agent_state: AgentState, agent_step_span: Optional["Span"] = None
+    ) -> "ToolExecutionResult":
         """
         Executes a tool and returns (result, success_flag).
         """
+        from letta.schemas.tool_execution_result import ToolExecutionResult
+
         # Special memory case
         target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
         if not target_tool:
-
-
-
-
-            tool_execution_manager = ToolExecutionManager(
-                agent_state=agent_state,
-                message_manager=self.message_manager,
-                agent_manager=self.agent_manager,
-                block_manager=self.block_manager,
-                passage_manager=self.passage_manager,
-                actor=self.actor,
-            )
-            # TODO: Integrate sandbox result
-            log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
-            tool_execution_result = await tool_execution_manager.execute_tool_async(
-                function_name=tool_name, function_args=tool_args, tool=target_tool
+            # TODO: fix this error message
+            return ToolExecutionResult(
+                func_return=f"Tool {tool_name} not found",
+                status="error",
             )
-            log_event(name=f"finish_{tool_name}_execution", attributes=tool_args)
-            return tool_execution_result.func_return, True
-        except Exception as e:
-            return f"Failed to call tool. Error: {e}", False
-
-    @trace_method
-    async def _send_message_to_agents_matching_tags(
-        self, message: str, match_all: List[str], match_some: List[str]
-    ) -> List[Dict[str, Any]]:
-        # Find matching agents
-        matching_agents = self.agent_manager.list_agents_matching_tags(actor=self.actor, match_all=match_all, match_some=match_some)
-        if not matching_agents:
-            return []
-
-        async def process_agent(agent_state: AgentState, message: str) -> Dict[str, Any]:
-            try:
-                letta_agent = LettaAgent(
-                    agent_id=agent_state.id,
-                    message_manager=self.message_manager,
-                    agent_manager=self.agent_manager,
-                    block_manager=self.block_manager,
-                    passage_manager=self.passage_manager,
-                    actor=self.actor,
-                )
 
-
-                    "[Incoming message from external Letta agent - to reply to this message, "
-                    "make sure to use the 'send_message' at the end, and the system will notify "
-                    "the sender of your response] "
-                    f"{message}"
-                )
-
-                letta_response = await letta_agent.step(
-                    [MessageCreate(role=MessageRole.system, content=[TextContent(text=augmented_message)])]
-                )
-                messages = letta_response.messages
-
-                send_message_content = [message.content for message in messages if isinstance(message, AssistantMessage)]
+        # TODO: This temp. Move this logic and code to executors
 
-
-
-
-                    "response": send_message_content if send_message_content else ["<no response>"],
-                }
+        if agent_step_span:
+            start_time = get_utc_timestamp_ns()
+            agent_step_span.add_event(name="tool_execution_started")
 
-
-
-
-
-
-
-
-
-
-
-
+        sandbox_env_vars = {var.key: var.value for var in agent_state.tool_exec_environment_variables}
+        tool_execution_manager = ToolExecutionManager(
+            agent_state=agent_state,
+            message_manager=self.message_manager,
+            agent_manager=self.agent_manager,
+            block_manager=self.block_manager,
+            passage_manager=self.passage_manager,
+            sandbox_env_vars=sandbox_env_vars,
+            actor=self.actor,
+        )
+        # TODO: Integrate sandbox result
+        log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
+        tool_execution_result = await tool_execution_manager.execute_tool_async(
+            function_name=tool_name, function_args=tool_args, tool=target_tool
+        )
+        if agent_step_span:
+            end_time = get_utc_timestamp_ns()
+            agent_step_span.add_event(
+                name="tool_execution_completed",
+                attributes={
+                    "tool_name": target_tool.name,
+                    "duration_ms": (end_time - start_time) // 1_000_000,
+                    "success": tool_execution_result.success_flag,
+                    "tool_type": target_tool.tool_type,
+                    "tool_id": target_tool.id,
+                },
+            )
+        log_event(name=f"finish_{tool_name}_execution", attributes=tool_execution_result.model_dump())
+        return tool_execution_result
 
     @trace_method
-
+    def _load_last_function_response(self, in_context_messages: List[Message]):
         """Load the last function response from message history"""
-        in_context_messages = await self.agent_manager.get_in_context_messages_async(agent_id=self.agent_id, actor=self.actor)
         for msg in reversed(in_context_messages):
             if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
                 text_content = msg.content[0].text