letta-nightly 0.7.30.dev20250603104343__py3-none-any.whl → 0.8.0.dev20250604104349__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136)
  1. letta/__init__.py +7 -1
  2. letta/agent.py +14 -7
  3. letta/agents/base_agent.py +1 -0
  4. letta/agents/ephemeral_summary_agent.py +104 -0
  5. letta/agents/helpers.py +35 -3
  6. letta/agents/letta_agent.py +492 -176
  7. letta/agents/letta_agent_batch.py +22 -16
  8. letta/agents/prompts/summary_system_prompt.txt +62 -0
  9. letta/agents/voice_agent.py +22 -7
  10. letta/agents/voice_sleeptime_agent.py +13 -8
  11. letta/constants.py +33 -1
  12. letta/data_sources/connectors.py +52 -36
  13. letta/errors.py +4 -0
  14. letta/functions/ast_parsers.py +13 -30
  15. letta/functions/function_sets/base.py +3 -1
  16. letta/functions/functions.py +2 -0
  17. letta/functions/mcp_client/base_client.py +151 -97
  18. letta/functions/mcp_client/sse_client.py +49 -31
  19. letta/functions/mcp_client/stdio_client.py +107 -106
  20. letta/functions/schema_generator.py +22 -22
  21. letta/groups/helpers.py +3 -4
  22. letta/groups/sleeptime_multi_agent.py +4 -4
  23. letta/groups/sleeptime_multi_agent_v2.py +22 -0
  24. letta/helpers/composio_helpers.py +16 -0
  25. letta/helpers/converters.py +20 -0
  26. letta/helpers/datetime_helpers.py +1 -6
  27. letta/helpers/tool_rule_solver.py +2 -1
  28. letta/interfaces/anthropic_streaming_interface.py +17 -2
  29. letta/interfaces/openai_chat_completions_streaming_interface.py +1 -0
  30. letta/interfaces/openai_streaming_interface.py +18 -2
  31. letta/llm_api/anthropic_client.py +24 -3
  32. letta/llm_api/google_ai_client.py +0 -15
  33. letta/llm_api/google_vertex_client.py +6 -5
  34. letta/llm_api/llm_client_base.py +15 -0
  35. letta/llm_api/openai.py +2 -2
  36. letta/llm_api/openai_client.py +60 -8
  37. letta/orm/__init__.py +2 -0
  38. letta/orm/agent.py +45 -43
  39. letta/orm/base.py +0 -2
  40. letta/orm/block.py +1 -0
  41. letta/orm/custom_columns.py +13 -0
  42. letta/orm/enums.py +5 -0
  43. letta/orm/file.py +3 -1
  44. letta/orm/files_agents.py +68 -0
  45. letta/orm/mcp_server.py +48 -0
  46. letta/orm/message.py +1 -0
  47. letta/orm/organization.py +11 -2
  48. letta/orm/passage.py +25 -10
  49. letta/orm/sandbox_config.py +5 -2
  50. letta/orm/sqlalchemy_base.py +171 -110
  51. letta/prompts/system/memgpt_base.txt +6 -1
  52. letta/prompts/system/memgpt_v2_chat.txt +57 -0
  53. letta/prompts/system/sleeptime.txt +2 -0
  54. letta/prompts/system/sleeptime_v2.txt +28 -0
  55. letta/schemas/agent.py +87 -20
  56. letta/schemas/block.py +7 -1
  57. letta/schemas/file.py +57 -0
  58. letta/schemas/mcp.py +74 -0
  59. letta/schemas/memory.py +5 -2
  60. letta/schemas/message.py +9 -0
  61. letta/schemas/openai/openai.py +0 -6
  62. letta/schemas/providers.py +33 -4
  63. letta/schemas/tool.py +26 -21
  64. letta/schemas/tool_execution_result.py +5 -0
  65. letta/server/db.py +23 -8
  66. letta/server/rest_api/app.py +73 -56
  67. letta/server/rest_api/interface.py +4 -4
  68. letta/server/rest_api/routers/v1/agents.py +132 -47
  69. letta/server/rest_api/routers/v1/blocks.py +3 -2
  70. letta/server/rest_api/routers/v1/embeddings.py +3 -3
  71. letta/server/rest_api/routers/v1/groups.py +3 -3
  72. letta/server/rest_api/routers/v1/jobs.py +14 -17
  73. letta/server/rest_api/routers/v1/organizations.py +10 -10
  74. letta/server/rest_api/routers/v1/providers.py +12 -10
  75. letta/server/rest_api/routers/v1/runs.py +3 -3
  76. letta/server/rest_api/routers/v1/sandbox_configs.py +12 -12
  77. letta/server/rest_api/routers/v1/sources.py +108 -43
  78. letta/server/rest_api/routers/v1/steps.py +8 -6
  79. letta/server/rest_api/routers/v1/tools.py +134 -95
  80. letta/server/rest_api/utils.py +12 -1
  81. letta/server/server.py +272 -73
  82. letta/services/agent_manager.py +246 -313
  83. letta/services/block_manager.py +30 -9
  84. letta/services/context_window_calculator/__init__.py +0 -0
  85. letta/services/context_window_calculator/context_window_calculator.py +150 -0
  86. letta/services/context_window_calculator/token_counter.py +82 -0
  87. letta/services/file_processor/__init__.py +0 -0
  88. letta/services/file_processor/chunker/__init__.py +0 -0
  89. letta/services/file_processor/chunker/llama_index_chunker.py +29 -0
  90. letta/services/file_processor/embedder/__init__.py +0 -0
  91. letta/services/file_processor/embedder/openai_embedder.py +84 -0
  92. letta/services/file_processor/file_processor.py +123 -0
  93. letta/services/file_processor/parser/__init__.py +0 -0
  94. letta/services/file_processor/parser/base_parser.py +9 -0
  95. letta/services/file_processor/parser/mistral_parser.py +54 -0
  96. letta/services/file_processor/types.py +0 -0
  97. letta/services/files_agents_manager.py +184 -0
  98. letta/services/group_manager.py +118 -0
  99. letta/services/helpers/agent_manager_helper.py +76 -21
  100. letta/services/helpers/tool_execution_helper.py +3 -0
  101. letta/services/helpers/tool_parser_helper.py +100 -0
  102. letta/services/identity_manager.py +44 -42
  103. letta/services/job_manager.py +21 -10
  104. letta/services/mcp/base_client.py +5 -2
  105. letta/services/mcp/sse_client.py +3 -5
  106. letta/services/mcp/stdio_client.py +3 -5
  107. letta/services/mcp_manager.py +281 -0
  108. letta/services/message_manager.py +40 -26
  109. letta/services/organization_manager.py +55 -19
  110. letta/services/passage_manager.py +211 -13
  111. letta/services/provider_manager.py +48 -2
  112. letta/services/sandbox_config_manager.py +105 -0
  113. letta/services/source_manager.py +4 -5
  114. letta/services/step_manager.py +9 -6
  115. letta/services/summarizer/summarizer.py +50 -23
  116. letta/services/telemetry_manager.py +7 -0
  117. letta/services/tool_executor/tool_execution_manager.py +11 -52
  118. letta/services/tool_executor/tool_execution_sandbox.py +4 -34
  119. letta/services/tool_executor/tool_executor.py +107 -105
  120. letta/services/tool_manager.py +56 -17
  121. letta/services/tool_sandbox/base.py +39 -92
  122. letta/services/tool_sandbox/e2b_sandbox.py +16 -11
  123. letta/services/tool_sandbox/local_sandbox.py +51 -23
  124. letta/services/user_manager.py +36 -3
  125. letta/settings.py +10 -3
  126. letta/templates/__init__.py +0 -0
  127. letta/templates/sandbox_code_file.py.j2 +47 -0
  128. letta/templates/template_helper.py +16 -0
  129. letta/tracing.py +30 -1
  130. letta/types/__init__.py +7 -0
  131. letta/utils.py +25 -1
  132. {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/METADATA +7 -2
  133. {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/RECORD +136 -110
  134. {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/LICENSE +0 -0
  135. {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/WHEEL +0 -0
  136. {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/entry_points.txt +0 -0
letta/agents/letta_agent.py
@@ -1,14 +1,20 @@
  import asyncio
  import json
  import uuid
- from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple, Union
+ from typing import AsyncGenerator, Dict, List, Optional, Tuple, Union

  from openai import AsyncStream
- from openai.types import CompletionUsage
- from openai.types.chat import ChatCompletion, ChatCompletionChunk
+ from openai.types.chat import ChatCompletionChunk

  from letta.agents.base_agent import BaseAgent
- from letta.agents.helpers import _create_letta_response, _prepare_in_context_messages_async, generate_step_id
+ from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
+ from letta.agents.helpers import (
+ _create_letta_response,
+ _prepare_in_context_messages_async,
+ _prepare_in_context_messages_no_persist_async,
+ generate_step_id,
+ )
+ from letta.errors import LLMContextWindowExceededError
  from letta.helpers import ToolRulesSolver
  from letta.helpers.datetime_helpers import get_utc_timestamp_ns
  from letta.helpers.tool_execution_helper import enable_strict_mode
@@ -21,9 +27,9 @@ from letta.log import get_logger
  from letta.orm.enums import ToolType
  from letta.schemas.agent import AgentState
  from letta.schemas.enums import MessageRole, MessageStreamStatus
- from letta.schemas.letta_message import AssistantMessage
  from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
  from letta.schemas.letta_response import LettaResponse
+ from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message, MessageCreate
  from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
  from letta.schemas.provider_trace import ProviderTraceCreate
@@ -32,13 +38,18 @@ from letta.schemas.user import User
  from letta.server.rest_api.utils import create_letta_messages_from_llm_response
  from letta.services.agent_manager import AgentManager
  from letta.services.block_manager import BlockManager
+ from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
  from letta.services.message_manager import MessageManager
  from letta.services.passage_manager import PassageManager
  from letta.services.step_manager import NoopStepManager, StepManager
+ from letta.services.summarizer.enums import SummarizationMode
+ from letta.services.summarizer.summarizer import Summarizer
  from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
  from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
+ from letta.settings import model_settings
  from letta.system import package_function_response
  from letta.tracing import log_event, trace_method, tracer
+ from letta.utils import log_telemetry, validate_function_response

  logger = get_logger(__name__)

@@ -55,6 +66,11 @@ class LettaAgent(BaseAgent):
  actor: User,
  step_manager: StepManager = NoopStepManager(),
  telemetry_manager: TelemetryManager = NoopTelemetryManager(),
+ summary_block_label: str = "conversation_summary",
+ message_buffer_limit: int = 60, # TODO: Make this configurable
+ message_buffer_min: int = 15, # TODO: Make this configurable
+ enable_summarization: bool = True, # TODO: Make this configurable
+ max_summarization_retries: int = 3, # TODO: Make this configurable
  ):
  super().__init__(agent_id=agent_id, openai_client=None, message_manager=message_manager, agent_manager=agent_manager, actor=actor)

@@ -69,23 +85,63 @@ class LettaAgent(BaseAgent):
  self.last_function_response = None

  # Cached archival memory/message size
- self.num_messages = 0
- self.num_archival_memories = 0
+ self.num_messages = None
+ self.num_archival_memories = None
+
+ self.summarization_agent = None
+ self.summary_block_label = summary_block_label
+ self.max_summarization_retries = max_summarization_retries
+
+ # TODO: Expand to more
+ if enable_summarization and model_settings.openai_api_key:
+ self.summarization_agent = EphemeralSummaryAgent(
+ target_block_label=self.summary_block_label,
+ agent_id=agent_id,
+ block_manager=self.block_manager,
+ message_manager=self.message_manager,
+ agent_manager=self.agent_manager,
+ actor=self.actor,
+ )
+
+ self.summarizer = Summarizer(
+ mode=SummarizationMode.STATIC_MESSAGE_BUFFER,
+ summarizer_agent=self.summarization_agent,
+ # TODO: Make this configurable
+ message_buffer_limit=message_buffer_limit,
+ message_buffer_min=message_buffer_min,
+ )

  @trace_method
- async def step(self, input_messages: List[MessageCreate], max_steps: int = 10, use_assistant_message: bool = True) -> LettaResponse:
+ async def step(
+ self,
+ input_messages: List[MessageCreate],
+ max_steps: int = 10,
+ use_assistant_message: bool = True,
+ request_start_timestamp_ns: Optional[int] = None,
+ ) -> LettaResponse:
  agent_state = await self.agent_manager.get_agent_by_id_async(
- agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
+ agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
+ )
+ _, new_in_context_messages, usage = await self._step(
+ agent_state=agent_state,
+ input_messages=input_messages,
+ max_steps=max_steps,
+ request_start_timestamp_ns=request_start_timestamp_ns,
  )
- _, new_in_context_messages, usage = await self._step(agent_state=agent_state, input_messages=input_messages, max_steps=max_steps)
  return _create_letta_response(
  new_in_context_messages=new_in_context_messages, use_assistant_message=use_assistant_message, usage=usage
  )

  @trace_method
- async def step_stream_no_tokens(self, input_messages: List[MessageCreate], max_steps: int = 10, use_assistant_message: bool = True):
+ async def step_stream_no_tokens(
+ self,
+ input_messages: List[MessageCreate],
+ max_steps: int = 10,
+ use_assistant_message: bool = True,
+ request_start_timestamp_ns: Optional[int] = None,
+ ):
  agent_state = await self.agent_manager.get_agent_by_id_async(
- agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
+ agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
  )
  current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
  input_messages, agent_state, self.message_manager, self.actor
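Note: the new constructor arguments above (summary_block_label, message_buffer_limit, message_buffer_min, enable_summarization, max_summarization_retries) wire a static message-buffer summarizer into the agent. A minimal, self-contained sketch of that buffer policy, with hypothetical names and a counter standing in for the LLM-written summary:

    # Hypothetical illustration only: once the buffer exceeds `buffer_limit`,
    # evict the oldest messages down to `buffer_min` and fold them into a
    # running summary (the real Summarizer delegates that to an LLM agent).
    from dataclasses import dataclass
    from typing import List, Tuple

    @dataclass
    class StaticBufferSummarizer:
        buffer_limit: int = 60
        buffer_min: int = 15
        summary: str = ""

        def summarize(self, messages: List[str], force: bool = False) -> Tuple[List[str], bool]:
            if not force and len(messages) <= self.buffer_limit:
                return messages, False  # buffer still small enough, nothing to do
            keep = [] if force else messages[-self.buffer_min:]
            evicted = messages[: len(messages) - len(keep)]
            self.summary += f"[{len(evicted)} messages summarized] "
            return keep, True

    summarizer = StaticBufferSummarizer(buffer_limit=5, buffer_min=2)
    buffer, updated = summarizer.summarize([f"msg-{i}" for i in range(8)])
    print(buffer, updated, summarizer.summary)  # ['msg-6', 'msg-7'] True '[6 messages summarized] '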
@@ -97,32 +153,33 @@ class LettaAgent(BaseAgent):
  actor=self.actor,
  )
  usage = LettaUsageStatistics()
+
+ # span for request
+ request_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
+ request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
+
  for _ in range(max_steps):
  step_id = generate_step_id()
+ step_start = get_utc_timestamp_ns()
+ agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+ agent_step_span.set_attributes({"step_id": step_id})

- in_context_messages = await self._rebuild_memory_async(
- current_in_context_messages + new_in_context_messages,
+ request_data, response_data, current_in_context_messages, new_in_context_messages = await self._build_and_request_from_llm(
+ current_in_context_messages,
+ new_in_context_messages,
  agent_state,
- num_messages=self.num_messages,
- num_archival_memories=self.num_archival_memories,
- )
- log_event("agent.stream_no_tokens.messages.refreshed") # [1^]
-
- request_data = await self._create_llm_request_data_async(
- llm_client=llm_client,
- in_context_messages=in_context_messages,
- agent_state=agent_state,
- tool_rules_solver=tool_rules_solver,
- # TODO: pass in reasoning content
+ llm_client,
+ tool_rules_solver,
  )
- log_event("agent.stream_no_tokens.llm_request.created") # [2^]
+ in_context_messages = current_in_context_messages + new_in_context_messages

- try:
- response_data = await llm_client.request_async(request_data, agent_state.llm_config)
- except Exception as e:
- raise llm_client.handle_llm_error(e)
  log_event("agent.stream_no_tokens.llm_response.received") # [3^]

+ # log llm request time
+ now = get_utc_timestamp_ns()
+ llm_request_ns = now - step_start
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
+
  response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)

  # update usage
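Note: the tracing added in this hunk times each step by diffing nanosecond timestamps and emitting the duration in milliseconds (ns // 1_000_000). The same pattern in isolation, using time.monotonic_ns in place of letta's get_utc_timestamp_ns and a print in place of a span event:

    import time

    def timed_call() -> int:
        """Time a dummy workload and return the duration in milliseconds."""
        start_ns = time.monotonic_ns()           # stands in for get_utc_timestamp_ns()
        sum(i * i for i in range(100_000))       # stands in for the LLM request
        elapsed_ns = time.monotonic_ns() - start_ns
        return elapsed_ns // 1_000_000           # same ns -> ms conversion as the diff

    print(f"llm_request_ms={timed_call()}")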
@@ -144,16 +201,35 @@ class LettaAgent(BaseAgent):
  signature=response.choices[0].message.reasoning_content_signature,
  )
  ]
- else:
+ elif response.choices[0].message.content:
  reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+ else:
+ logger.info("No reasoning content found.")
+ reasoning = None
+
+ # log LLM request time
+ now = get_utc_timestamp_ns()
+ llm_request_ns = now - step_start
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})

  persisted_messages, should_continue = await self._handle_ai_response(
- tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning
+ tool_call,
+ agent_state,
+ tool_rules_solver,
+ response.usage,
+ reasoning_content=reasoning,
+ agent_step_span=agent_step_span,
  )
  self.response_messages.extend(persisted_messages)
  new_in_context_messages.extend(persisted_messages)
  log_event("agent.stream_no_tokens.llm_response.processed") # [4^]

+ # log step time
+ now = get_utc_timestamp_ns()
+ step_ns = now - step_start
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
+ agent_step_span.end()
+
  # Log LLM Trace
  await self.telemetry_manager.create_provider_trace_async(
  actor=self.actor,
@@ -179,15 +255,32 @@ class LettaAgent(BaseAgent):

  # Extend the in context message ids
  if not agent_state.message_buffer_autoclear:
- message_ids = [m.id for m in (current_in_context_messages + new_in_context_messages)]
- await self.agent_manager.set_in_context_messages_async(agent_id=self.agent_id, message_ids=message_ids, actor=self.actor)
+ await self._rebuild_context_window(
+ in_context_messages=current_in_context_messages,
+ new_letta_messages=new_in_context_messages,
+ llm_config=agent_state.llm_config,
+ total_tokens=usage.total_tokens,
+ force=False,
+ )
+
+ # log request time
+ if request_start_timestamp_ns:
+ now = get_utc_timestamp_ns()
+ request_ns = now - request_start_timestamp_ns
+ request_span.add_event(name="letta_request_ms", attributes={"duration_ms": request_ns // 1_000_000})
+ request_span.end()

  # Return back usage
  yield f"data: {usage.model_dump_json()}\n\n"
+ yield f"data: {MessageStreamStatus.done.model_dump_json()}\n\n"

  async def _step(
- self, agent_state: AgentState, input_messages: List[MessageCreate], max_steps: int = 10
- ) -> Tuple[List[Message], List[Message], CompletionUsage]:
+ self,
+ agent_state: AgentState,
+ input_messages: List[MessageCreate],
+ max_steps: int = 10,
+ request_start_timestamp_ns: Optional[int] = None,
+ ) -> Tuple[List[Message], List[Message], LettaUsageStatistics]:
  """
  Carries out an invocation of the agent loop. In each step, the agent
  1. Rebuilds its memory
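Note: both streaming endpoints emit server-sent events: each JSON payload goes out as a data: line terminated by a blank line, followed by a final done marker (the MessageStreamStatus.done yield added above). A generic async-generator sketch of that framing, not the Letta schemas:

    import asyncio
    import json

    async def sse_stream(payloads):
        # Server-sent event framing: "data: <json>" terminated by a blank line.
        for payload in payloads:
            yield f"data: {json.dumps(payload)}\n\n"
        # Final terminator, analogous to the MessageStreamStatus.done yield above.
        yield "data: [DONE]\n\n"

    async def main():
        async for event in sse_stream([{"text": "hello"}, {"total_tokens": 7}]):
            print(event, end="")

    asyncio.run(main())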
@@ -204,35 +297,32 @@ class LettaAgent(BaseAgent):
  put_inner_thoughts_first=True,
  actor=self.actor,
  )
+
+ # span for request
+ request_span = tracer.start_span("time_to_first_token")
+ request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
+
  usage = LettaUsageStatistics()
  for _ in range(max_steps):
  step_id = generate_step_id()
+ step_start = get_utc_timestamp_ns()
+ agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+ agent_step_span.set_attributes({"step_id": step_id})

- in_context_messages = await self._rebuild_memory_async(
- current_in_context_messages + new_in_context_messages,
- agent_state,
- num_messages=self.num_messages,
- num_archival_memories=self.num_archival_memories,
- )
- log_event("agent.step.messages.refreshed") # [1^]
-
- request_data = await self._create_llm_request_data_async(
- llm_client=llm_client,
- in_context_messages=in_context_messages,
- agent_state=agent_state,
- tool_rules_solver=tool_rules_solver,
- # TODO: pass in reasoning content
+ request_data, response_data, current_in_context_messages, new_in_context_messages = await self._build_and_request_from_llm(
+ current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver
  )
- log_event("agent.step.llm_request.created") # [2^]
+ in_context_messages = current_in_context_messages + new_in_context_messages

- try:
- response_data = await llm_client.request_async(request_data, agent_state.llm_config)
- except Exception as e:
- raise llm_client.handle_llm_error(e)
  log_event("agent.step.llm_response.received") # [3^]

  response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)

+ # log LLM request time
+ now = get_utc_timestamp_ns()
+ llm_request_ns = now - step_start
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
+
  # TODO: add run_id
  usage.step_count += 1
  usage.completion_tokens += response.usage.completion_tokens
@@ -251,16 +341,31 @@ class LettaAgent(BaseAgent):
  signature=response.choices[0].message.reasoning_content_signature,
  )
  ]
- else:
+ elif response.choices[0].message.content:
  reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+ else:
+ logger.info("No reasoning content found.")
+ reasoning = None

  persisted_messages, should_continue = await self._handle_ai_response(
- tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning, step_id=step_id
+ tool_call,
+ agent_state,
+ tool_rules_solver,
+ response.usage,
+ reasoning_content=reasoning,
+ step_id=step_id,
+ agent_step_span=agent_step_span,
  )
  self.response_messages.extend(persisted_messages)
  new_in_context_messages.extend(persisted_messages)
  log_event("agent.step.llm_response.processed") # [4^]

+ # log step time
+ now = get_utc_timestamp_ns()
+ step_ns = now - step_start
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
+ agent_step_span.end()
+
  # Log LLM Trace
  await self.telemetry_manager.create_provider_trace_async(
  actor=self.actor,
@@ -275,10 +380,22 @@ class LettaAgent(BaseAgent):
  if not should_continue:
  break

+ # log request time
+ if request_start_timestamp_ns:
+ now = get_utc_timestamp_ns()
+ request_ns = now - request_start_timestamp_ns
+ request_span.add_event(name="request_ms", attributes={"duration_ms": request_ns // 1_000_000})
+ request_span.end()
+
  # Extend the in context message ids
  if not agent_state.message_buffer_autoclear:
- message_ids = [m.id for m in (current_in_context_messages + new_in_context_messages)]
- await self.agent_manager.set_in_context_messages_async(agent_id=self.agent_id, message_ids=message_ids, actor=self.actor)
+ await self._rebuild_context_window(
+ in_context_messages=current_in_context_messages,
+ new_letta_messages=new_in_context_messages,
+ llm_config=agent_state.llm_config,
+ total_tokens=usage.total_tokens,
+ force=False,
+ )

  return current_in_context_messages, new_in_context_messages, usage

@@ -299,11 +416,17 @@ class LettaAgent(BaseAgent):
  4. Processes the response
  """
  agent_state = await self.agent_manager.get_agent_by_id_async(
- agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
+ agent_id=self.agent_id, include_relationships=["tools", "memory", "tool_exec_environment_variables"], actor=self.actor
  )
- current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
+ current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_no_persist_async(
  input_messages, agent_state, self.message_manager, self.actor
  )
+
+ # Special strategy to lower TTFT
+ # Delay persistence of the initial input message as much as possible
+ persisted_input_messages = False
+ initial_messages = new_in_context_messages
+
  tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
  llm_client = LLMClient.create(
  provider_type=agent_state.llm_config.model_endpoint_type,
@@ -311,29 +434,28 @@ class LettaAgent(BaseAgent):
  actor=self.actor,
  )
  usage = LettaUsageStatistics()
+ first_chunk, request_span = True, None
+ if request_start_timestamp_ns:
+ request_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
+ request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

+ provider_request_start_timestamp_ns = None
  for _ in range(max_steps):
  step_id = generate_step_id()
- in_context_messages = await self._rebuild_memory_async(
- current_in_context_messages + new_in_context_messages,
+ step_start = get_utc_timestamp_ns()
+ agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+ agent_step_span.set_attributes({"step_id": step_id})
+
+ request_data, stream, current_in_context_messages, new_in_context_messages = await self._build_and_request_from_llm_streaming(
+ first_chunk,
+ agent_step_span,
+ request_start_timestamp_ns,
+ current_in_context_messages,
+ new_in_context_messages,
  agent_state,
- num_messages=self.num_messages,
- num_archival_memories=self.num_archival_memories,
- )
- log_event("agent.step.messages.refreshed") # [1^]
-
- request_data = await self._create_llm_request_data_async(
- llm_client=llm_client,
- in_context_messages=in_context_messages,
- agent_state=agent_state,
- tool_rules_solver=tool_rules_solver,
+ llm_client,
+ tool_rules_solver,
  )
- log_event("agent.stream.llm_request.created") # [2^]
-
- try:
- stream = await llm_client.stream_async(request_data, agent_state.llm_config)
- except Exception as e:
- raise llm_client.handle_llm_error(e)
  log_event("agent.stream.llm_response.received") # [3^]

  # TODO: THIS IS INCREDIBLY UGLY
@@ -351,18 +473,14 @@ class LettaAgent(BaseAgent):
  else:
  raise ValueError(f"Streaming not supported for {agent_state.llm_config}")

- first_chunk, ttft_span = True, None
- if request_start_timestamp_ns is not None:
- ttft_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
- ttft_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
-
- async for chunk in interface.process(stream):
+ async for chunk in interface.process(
+ stream, ttft_span=request_span, provider_request_start_timestamp_ns=provider_request_start_timestamp_ns
+ ):
  # Measure time to first token
- if first_chunk and ttft_span is not None:
+ if first_chunk and request_span is not None:
  now = get_utc_timestamp_ns()
  ttft_ns = now - request_start_timestamp_ns
- ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
- ttft_span.end()
+ request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
  first_chunk = False

  yield f"data: {chunk.model_dump_json()}\n\n"
@@ -373,6 +491,17 @@ class LettaAgent(BaseAgent):
  usage.prompt_tokens += interface.input_tokens
  usage.total_tokens += interface.input_tokens + interface.output_tokens

+ # Persist input messages if not already
+ # Special strategy to lower TTFT
+ if not persisted_input_messages:
+ await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
+ persisted_input_messages = True
+
+ # log LLM request time
+ now = get_utc_timestamp_ns()
+ llm_request_ns = now - step_start
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ns // 1_000_000})
+
  # Process resulting stream content
  tool_call = interface.get_tool_call_object()
  reasoning_content = interface.get_reasoning_content()
@@ -389,10 +518,17 @@ class LettaAgent(BaseAgent):
  pre_computed_assistant_message_id=interface.letta_assistant_message_id,
  pre_computed_tool_message_id=interface.letta_tool_message_id,
  step_id=step_id,
+ agent_step_span=agent_step_span,
  )
  self.response_messages.extend(persisted_messages)
  new_in_context_messages.extend(persisted_messages)

+ # log total step time
+ now = get_utc_timestamp_ns()
+ step_ns = now - step_start
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": step_ns // 1_000_000})
+ agent_step_span.end()
+
  # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
  # log_event("agent.stream.llm_response.processed") # [4^]

@@ -429,18 +565,180 @@

  # Extend the in context message ids
  if not agent_state.message_buffer_autoclear:
- message_ids = [m.id for m in (current_in_context_messages + new_in_context_messages)]
- await self.agent_manager.set_in_context_messages_async(agent_id=self.agent_id, message_ids=message_ids, actor=self.actor)
+ await self._rebuild_context_window(
+ in_context_messages=current_in_context_messages,
+ new_letta_messages=new_in_context_messages,
+ llm_config=agent_state.llm_config,
+ total_tokens=usage.total_tokens,
+ force=False,
+ )

- # TODO: This may be out of sync, if in between steps users add files
- # NOTE (cliandy): temporary for now for particlar use cases.
- self.num_messages = await self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
- self.num_archival_memories = await self.passage_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+ # log time of entire request
+ if request_start_timestamp_ns:
+ now = get_utc_timestamp_ns()
+ request_ns = now - request_start_timestamp_ns
+ request_span.add_event(name="letta_request_ms", attributes={"duration_ms": request_ns // 1_000_000})
+ request_span.end()

  # TODO: Also yield out a letta usage stats SSE
  yield f"data: {usage.model_dump_json()}\n\n"
  yield f"data: {MessageStreamStatus.done.model_dump_json()}\n\n"

+ async def _build_and_request_from_llm(
+ self,
+ current_in_context_messages: List[Message],
+ new_in_context_messages: List[Message],
+ agent_state: AgentState,
+ llm_client: LLMClientBase,
+ tool_rules_solver: ToolRulesSolver,
+ ) -> Tuple[Dict, Dict, List[Message], List[Message]]:
+ for attempt in range(self.max_summarization_retries + 1):
+ try:
+ log_event("agent.stream_no_tokens.messages.refreshed")
+ # Create LLM request data
+ request_data = await self._create_llm_request_data_async(
+ llm_client=llm_client,
+ in_context_messages=current_in_context_messages + new_in_context_messages,
+ agent_state=agent_state,
+ tool_rules_solver=tool_rules_solver,
+ )
+ log_event("agent.stream_no_tokens.llm_request.created")
+
+ # Attempt LLM request
+ return (
+ request_data,
+ await llm_client.request_async(request_data, agent_state.llm_config),
+ current_in_context_messages,
+ new_in_context_messages,
+ )
+
+ except Exception as e:
+ if attempt == self.max_summarization_retries:
+ raise e
+
+ # Handle the error and prepare for retry
+ current_in_context_messages = await self._handle_llm_error(
+ e,
+ llm_client=llm_client,
+ in_context_messages=current_in_context_messages,
+ new_letta_messages=new_in_context_messages,
+ llm_config=agent_state.llm_config,
+ force=True,
+ )
+ new_in_context_messages = []
+ log_event(f"agent.stream_no_tokens.retry_attempt.{attempt + 1}")
+
+ async def _build_and_request_from_llm_streaming(
+ self,
+ first_chunk: bool,
+ ttft_span: "Span",
+ request_start_timestamp_ns: int,
+ current_in_context_messages: List[Message],
+ new_in_context_messages: List[Message],
+ agent_state: AgentState,
+ llm_client: LLMClientBase,
+ tool_rules_solver: ToolRulesSolver,
+ ) -> Tuple[Dict, AsyncStream[ChatCompletionChunk], List[Message], List[Message]]:
+ for attempt in range(self.max_summarization_retries + 1):
+ try:
+ log_event("agent.stream_no_tokens.messages.refreshed")
+ # Create LLM request data
+ request_data = await self._create_llm_request_data_async(
+ llm_client=llm_client,
+ in_context_messages=current_in_context_messages + new_in_context_messages,
+ agent_state=agent_state,
+ tool_rules_solver=tool_rules_solver,
+ )
+ log_event("agent.stream.llm_request.created") # [2^]
+
+ if first_chunk and ttft_span is not None:
+ provider_request_start_timestamp_ns = get_utc_timestamp_ns()
+ provider_req_start_ns = provider_request_start_timestamp_ns - request_start_timestamp_ns
+ ttft_span.add_event(
+ name="provider_req_start_ns", attributes={"provider_req_start_ms": provider_req_start_ns // 1_000_000}
+ )
+
+ # Attempt LLM request
+ return (
+ request_data,
+ await llm_client.stream_async(request_data, agent_state.llm_config),
+ current_in_context_messages,
+ new_in_context_messages,
+ )
+
+ except Exception as e:
+ if attempt == self.max_summarization_retries:
+ raise e
+
+ # Handle the error and prepare for retry
+ current_in_context_messages = await self._handle_llm_error(
+ e,
+ llm_client=llm_client,
+ in_context_messages=current_in_context_messages,
+ new_letta_messages=new_in_context_messages,
+ llm_config=agent_state.llm_config,
+ force=True,
+ )
+ new_in_context_messages = []
+ log_event(f"agent.stream_no_tokens.retry_attempt.{attempt + 1}")
+
+ @trace_method
+ async def _handle_llm_error(
+ self,
+ e: Exception,
+ llm_client: LLMClientBase,
+ in_context_messages: List[Message],
+ new_letta_messages: List[Message],
+ llm_config: LLMConfig,
+ force: bool,
+ ) -> List[Message]:
+ if isinstance(e, LLMContextWindowExceededError):
+ return await self._rebuild_context_window(
+ in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, llm_config=llm_config, force=force
+ )
+ else:
+ raise llm_client.handle_llm_error(e)
+
+ @trace_method
+ async def _rebuild_context_window(
+ self,
+ in_context_messages: List[Message],
+ new_letta_messages: List[Message],
+ llm_config: LLMConfig,
+ total_tokens: Optional[int] = None,
+ force: bool = False,
+ ) -> List[Message]:
+ # If total tokens is reached, we truncate down
+ # TODO: This can be broken by bad configs, e.g. lower bound too high, initial messages too fat, etc.
+ if force or (total_tokens and total_tokens > llm_config.context_window):
+ self.logger.warning(
+ f"Total tokens {total_tokens} exceeds configured max tokens {llm_config.context_window}, forcefully clearing message history."
+ )
+ new_in_context_messages, updated = self.summarizer.summarize(
+ in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, force=True, clear=True
+ )
+ else:
+ new_in_context_messages, updated = self.summarizer.summarize(
+ in_context_messages=in_context_messages, new_letta_messages=new_letta_messages
+ )
+ await self.agent_manager.set_in_context_messages_async(
+ agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
+ )
+
+ return new_in_context_messages
+
+ @trace_method
+ async def summarize_conversation_history(self) -> AgentState:
+ agent_state = await self.agent_manager.get_agent_by_id_async(agent_id=self.agent_id, actor=self.actor)
+ message_ids = agent_state.message_ids
+ in_context_messages = await self.message_manager.get_messages_by_ids_async(message_ids=message_ids, actor=self.actor)
+ new_in_context_messages, updated = self.summarizer.summarize(
+ in_context_messages=in_context_messages, new_letta_messages=[], force=True
+ )
+ return await self.agent_manager.set_in_context_messages_async(
+ agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
+ )
+
  @trace_method
  async def _create_llm_request_data_async(
  self,
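Note: the new _build_and_request_from_llm / _build_and_request_from_llm_streaming helpers retry a failed request up to max_summarization_retries times, shrinking the context via _handle_llm_error when the failure is a context-window overflow. A self-contained sketch of that retry shape, with a toy provider and a slice standing in for summarization:

    class ContextWindowExceeded(Exception):
        """Stand-in for LLMContextWindowExceededError."""

    def fake_llm_request(messages):
        # Toy provider that rejects any prompt longer than three messages.
        if len(messages) > 3:
            raise ContextWindowExceeded(f"{len(messages)} messages do not fit")
        return {"ok": True, "prompt": list(messages)}

    def request_with_summarization_retries(messages, max_retries: int = 3):
        for attempt in range(max_retries + 1):
            try:
                return fake_llm_request(messages)
            except ContextWindowExceeded:
                if attempt == max_retries:
                    raise
                # "Summarize": keep only the most recent messages, then retry.
                messages = messages[-2:]

    print(request_with_summarization_retries([f"m{i}" for i in range(10)]))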
@@ -448,10 +746,18 @@ class LettaAgent(BaseAgent):
  in_context_messages: List[Message],
  agent_state: AgentState,
  tool_rules_solver: ToolRulesSolver,
- ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
- self.num_messages = self.num_messages or (await self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id))
- self.num_archival_memories = self.num_archival_memories or (
- await self.passage_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+ ) -> dict:
+ self.num_messages, self.num_archival_memories = await asyncio.gather(
+ (
+ self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+ if self.num_messages is None
+ else asyncio.sleep(0, result=self.num_messages)
+ ),
+ (
+ self.passage_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+ if self.num_archival_memories is None
+ else asyncio.sleep(0, result=self.num_archival_memories)
+ ),
  )
  in_context_messages = await self._rebuild_memory_async(
  in_context_messages, agent_state, num_messages=self.num_messages, num_archival_memories=self.num_archival_memories
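Note: the rewritten _create_llm_request_data_async refreshes the cached message and archival-memory counts with a single asyncio.gather, using asyncio.sleep(0, result=cached) as a no-op awaitable when a cached value already exists. The idiom in isolation:

    import asyncio

    async def count_rows() -> int:
        await asyncio.sleep(0.01)  # pretend this hits the database
        return 42

    async def get_counts(cached_messages, cached_passages):
        # asyncio.sleep(0, result=x) is an awaitable that immediately yields x,
        # so cached values and real queries can share one asyncio.gather call.
        return await asyncio.gather(
            count_rows() if cached_messages is None else asyncio.sleep(0, result=cached_messages),
            count_rows() if cached_passages is None else asyncio.sleep(0, result=cached_passages),
        )

    print(asyncio.run(get_counts(None, 7)))  # [42, 7]
    print(asyncio.run(get_counts(3, None)))  # [3, 42]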
@@ -469,13 +775,14 @@ class LettaAgent(BaseAgent):
  ToolType.LETTA_SLEEPTIME_CORE,
  ToolType.LETTA_VOICE_SLEEPTIME_CORE,
  ToolType.LETTA_BUILTIN,
+ ToolType.EXTERNAL_COMPOSIO,
+ ToolType.EXTERNAL_MCP,
  }
- or (t.tool_type == ToolType.EXTERNAL_COMPOSIO)
  ]

  # Mirror the sync agent loop: get allowed tools or allow all if none are allowed
  if self.last_function_response is None:
- self.last_function_response = await self._load_last_function_response_async()
+ self.last_function_response = self._load_last_function_response(in_context_messages)
  valid_tool_names = tool_rules_solver.get_allowed_tool_names(
  available_tools=set([t.name for t in tools]),
  last_function_response=self.last_function_response,
@@ -488,6 +795,9 @@ class LettaAgent(BaseAgent):
  force_tool_call = valid_tool_names[0]

  allowed_tools = [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]
+ allowed_tools = runtime_override_tool_json_schema(
+ tool_list=allowed_tools, response_format=agent_state.response_format, request_heartbeat=True
+ )

  return llm_client.build_request_data(in_context_messages, agent_state.llm_config, allowed_tools, force_tool_call)

@@ -502,6 +812,8 @@ class LettaAgent(BaseAgent):
  pre_computed_assistant_message_id: Optional[str] = None,
  pre_computed_tool_message_id: Optional[str] = None,
  step_id: str | None = None,
+ new_in_context_messages: Optional[List[Message]] = None,
+ agent_step_span: Optional["Span"] = None,
  ) -> Tuple[List[Message], bool]:
  """
  Now that streaming is done, handle the final AI response.
@@ -533,12 +845,43 @@

  tool_call_id = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"

- tool_result, success_flag = await self._execute_tool(
+ log_telemetry(
+ self.logger,
+ "_handle_ai_response execute tool start",
+ tool_name=tool_call_name,
+ tool_args=tool_args,
+ tool_call_id=tool_call_id,
+ request_heartbeat=request_heartbeat,
+ )
+
+ tool_execution_result = await self._execute_tool(
  tool_name=tool_call_name,
  tool_args=tool_args,
  agent_state=agent_state,
+ agent_step_span=agent_step_span,
+ )
+ log_telemetry(
+ self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
+ )
+
+ if tool_call_name in ["conversation_search", "conversation_search_date", "archival_memory_search"]:
+ # with certain functions we rely on the paging mechanism to handle overflow
+ truncate = False
+ else:
+ # but by default, we add a truncation safeguard to prevent bad functions from
+ # overflow the agent context window
+ truncate = True
+
+ # get the function response limit
+ target_tool = next((x for x in agent_state.tools if x.name == tool_call_name), None)
+ return_char_limit = target_tool.return_char_limit
+ function_response_string = validate_function_response(
+ tool_execution_result.func_return, return_char_limit=return_char_limit, truncate=truncate
+ )
+ function_response = package_function_response(
+ was_success=tool_execution_result.success_flag,
+ response_string=function_response_string,
  )
- function_response = package_function_response(tool_result, success_flag)

  # 4. Register tool call with tool rule solver
  # Resolve whether or not to continue stepping
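Note: the tool-result handling above clamps the function return to the tool's return_char_limit before packaging it, except for search tools that page their own output. A rough sketch of such a truncation safeguard (hypothetical helper, not letta.utils.validate_function_response):

    def clamp_tool_response(response: str, return_char_limit: int, truncate: bool = True) -> str:
        # Clip oversized tool output so it cannot blow up the context window;
        # paging tools (e.g. conversation_search) would pass truncate=False.
        if truncate and len(response) > return_char_limit:
            return response[:return_char_limit] + f"... [truncated after {return_char_limit} chars]"
        return response

    print(clamp_tool_response("x" * 5000, return_char_limit=100)[:140])
    print(len(clamp_tool_response("x" * 5000, return_char_limit=100, truncate=False)))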
@@ -575,9 +918,10 @@
  model=agent_state.llm_config.model,
  function_name=tool_call_name,
  function_arguments=tool_args,
+ tool_execution_result=tool_execution_result,
  tool_call_id=tool_call_id,
- function_call_success=success_flag,
- function_response=tool_result,
+ function_call_success=tool_execution_result.success_flag,
+ function_response=function_response_string,
  actor=self.actor,
  add_heartbeat_request_system_message=continue_stepping,
  reasoning_content=reasoning_content,
@@ -585,97 +929,69 @@
  pre_computed_tool_message_id=pre_computed_tool_message_id,
  step_id=logged_step.id if logged_step else None, # TODO (cliandy): eventually move over other agent loops
  )
+
  persisted_messages = await self.message_manager.create_many_messages_async(tool_call_messages, actor=self.actor)
  self.last_function_response = function_response

  return persisted_messages, continue_stepping

  @trace_method
- async def _execute_tool(self, tool_name: str, tool_args: dict, agent_state: AgentState) -> Tuple[str, bool]:
+ async def _execute_tool(
+ self, tool_name: str, tool_args: dict, agent_state: AgentState, agent_step_span: Optional["Span"] = None
+ ) -> "ToolExecutionResult":
  """
  Executes a tool and returns (result, success_flag).
  """
+ from letta.schemas.tool_execution_result import ToolExecutionResult
+
  # Special memory case
  target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
  if not target_tool:
- return f"Tool not found: {tool_name}", False
-
- # TODO: This temp. Move this logic and code to executors
- try:
- tool_execution_manager = ToolExecutionManager(
- agent_state=agent_state,
- message_manager=self.message_manager,
- agent_manager=self.agent_manager,
- block_manager=self.block_manager,
- passage_manager=self.passage_manager,
- actor=self.actor,
- )
- # TODO: Integrate sandbox result
- log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
- tool_execution_result = await tool_execution_manager.execute_tool_async(
- function_name=tool_name, function_args=tool_args, tool=target_tool
+ # TODO: fix this error message
+ return ToolExecutionResult(
+ func_return=f"Tool {tool_name} not found",
+ status="error",
  )
- log_event(name=f"finish_{tool_name}_execution", attributes=tool_args)
- return tool_execution_result.func_return, True
- except Exception as e:
- return f"Failed to call tool. Error: {e}", False
-
- @trace_method
- async def _send_message_to_agents_matching_tags(
- self, message: str, match_all: List[str], match_some: List[str]
- ) -> List[Dict[str, Any]]:
- # Find matching agents
- matching_agents = self.agent_manager.list_agents_matching_tags(actor=self.actor, match_all=match_all, match_some=match_some)
- if not matching_agents:
- return []
-
- async def process_agent(agent_state: AgentState, message: str) -> Dict[str, Any]:
- try:
- letta_agent = LettaAgent(
- agent_id=agent_state.id,
- message_manager=self.message_manager,
- agent_manager=self.agent_manager,
- block_manager=self.block_manager,
- passage_manager=self.passage_manager,
- actor=self.actor,
- )

- augmented_message = (
- "[Incoming message from external Letta agent - to reply to this message, "
- "make sure to use the 'send_message' at the end, and the system will notify "
- "the sender of your response] "
- f"{message}"
- )
-
- letta_response = await letta_agent.step(
- [MessageCreate(role=MessageRole.system, content=[TextContent(text=augmented_message)])]
- )
- messages = letta_response.messages
-
- send_message_content = [message.content for message in messages if isinstance(message, AssistantMessage)]
+ # TODO: This temp. Move this logic and code to executors

- return {
- "agent_id": agent_state.id,
- "agent_name": agent_state.name,
- "response": send_message_content if send_message_content else ["<no response>"],
- }
+ if agent_step_span:
+ start_time = get_utc_timestamp_ns()
+ agent_step_span.add_event(name="tool_execution_started")

- except Exception as e:
- return {
- "agent_id": agent_state.id,
- "agent_name": agent_state.name,
- "error": str(e),
- "type": type(e).__name__,
- }
-
- tasks = [asyncio.create_task(process_agent(agent_state=agent_state, message=message)) for agent_state in matching_agents]
- results = await asyncio.gather(*tasks)
- return results
+ sandbox_env_vars = {var.key: var.value for var in agent_state.tool_exec_environment_variables}
+ tool_execution_manager = ToolExecutionManager(
+ agent_state=agent_state,
+ message_manager=self.message_manager,
+ agent_manager=self.agent_manager,
+ block_manager=self.block_manager,
+ passage_manager=self.passage_manager,
+ sandbox_env_vars=sandbox_env_vars,
+ actor=self.actor,
+ )
+ # TODO: Integrate sandbox result
+ log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
+ tool_execution_result = await tool_execution_manager.execute_tool_async(
+ function_name=tool_name, function_args=tool_args, tool=target_tool
+ )
+ if agent_step_span:
+ end_time = get_utc_timestamp_ns()
+ agent_step_span.add_event(
+ name="tool_execution_completed",
+ attributes={
+ "tool_name": target_tool.name,
+ "duration_ms": (end_time - start_time) // 1_000_000,
+ "success": tool_execution_result.success_flag,
+ "tool_type": target_tool.tool_type,
+ "tool_id": target_tool.id,
+ },
+ )
+ log_event(name=f"finish_{tool_name}_execution", attributes=tool_execution_result.model_dump())
+ return tool_execution_result

  @trace_method
- async def _load_last_function_response_async(self):
+ def _load_last_function_response(self, in_context_messages: List[Message]):
  """Load the last function response from message history"""
- in_context_messages = await self.agent_manager.get_in_context_messages_async(agent_id=self.agent_id, actor=self.actor)
  for msg in reversed(in_context_messages):
  if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
  text_content = msg.content[0].text