letta-nightly 0.7.20.dev20250521104258__py3-none-any.whl → 0.7.21.dev20250522104246__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +290 -3
  3. letta/agents/base_agent.py +0 -55
  4. letta/agents/helpers.py +5 -0
  5. letta/agents/letta_agent.py +314 -64
  6. letta/agents/letta_agent_batch.py +102 -55
  7. letta/agents/voice_agent.py +5 -5
  8. letta/client/client.py +9 -18
  9. letta/constants.py +55 -1
  10. letta/functions/function_sets/builtin.py +27 -0
  11. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  12. letta/interfaces/anthropic_streaming_interface.py +10 -1
  13. letta/interfaces/openai_streaming_interface.py +9 -2
  14. letta/llm_api/anthropic.py +21 -2
  15. letta/llm_api/anthropic_client.py +33 -6
  16. letta/llm_api/google_ai_client.py +136 -423
  17. letta/llm_api/google_vertex_client.py +173 -22
  18. letta/llm_api/llm_api_tools.py +27 -0
  19. letta/llm_api/llm_client.py +1 -1
  20. letta/llm_api/llm_client_base.py +32 -21
  21. letta/llm_api/openai.py +57 -0
  22. letta/llm_api/openai_client.py +7 -11
  23. letta/memory.py +0 -1
  24. letta/orm/__init__.py +1 -0
  25. letta/orm/enums.py +1 -0
  26. letta/orm/provider_trace.py +26 -0
  27. letta/orm/step.py +1 -0
  28. letta/schemas/provider_trace.py +43 -0
  29. letta/schemas/providers.py +210 -65
  30. letta/schemas/step.py +1 -0
  31. letta/schemas/tool.py +4 -0
  32. letta/server/db.py +37 -19
  33. letta/server/rest_api/routers/v1/__init__.py +2 -0
  34. letta/server/rest_api/routers/v1/agents.py +57 -34
  35. letta/server/rest_api/routers/v1/blocks.py +3 -3
  36. letta/server/rest_api/routers/v1/identities.py +24 -26
  37. letta/server/rest_api/routers/v1/jobs.py +3 -3
  38. letta/server/rest_api/routers/v1/llms.py +13 -8
  39. letta/server/rest_api/routers/v1/sandbox_configs.py +6 -6
  40. letta/server/rest_api/routers/v1/tags.py +3 -3
  41. letta/server/rest_api/routers/v1/telemetry.py +18 -0
  42. letta/server/rest_api/routers/v1/tools.py +6 -6
  43. letta/server/rest_api/streaming_response.py +105 -0
  44. letta/server/rest_api/utils.py +4 -0
  45. letta/server/server.py +140 -1
  46. letta/services/agent_manager.py +251 -18
  47. letta/services/block_manager.py +52 -37
  48. letta/services/helpers/noop_helper.py +10 -0
  49. letta/services/identity_manager.py +43 -38
  50. letta/services/job_manager.py +29 -0
  51. letta/services/message_manager.py +111 -0
  52. letta/services/sandbox_config_manager.py +36 -0
  53. letta/services/step_manager.py +146 -0
  54. letta/services/telemetry_manager.py +58 -0
  55. letta/services/tool_executor/tool_execution_manager.py +49 -5
  56. letta/services/tool_executor/tool_execution_sandbox.py +47 -0
  57. letta/services/tool_executor/tool_executor.py +236 -7
  58. letta/services/tool_manager.py +160 -1
  59. letta/services/tool_sandbox/e2b_sandbox.py +65 -3
  60. letta/settings.py +10 -2
  61. letta/tracing.py +5 -5
  62. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250522104246.dist-info}/METADATA +3 -2
  63. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250522104246.dist-info}/RECORD +66 -59
  64. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250522104246.dist-info}/LICENSE +0 -0
  65. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250522104246.dist-info}/WHEEL +0 -0
  66. {letta_nightly-0.7.20.dev20250521104258.dist-info → letta_nightly-0.7.21.dev20250522104246.dist-info}/entry_points.txt +0 -0
letta/agents/letta_agent.py
@@ -8,8 +8,9 @@ from openai.types import CompletionUsage
  from openai.types.chat import ChatCompletion, ChatCompletionChunk

  from letta.agents.base_agent import BaseAgent
- from letta.agents.helpers import _create_letta_response, _prepare_in_context_messages_async
+ from letta.agents.helpers import _create_letta_response, _prepare_in_context_messages_async, generate_step_id
  from letta.helpers import ToolRulesSolver
+ from letta.helpers.datetime_helpers import get_utc_timestamp_ns
  from letta.helpers.tool_execution_helper import enable_strict_mode
  from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
  from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
@@ -24,7 +25,8 @@ from letta.schemas.letta_message import AssistantMessage
  from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
  from letta.schemas.letta_response import LettaResponse
  from letta.schemas.message import Message, MessageCreate
- from letta.schemas.openai.chat_completion_response import ToolCall
+ from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
+ from letta.schemas.provider_trace import ProviderTraceCreate
  from letta.schemas.usage import LettaUsageStatistics
  from letta.schemas.user import User
  from letta.server.rest_api.utils import create_letta_messages_from_llm_response
@@ -32,10 +34,11 @@ from letta.services.agent_manager import AgentManager
  from letta.services.block_manager import BlockManager
  from letta.services.message_manager import MessageManager
  from letta.services.passage_manager import PassageManager
+ from letta.services.step_manager import NoopStepManager, StepManager
+ from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
  from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
- from letta.settings import settings
  from letta.system import package_function_response
- from letta.tracing import log_event, trace_method
+ from letta.tracing import log_event, trace_method, tracer

  logger = get_logger(__name__)

@@ -50,6 +53,8 @@ class LettaAgent(BaseAgent):
  block_manager: BlockManager,
  passage_manager: PassageManager,
  actor: User,
+ step_manager: StepManager = NoopStepManager(),
+ telemetry_manager: TelemetryManager = NoopTelemetryManager(),
  ):
  super().__init__(agent_id=agent_id, openai_client=None, message_manager=message_manager, agent_manager=agent_manager, actor=actor)

@@ -57,6 +62,8 @@ class LettaAgent(BaseAgent):
  # Summarizer settings
  self.block_manager = block_manager
  self.passage_manager = passage_manager
+ self.step_manager = step_manager
+ self.telemetry_manager = telemetry_manager
  self.response_messages: List[Message] = []

  self.last_function_response = None
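
Note on the new defaults above: step_manager and telemetry_manager fall back to no-op implementations (NoopStepManager, NoopTelemetryManager), so the agent loop can unconditionally await step logging and provider-trace calls even when a caller wires up neither service. A minimal sketch of that pattern, using hypothetical class names purely for illustration rather than Letta's actual implementations:

    from typing import Any, Optional


    class StepLogger:
        """Hypothetical interface: persists one record per agent step."""

        async def log_step_async(self, **fields: Any) -> Optional[dict]:
            raise NotImplementedError


    class NoopStepLogger(StepLogger):
        """Default when no persistence is configured: same signature, does nothing."""

        async def log_step_async(self, **fields: Any) -> Optional[dict]:
            return None  # callers can still write `logged = await self.step_logger.log_step_async(...)`


    class Agent:
        # A no-op instance as the default argument keeps existing call sites working;
        # only callers that want step persistence pass a real logger.
        def __init__(self, step_logger: StepLogger = NoopStepLogger()):
            self.step_logger = step_logger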
@@ -67,17 +74,19 @@ class LettaAgent(BaseAgent):

  @trace_method
  async def step(self, input_messages: List[MessageCreate], max_steps: int = 10, use_assistant_message: bool = True) -> LettaResponse:
- agent_state = await self.agent_manager.get_agent_by_id_async(self.agent_id, actor=self.actor)
- current_in_context_messages, new_in_context_messages, usage = await self._step(
- agent_state=agent_state, input_messages=input_messages, max_steps=max_steps
+ agent_state = await self.agent_manager.get_agent_by_id_async(
+ agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
  )
+ _, new_in_context_messages, usage = await self._step(agent_state=agent_state, input_messages=input_messages, max_steps=max_steps)
  return _create_letta_response(
  new_in_context_messages=new_in_context_messages, use_assistant_message=use_assistant_message, usage=usage
  )

- async def _step(
- self, agent_state: AgentState, input_messages: List[MessageCreate], max_steps: int = 10
- ) -> Tuple[List[Message], List[Message], CompletionUsage]:
+ @trace_method
+ async def step_stream_no_tokens(self, input_messages: List[MessageCreate], max_steps: int = 10, use_assistant_message: bool = True):
+ agent_state = await self.agent_manager.get_agent_by_id_async(
+ agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
+ )
  current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
  input_messages, agent_state, self.message_manager, self.actor
  )
@@ -89,23 +98,81 @@ class LettaAgent(BaseAgent):
  )
  usage = LettaUsageStatistics()
  for _ in range(max_steps):
- response = await self._get_ai_reply(
+ step_id = generate_step_id()
+
+ in_context_messages = await self._rebuild_memory_async(
+ current_in_context_messages + new_in_context_messages,
+ agent_state,
+ num_messages=self.num_messages,
+ num_archival_memories=self.num_archival_memories,
+ )
+ log_event("agent.stream_no_tokens.messages.refreshed") # [1^]
+
+ request_data = await self._create_llm_request_data_async(
  llm_client=llm_client,
- in_context_messages=current_in_context_messages + new_in_context_messages,
+ in_context_messages=in_context_messages,
  agent_state=agent_state,
  tool_rules_solver=tool_rules_solver,
- stream=False,
- # TODO: also pass in reasoning content
+ # TODO: pass in reasoning content
  )
+ log_event("agent.stream_no_tokens.llm_request.created") # [2^]

+ try:
+ response_data = await llm_client.request_async(request_data, agent_state.llm_config)
+ except Exception as e:
+ raise llm_client.handle_llm_error(e)
+ log_event("agent.stream_no_tokens.llm_response.received") # [3^]
+
+ response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
+
+ # update usage
+ # TODO: add run_id
+ usage.step_count += 1
+ usage.completion_tokens += response.usage.completion_tokens
+ usage.prompt_tokens += response.usage.prompt_tokens
+ usage.total_tokens += response.usage.total_tokens
+
+ if not response.choices[0].message.tool_calls:
+ # TODO: make into a real error
+ raise ValueError("No tool calls found in response, model must make a tool call")
  tool_call = response.choices[0].message.tool_calls[0]
- reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+ if response.choices[0].message.reasoning_content:
+ reasoning = [
+ ReasoningContent(
+ reasoning=response.choices[0].message.reasoning_content,
+ is_native=True,
+ signature=response.choices[0].message.reasoning_content_signature,
+ )
+ ]
+ else:
+ reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons

  persisted_messages, should_continue = await self._handle_ai_response(
- tool_call, agent_state, tool_rules_solver, reasoning_content=reasoning
+ tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning
  )
  self.response_messages.extend(persisted_messages)
  new_in_context_messages.extend(persisted_messages)
+ log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
+
+ # Log LLM Trace
+ await self.telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json=response_data,
+ step_id=step_id,
+ organization_id=self.actor.organization_id,
+ ),
+ )
+
+ # stream step
+ # TODO: improve TTFT
+ filter_user_messages = [m for m in persisted_messages if m.role != "user"]
+ letta_messages = Message.to_letta_messages_from_list(
+ filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
+ )
+ for message in letta_messages:
+ yield f"data: {message.model_dump_json()}\n\n"

  # update usage
  # TODO: add run_id
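
step_stream_no_tokens emits server-sent-event frames rather than raw tokens: each persisted message is yielded as a data: <json> chunk terminated by a blank line, and the loop later yields a usage frame the same way. A minimal client-side sketch of consuming such a stream with httpx; the URL and payload shape are assumptions for illustration, not the documented Letta REST API:

    import asyncio
    import json

    import httpx


    async def consume_stream() -> None:
        # Hypothetical endpoint and payload; adjust to the actual route you call.
        url = "http://localhost:8283/v1/agents/agent-123/messages/stream"
        payload = {"messages": [{"role": "user", "content": "hello"}], "stream_tokens": False}

        async with httpx.AsyncClient(timeout=None) as client:
            async with client.stream("POST", url, json=payload) as response:
                async for line in response.aiter_lines():
                    if not line.startswith("data: "):
                        continue  # skip blank separators between events
                    body = line[len("data: "):]
                    try:
                        event = json.loads(body)
                    except json.JSONDecodeError:
                        continue  # skip anything that is not JSON (e.g. a bare status marker)
                    print(event)


    if __name__ == "__main__":
        asyncio.run(consume_stream())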
@@ -122,17 +189,125 @@ class LettaAgent(BaseAgent):
  message_ids = [m.id for m in (current_in_context_messages + new_in_context_messages)]
  self.agent_manager.set_in_context_messages(agent_id=self.agent_id, message_ids=message_ids, actor=self.actor)

+ # Return back usage
+ yield f"data: {usage.model_dump_json()}\n\n"
+
+ async def _step(
+ self, agent_state: AgentState, input_messages: List[MessageCreate], max_steps: int = 10
+ ) -> Tuple[List[Message], List[Message], CompletionUsage]:
+ """
+ Carries out an invocation of the agent loop. In each step, the agent
+ 1. Rebuilds its memory
+ 2. Generates a request for the LLM
+ 3. Fetches a response from the LLM
+ 4. Processes the response
+ """
+ current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
+ input_messages, agent_state, self.message_manager, self.actor
+ )
+ tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
+ llm_client = LLMClient.create(
+ provider_type=agent_state.llm_config.model_endpoint_type,
+ put_inner_thoughts_first=True,
+ actor=self.actor,
+ )
+ usage = LettaUsageStatistics()
+ for _ in range(max_steps):
+ step_id = generate_step_id()
+
+ in_context_messages = await self._rebuild_memory_async(
+ current_in_context_messages + new_in_context_messages,
+ agent_state,
+ num_messages=self.num_messages,
+ num_archival_memories=self.num_archival_memories,
+ )
+ log_event("agent.step.messages.refreshed") # [1^]
+
+ request_data = await self._create_llm_request_data_async(
+ llm_client=llm_client,
+ in_context_messages=in_context_messages,
+ agent_state=agent_state,
+ tool_rules_solver=tool_rules_solver,
+ # TODO: pass in reasoning content
+ )
+ log_event("agent.step.llm_request.created") # [2^]
+
+ try:
+ response_data = await llm_client.request_async(request_data, agent_state.llm_config)
+ except Exception as e:
+ raise llm_client.handle_llm_error(e)
+ log_event("agent.step.llm_response.received") # [3^]
+
+ response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
+
+ # TODO: add run_id
+ usage.step_count += 1
+ usage.completion_tokens += response.usage.completion_tokens
+ usage.prompt_tokens += response.usage.prompt_tokens
+ usage.total_tokens += response.usage.total_tokens
+
+ if not response.choices[0].message.tool_calls:
+ # TODO: make into a real error
+ raise ValueError("No tool calls found in response, model must make a tool call")
+ tool_call = response.choices[0].message.tool_calls[0]
+ if response.choices[0].message.reasoning_content:
+ reasoning = [
+ ReasoningContent(
+ reasoning=response.choices[0].message.reasoning_content,
+ is_native=True,
+ signature=response.choices[0].message.reasoning_content_signature,
+ )
+ ]
+ else:
+ reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+
+ persisted_messages, should_continue = await self._handle_ai_response(
+ tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning, step_id=step_id
+ )
+ self.response_messages.extend(persisted_messages)
+ new_in_context_messages.extend(persisted_messages)
+ log_event("agent.step.llm_response.processed") # [4^]
+
+ # Log LLM Trace
+ await self.telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json=response_data,
+ step_id=step_id,
+ organization_id=self.actor.organization_id,
+ ),
+ )
+
+ if not should_continue:
+ break
+
+ # Extend the in context message ids
+ if not agent_state.message_buffer_autoclear:
+ message_ids = [m.id for m in (current_in_context_messages + new_in_context_messages)]
+ self.agent_manager.set_in_context_messages(agent_id=self.agent_id, message_ids=message_ids, actor=self.actor)
+
  return current_in_context_messages, new_in_context_messages, usage

  @trace_method
  async def step_stream(
- self, input_messages: List[MessageCreate], max_steps: int = 10, use_assistant_message: bool = True, stream_tokens: bool = False
+ self,
+ input_messages: List[MessageCreate],
+ max_steps: int = 10,
+ use_assistant_message: bool = True,
+ request_start_timestamp_ns: Optional[int] = None,
  ) -> AsyncGenerator[str, None]:
  """
- Main streaming loop that yields partial tokens.
- Whenever we detect a tool call, we yield from _handle_ai_response as well.
+ Carries out an invocation of the agent loop in a streaming fashion that yields partial tokens.
+ Whenever we detect a tool call, we yield from _handle_ai_response as well. At each step, the agent
+ 1. Rebuilds its memory
+ 2. Generates a request for the LLM
+ 3. Fetches a response from the LLM
+ 4. Processes the response
  """
- agent_state = await self.agent_manager.get_agent_by_id_async(self.agent_id, actor=self.actor)
+ agent_state = await self.agent_manager.get_agent_by_id_async(
+ agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
+ )
  current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
  input_messages, agent_state, self.message_manager, self.actor
  )
@@ -145,13 +320,29 @@ class LettaAgent(BaseAgent):
  usage = LettaUsageStatistics()

  for _ in range(max_steps):
- stream = await self._get_ai_reply(
+ step_id = generate_step_id()
+ in_context_messages = await self._rebuild_memory_async(
+ current_in_context_messages + new_in_context_messages,
+ agent_state,
+ num_messages=self.num_messages,
+ num_archival_memories=self.num_archival_memories,
+ )
+ log_event("agent.step.messages.refreshed") # [1^]
+
+ request_data = await self._create_llm_request_data_async(
  llm_client=llm_client,
- in_context_messages=current_in_context_messages + new_in_context_messages,
+ in_context_messages=in_context_messages,
  agent_state=agent_state,
  tool_rules_solver=tool_rules_solver,
- stream=True,
  )
+ log_event("agent.stream.llm_request.created") # [2^]
+
+ try:
+ stream = await llm_client.stream_async(request_data, agent_state.llm_config)
+ except Exception as e:
+ raise llm_client.handle_llm_error(e)
+ log_event("agent.stream.llm_response.received") # [3^]
+
  # TODO: THIS IS INCREDIBLY UGLY
  # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
  if agent_state.llm_config.model_endpoint_type == "anthropic":
@@ -164,7 +355,23 @@ class LettaAgent(BaseAgent):
  use_assistant_message=use_assistant_message,
  put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
  )
+ else:
+ raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
+
+ first_chunk, ttft_span = True, None
+ if request_start_timestamp_ns is not None:
+ ttft_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
+ ttft_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
+
  async for chunk in interface.process(stream):
+ # Measure time to first token
+ if first_chunk and ttft_span is not None:
+ now = get_utc_timestamp_ns()
+ ttft_ns = now - request_start_timestamp_ns
+ ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
+ ttft_span.end()
+ first_chunk = False
+
  yield f"data: {chunk.model_dump_json()}\n\n"

  # update usage
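
The time-to-first-token instrumentation above opens a span at the caller-supplied request_start_timestamp_ns and, on the first streamed chunk, records the elapsed nanoseconds integer-divided down to milliseconds. A self-contained sketch of the same measurement using a plain monotonic clock instead of Letta's tracer and get_utc_timestamp_ns helper:

    import asyncio
    import time
    from typing import AsyncIterator


    async def fake_stream() -> AsyncIterator[str]:
        await asyncio.sleep(0.25)  # pretend the provider takes ~250 ms to start responding
        for token in ("Hello", ",", " world"):
            yield token


    async def main() -> None:
        request_start_ns = time.monotonic_ns()
        first_chunk = True
        async for chunk in fake_stream():
            if first_chunk:
                ttft_ns = time.monotonic_ns() - request_start_ns
                # Same conversion as the diff: nanoseconds -> whole milliseconds.
                print(f"time_to_first_token_ms={ttft_ns // 1_000_000}")
                first_chunk = False
            print(chunk, end="")
        print()


    asyncio.run(main())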
@@ -180,13 +387,46 @@ class LettaAgent(BaseAgent):
  tool_call,
  agent_state,
  tool_rules_solver,
+ UsageStatistics(
+ completion_tokens=interface.output_tokens,
+ prompt_tokens=interface.input_tokens,
+ total_tokens=interface.input_tokens + interface.output_tokens,
+ ),
  reasoning_content=reasoning_content,
  pre_computed_assistant_message_id=interface.letta_assistant_message_id,
  pre_computed_tool_message_id=interface.letta_tool_message_id,
+ step_id=step_id,
  )
  self.response_messages.extend(persisted_messages)
  new_in_context_messages.extend(persisted_messages)

+ # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
+ # log_event("agent.stream.llm_response.processed") # [4^]
+
+ # Log LLM Trace
+ # TODO (cliandy): we are piecing together the streamed response here. Content here does not match the actual response schema.
+ await self.telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json={
+ "content": {
+ "tool_call": tool_call.model_dump_json(),
+ "reasoning": [content.model_dump_json() for content in reasoning_content],
+ },
+ "id": interface.message_id,
+ "model": interface.model,
+ "role": "assistant",
+ # "stop_reason": "",
+ # "stop_sequence": None,
+ "type": "message",
+ "usage": {"input_tokens": interface.input_tokens, "output_tokens": interface.output_tokens},
+ },
+ step_id=step_id,
+ organization_id=self.actor.organization_id,
+ ),
+ )
+
  if not use_assistant_message or should_continue:
  tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
  yield f"data: {tool_return.model_dump_json()}\n\n"
@@ -209,28 +449,20 @@ class LettaAgent(BaseAgent):
  yield f"data: {MessageStreamStatus.done.model_dump_json()}\n\n"

  @trace_method
- # When raising an error this doesn't show up
- async def _get_ai_reply(
+ async def _create_llm_request_data_async(
  self,
  llm_client: LLMClientBase,
  in_context_messages: List[Message],
  agent_state: AgentState,
  tool_rules_solver: ToolRulesSolver,
- stream: bool,
  ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
- if settings.experimental_enable_async_db_engine:
- self.num_messages = self.num_messages or (await self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id))
- self.num_archival_memories = self.num_archival_memories or (
- await self.passage_manager.size_async(actor=self.actor, agent_id=agent_state.id)
- )
- in_context_messages = await self._rebuild_memory_async(
- in_context_messages, agent_state, num_messages=self.num_messages, num_archival_memories=self.num_archival_memories
- )
- else:
- if settings.experimental_skip_rebuild_memory and agent_state.llm_config.model_endpoint_type == "google_vertex":
- logger.info("Skipping memory rebuild")
- else:
- in_context_messages = self._rebuild_memory(in_context_messages, agent_state)
+ self.num_messages = self.num_messages or (await self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id))
+ self.num_archival_memories = self.num_archival_memories or (
+ await self.passage_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+ )
+ in_context_messages = await self._rebuild_memory_async(
+ in_context_messages, agent_state, num_messages=self.num_messages, num_archival_memories=self.num_archival_memories
+ )

  tools = [
  t
@@ -243,8 +475,8 @@ class LettaAgent(BaseAgent):
  ToolType.LETTA_MULTI_AGENT_CORE,
  ToolType.LETTA_SLEEPTIME_CORE,
  ToolType.LETTA_VOICE_SLEEPTIME_CORE,
+ ToolType.LETTA_BUILTIN,
  }
- or (t.tool_type == ToolType.LETTA_MULTI_AGENT_CORE and t.name == "send_message_to_agents_matching_tags")
  or (t.tool_type == ToolType.EXTERNAL_COMPOSIO)
  ]

@@ -264,15 +496,7 @@ class LettaAgent(BaseAgent):

  allowed_tools = [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]

- response = await llm_client.send_llm_request_async(
- messages=in_context_messages,
- llm_config=agent_state.llm_config,
- tools=allowed_tools,
- force_tool_call=force_tool_call,
- stream=stream,
- )
-
- return response
+ return llm_client.build_request_data(in_context_messages, agent_state.llm_config, allowed_tools, force_tool_call)

  @trace_method
  async def _handle_ai_response(
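
Replacing _get_ai_reply with _create_llm_request_data_async separates building the provider request payload from sending it (request_async / stream_async are now invoked by the loops above), which is what lets the exact request_json and response_json be persisted as a provider trace per step. A generic sketch of that separation with hypothetical helpers (build_request, send, TraceSink), not Letta's LLMClient API:

    import json
    from dataclasses import dataclass
    from typing import Any, Dict, List


    @dataclass
    class TraceSink:
        """Stands in for a telemetry manager: remembers raw request/response pairs."""
        records: List[Dict[str, Any]]

        def log(self, step_id: str, request: Dict[str, Any], response: Dict[str, Any]) -> None:
            self.records.append({"step_id": step_id, "request": request, "response": response})


    def build_request(messages: List[Dict[str, str]], model: str) -> Dict[str, Any]:
        # Pure function: no I/O, so the payload can be inspected or logged before sending.
        return {"model": model, "messages": messages, "tool_choice": "required"}


    def send(request: Dict[str, Any]) -> Dict[str, Any]:
        # Placeholder transport; a real client would POST `request` to the provider here.
        return {"id": "resp-1", "usage": {"prompt_tokens": 12, "completion_tokens": 3}}


    if __name__ == "__main__":
        sink = TraceSink(records=[])
        req = build_request([{"role": "user", "content": "hi"}], model="gpt-4o-mini")
        resp = send(req)
        sink.log(step_id="step-1", request=req, response=resp)
        print(json.dumps(sink.records, indent=2))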
@@ -280,9 +504,11 @@ class LettaAgent(BaseAgent):
  tool_call: ToolCall,
  agent_state: AgentState,
  tool_rules_solver: ToolRulesSolver,
+ usage: UsageStatistics,
  reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
  pre_computed_assistant_message_id: Optional[str] = None,
  pre_computed_tool_message_id: Optional[str] = None,
+ step_id: str | None = None,
  ) -> Tuple[List[Message], bool]:
  """
  Now that streaming is done, handle the final AI response.
@@ -294,8 +520,11 @@ class LettaAgent(BaseAgent):

  try:
  tool_args = json.loads(tool_call_args_str)
+ assert isinstance(tool_args, dict), "tool_args must be a dict"
  except json.JSONDecodeError:
  tool_args = {}
+ except AssertionError:
+ tool_args = json.loads(tool_args)

  # Get request heartbeats and coerce to bool
  request_heartbeat = tool_args.pop("request_heartbeat", False)
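
The assert/except pair above guards against tool arguments that arrive double-encoded: if the first json.loads yields a string rather than a dict, the AssertionError branch decodes that string once more. A small stand-alone illustration of why the second json.loads recovers the dict:

    import json

    # Normal case: the provider sends the arguments as a JSON object.
    plain = '{"city": "Berlin", "units": "metric"}'
    assert isinstance(json.loads(plain), dict)

    # Double-encoded case: the object was serialized twice, so the first
    # decode yields a *string* containing JSON rather than a dict.
    double_encoded = json.dumps(plain)
    first_pass = json.loads(double_encoded)   # -> '{"city": "Berlin", ...}' (still a str)
    assert isinstance(first_pass, str)

    second_pass = json.loads(first_pass)      # -> {'city': 'Berlin', 'units': 'metric'}
    assert isinstance(second_pass, dict)
    print(second_pass)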
@@ -329,7 +558,25 @@ class LettaAgent(BaseAgent):
  elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
  continue_stepping = True

- # 5. Persist to DB
+ # 5a. Persist Steps to DB
+ # Following agent loop to persist this before messages
+ # TODO (cliandy): determine what should match old loop w/provider_id, job_id
+ # TODO (cliandy): UsageStatistics and LettaUsageStatistics are used in many places, but are not the same.
+ logged_step = await self.step_manager.log_step_async(
+ actor=self.actor,
+ agent_id=agent_state.id,
+ provider_name=agent_state.llm_config.model_endpoint_type,
+ provider_category=agent_state.llm_config.provider_category or "base",
+ model=agent_state.llm_config.model,
+ model_endpoint=agent_state.llm_config.model_endpoint,
+ context_window_limit=agent_state.llm_config.context_window,
+ usage=usage,
+ provider_id=None,
+ job_id=None,
+ step_id=step_id,
+ )
+
+ # 5b. Persist Messages to DB
  tool_call_messages = create_letta_messages_from_llm_response(
  agent_id=agent_state.id,
  model=agent_state.llm_config.model,
@@ -343,6 +590,7 @@ class LettaAgent(BaseAgent):
  reasoning_content=reasoning_content,
  pre_computed_assistant_message_id=pre_computed_assistant_message_id,
  pre_computed_tool_message_id=pre_computed_tool_message_id,
+ step_id=logged_step.id if logged_step else None, # TODO (cliandy): eventually move over other agent loops
  )
  persisted_messages = await self.message_manager.create_many_messages_async(tool_call_messages, actor=self.actor)
  self.last_function_response = function_response
@@ -361,20 +609,21 @@ class LettaAgent(BaseAgent):

  # TODO: This temp. Move this logic and code to executors
  try:
- if target_tool.name == "send_message_to_agents_matching_tags" and target_tool.tool_type == ToolType.LETTA_MULTI_AGENT_CORE:
- log_event(name="start_send_message_to_agents_matching_tags", attributes=tool_args)
- results = await self._send_message_to_agents_matching_tags(**tool_args)
- log_event(name="finish_send_message_to_agents_matching_tags", attributes=tool_args)
- return json.dumps(results), True
- else:
- tool_execution_manager = ToolExecutionManager(agent_state=agent_state, actor=self.actor)
- # TODO: Integrate sandbox result
- log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
- tool_execution_result = await tool_execution_manager.execute_tool_async(
- function_name=tool_name, function_args=tool_args, tool=target_tool
- )
- log_event(name=f"finish_{tool_name}_execution", attributes=tool_args)
- return tool_execution_result.func_return, True
+ tool_execution_manager = ToolExecutionManager(
+ agent_state=agent_state,
+ message_manager=self.message_manager,
+ agent_manager=self.agent_manager,
+ block_manager=self.block_manager,
+ passage_manager=self.passage_manager,
+ actor=self.actor,
+ )
+ # TODO: Integrate sandbox result
+ log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
+ tool_execution_result = await tool_execution_manager.execute_tool_async(
+ function_name=tool_name, function_args=tool_args, tool=target_tool
+ )
+ log_event(name=f"finish_{tool_name}_execution", attributes=tool_args)
+ return tool_execution_result.func_return, True
  except Exception as e:
  return f"Failed to call tool. Error: {e}", False

@@ -430,6 +679,7 @@ class LettaAgent(BaseAgent):
  results = await asyncio.gather(*tasks)
  return results

+ @trace_method
  async def _load_last_function_response_async(self):
  """Load the last function response from message history"""
  in_context_messages = await self.agent_manager.get_in_context_messages_async(agent_id=self.agent_id, actor=self.actor)