letta-nightly 0.11.6.dev20250902104140__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
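Since wheels are plain zip archives, a diff like this can be reproduced locally with only the Python standard library. A minimal sketch (the wheel filenames below are assumed to match the versions in the title and to sit in the working directory):

import difflib
import zipfile

OLD = "letta_nightly-0.11.6.dev20250902104140-py3-none-any.whl"
NEW = "letta_nightly-0.11.7.dev20250904045700-py3-none-any.whl"

def read_member(whl: str, member: str) -> list[str]:
    # Wheels are zip files, so any member can be read directly.
    with zipfile.ZipFile(whl) as zf:
        return zf.read(member).decode("utf-8").splitlines(keepends=True)

old_src = read_member(OLD, "letta/agents/letta_agent.py")
new_src = read_member(NEW, "letta/agents/letta_agent.py")
print("".join(difflib.unified_diff(old_src, new_src, fromfile="old", tofile="new")))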
Files changed (138)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +10 -14
  3. letta/agents/base_agent.py +18 -0
  4. letta/agents/helpers.py +32 -7
  5. letta/agents/letta_agent.py +953 -762
  6. letta/agents/voice_agent.py +1 -1
  7. letta/client/streaming.py +0 -1
  8. letta/constants.py +11 -8
  9. letta/errors.py +9 -0
  10. letta/functions/function_sets/base.py +77 -69
  11. letta/functions/function_sets/builtin.py +41 -22
  12. letta/functions/function_sets/multi_agent.py +1 -2
  13. letta/functions/schema_generator.py +0 -1
  14. letta/helpers/converters.py +8 -3
  15. letta/helpers/datetime_helpers.py +5 -4
  16. letta/helpers/message_helper.py +1 -2
  17. letta/helpers/pinecone_utils.py +0 -1
  18. letta/helpers/tool_rule_solver.py +10 -0
  19. letta/helpers/tpuf_client.py +848 -0
  20. letta/interface.py +8 -8
  21. letta/interfaces/anthropic_streaming_interface.py +7 -0
  22. letta/interfaces/openai_streaming_interface.py +29 -6
  23. letta/llm_api/anthropic_client.py +188 -18
  24. letta/llm_api/azure_client.py +0 -1
  25. letta/llm_api/bedrock_client.py +1 -2
  26. letta/llm_api/deepseek_client.py +319 -5
  27. letta/llm_api/google_vertex_client.py +75 -17
  28. letta/llm_api/groq_client.py +0 -1
  29. letta/llm_api/helpers.py +2 -2
  30. letta/llm_api/llm_api_tools.py +1 -50
  31. letta/llm_api/llm_client.py +6 -8
  32. letta/llm_api/mistral.py +1 -1
  33. letta/llm_api/openai.py +16 -13
  34. letta/llm_api/openai_client.py +31 -16
  35. letta/llm_api/together_client.py +0 -1
  36. letta/llm_api/xai_client.py +0 -1
  37. letta/local_llm/chat_completion_proxy.py +7 -6
  38. letta/local_llm/settings/settings.py +1 -1
  39. letta/orm/__init__.py +1 -0
  40. letta/orm/agent.py +8 -6
  41. letta/orm/archive.py +9 -1
  42. letta/orm/block.py +3 -4
  43. letta/orm/block_history.py +3 -1
  44. letta/orm/group.py +2 -3
  45. letta/orm/identity.py +1 -2
  46. letta/orm/job.py +1 -2
  47. letta/orm/llm_batch_items.py +1 -2
  48. letta/orm/message.py +8 -4
  49. letta/orm/mixins.py +18 -0
  50. letta/orm/organization.py +2 -0
  51. letta/orm/passage.py +8 -1
  52. letta/orm/passage_tag.py +55 -0
  53. letta/orm/sandbox_config.py +1 -3
  54. letta/orm/step.py +1 -2
  55. letta/orm/tool.py +1 -0
  56. letta/otel/resource.py +2 -2
  57. letta/plugins/plugins.py +1 -1
  58. letta/prompts/prompt_generator.py +10 -2
  59. letta/schemas/agent.py +11 -0
  60. letta/schemas/archive.py +4 -0
  61. letta/schemas/block.py +13 -0
  62. letta/schemas/embedding_config.py +0 -1
  63. letta/schemas/enums.py +24 -7
  64. letta/schemas/group.py +12 -0
  65. letta/schemas/letta_message.py +55 -1
  66. letta/schemas/letta_message_content.py +28 -0
  67. letta/schemas/letta_request.py +21 -4
  68. letta/schemas/letta_stop_reason.py +9 -1
  69. letta/schemas/llm_config.py +24 -8
  70. letta/schemas/mcp.py +0 -3
  71. letta/schemas/memory.py +14 -0
  72. letta/schemas/message.py +245 -141
  73. letta/schemas/openai/chat_completion_request.py +2 -1
  74. letta/schemas/passage.py +1 -0
  75. letta/schemas/providers/bedrock.py +1 -1
  76. letta/schemas/providers/openai.py +2 -2
  77. letta/schemas/tool.py +11 -5
  78. letta/schemas/tool_execution_result.py +0 -1
  79. letta/schemas/tool_rule.py +71 -0
  80. letta/serialize_schemas/marshmallow_agent.py +1 -2
  81. letta/server/rest_api/app.py +3 -3
  82. letta/server/rest_api/auth/index.py +0 -1
  83. letta/server/rest_api/interface.py +3 -11
  84. letta/server/rest_api/redis_stream_manager.py +3 -4
  85. letta/server/rest_api/routers/v1/agents.py +143 -84
  86. letta/server/rest_api/routers/v1/blocks.py +1 -1
  87. letta/server/rest_api/routers/v1/folders.py +1 -1
  88. letta/server/rest_api/routers/v1/groups.py +23 -22
  89. letta/server/rest_api/routers/v1/internal_templates.py +68 -0
  90. letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
  91. letta/server/rest_api/routers/v1/sources.py +1 -1
  92. letta/server/rest_api/routers/v1/tools.py +167 -15
  93. letta/server/rest_api/streaming_response.py +4 -3
  94. letta/server/rest_api/utils.py +75 -18
  95. letta/server/server.py +24 -35
  96. letta/services/agent_manager.py +359 -45
  97. letta/services/agent_serialization_manager.py +23 -3
  98. letta/services/archive_manager.py +72 -3
  99. letta/services/block_manager.py +1 -2
  100. letta/services/context_window_calculator/token_counter.py +11 -6
  101. letta/services/file_manager.py +1 -3
  102. letta/services/files_agents_manager.py +2 -4
  103. letta/services/group_manager.py +73 -12
  104. letta/services/helpers/agent_manager_helper.py +5 -5
  105. letta/services/identity_manager.py +8 -3
  106. letta/services/job_manager.py +2 -14
  107. letta/services/llm_batch_manager.py +1 -3
  108. letta/services/mcp/base_client.py +1 -2
  109. letta/services/mcp_manager.py +5 -6
  110. letta/services/message_manager.py +536 -15
  111. letta/services/organization_manager.py +1 -2
  112. letta/services/passage_manager.py +287 -12
  113. letta/services/provider_manager.py +1 -3
  114. letta/services/sandbox_config_manager.py +12 -7
  115. letta/services/source_manager.py +1 -2
  116. letta/services/step_manager.py +0 -1
  117. letta/services/summarizer/summarizer.py +4 -2
  118. letta/services/telemetry_manager.py +1 -3
  119. letta/services/tool_executor/builtin_tool_executor.py +136 -316
  120. letta/services/tool_executor/core_tool_executor.py +231 -74
  121. letta/services/tool_executor/files_tool_executor.py +2 -2
  122. letta/services/tool_executor/mcp_tool_executor.py +0 -1
  123. letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
  124. letta/services/tool_executor/sandbox_tool_executor.py +0 -1
  125. letta/services/tool_executor/tool_execution_sandbox.py +2 -3
  126. letta/services/tool_manager.py +181 -64
  127. letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
  128. letta/services/user_manager.py +1 -2
  129. letta/settings.py +5 -3
  130. letta/streaming_interface.py +3 -3
  131. letta/system.py +1 -1
  132. letta/utils.py +0 -1
  133. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
  134. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
  135. letta/llm_api/deepseek.py +0 -303
  136. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
  137. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
  138. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
@@ -40,7 +40,7 @@ from letta.schemas.letta_message_content import OmittedReasoningContent, Reasoni
  from letta.schemas.letta_response import LettaResponse
  from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
  from letta.schemas.llm_config import LLMConfig
- from letta.schemas.message import Message, MessageCreate
+ from letta.schemas.message import Message, MessageCreateBase
  from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
  from letta.schemas.provider_trace import ProviderTraceCreate
  from letta.schemas.step import StepProgression
@@ -48,7 +48,7 @@ from letta.schemas.step_metrics import StepMetrics
  from letta.schemas.tool_execution_result import ToolExecutionResult
  from letta.schemas.usage import LettaUsageStatistics
  from letta.schemas.user import User
- from letta.server.rest_api.utils import create_letta_messages_from_llm_response
+ from letta.server.rest_api.utils import create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response
  from letta.services.agent_manager import AgentManager
  from letta.services.block_manager import BlockManager
  from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
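The two import changes above track the same feature: the step entrypoints now accept approval responses alongside regular user messages, and approval requests get their own message constructor. A hedged sketch of the type widening this implies; only MessageCreateBase and the approve/reason fields are visible in this diff, so the class shapes and the ApprovalCreate name are assumptions:

from pydantic import BaseModel

class MessageCreateBase(BaseModel):
    """Common base for payloads accepted by step(), _step(), and step_stream()."""

class MessageCreate(MessageCreateBase):
    role: str = "user"
    content: str = ""

class ApprovalCreate(MessageCreateBase):  # hypothetical name, not in this diff
    approve: bool              # True resumes the paused tool call, False denies it
    reason: str | None = None  # optional reason attached to a denial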
@@ -164,7 +164,7 @@ class LettaAgent(BaseAgent):
  @trace_method
  async def step(
  self,
- input_messages: list[MessageCreate],
+ input_messages: list[MessageCreateBase],
  max_steps: int = DEFAULT_MAX_STEPS,
  run_id: str | None = None,
  use_assistant_message: bool = True,
@@ -203,7 +203,7 @@ class LettaAgent(BaseAgent):
  @trace_method
  async def step_stream_no_tokens(
  self,
- input_messages: list[MessageCreate],
+ input_messages: list[MessageCreateBase],
  max_steps: int = DEFAULT_MAX_STEPS,
  use_assistant_message: bool = True,
  request_start_timestamp_ns: int | None = None,
@@ -218,6 +218,7 @@ class LettaAgent(BaseAgent):
  input_messages, agent_state, self.message_manager, self.actor
  )
  initial_messages = new_in_context_messages
+ in_context_messages = current_in_context_messages
  tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
  llm_client = LLMClient.create(
  provider_type=agent_state.llm_config.model_endpoint_type,
@@ -233,137 +234,34 @@ class LettaAgent(BaseAgent):
  request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

  for i in range(max_steps):
- # Check for job cancellation at the start of each step
- if await self._check_run_cancellation():
- stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
- logger.info(f"Agent execution cancelled for run {self.current_run_id}")
- yield f"data: {stop_reason.model_dump_json()}\n\n"
- break
-
- step_id = generate_step_id()
- step_start = get_utc_timestamp_ns()
- agent_step_span = tracer.start_span("agent_step", start_time=step_start)
- agent_step_span.set_attributes({"step_id": step_id})
-
- step_progression = StepProgression.START
- should_continue = False
- step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
-
- # Create step early with PENDING status
- logged_step = await self.step_manager.log_step_async(
- actor=self.actor,
- agent_id=agent_state.id,
- provider_name=agent_state.llm_config.model_endpoint_type,
- provider_category=agent_state.llm_config.provider_category or "base",
- model=agent_state.llm_config.model,
- model_endpoint=agent_state.llm_config.model_endpoint,
- context_window_limit=agent_state.llm_config.context_window,
- usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
- provider_id=None,
- job_id=self.current_run_id if self.current_run_id else None,
- step_id=step_id,
- project_id=agent_state.project_id,
- status=StepStatus.PENDING,
- )
- # Only use step_id in messages if step was actually created
- effective_step_id = step_id if logged_step else None
-
- try:
- request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
- await self._build_and_request_from_llm(
- current_in_context_messages,
- new_in_context_messages,
- agent_state,
- llm_client,
- tool_rules_solver,
- agent_step_span,
- step_metrics,
- )
- )
- in_context_messages = current_in_context_messages + new_in_context_messages
-
- step_progression = StepProgression.RESPONSE_RECEIVED
- log_event("agent.stream_no_tokens.llm_response.received") # [3^]
-
- response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
-
- # update usage
- usage.step_count += 1
- usage.completion_tokens += response.usage.completion_tokens
- usage.prompt_tokens += response.usage.prompt_tokens
- usage.total_tokens += response.usage.total_tokens
- MetricRegistry().message_output_tokens.record(
- response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
- )
-
- if not response.choices[0].message.tool_calls:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
- raise ValueError("No tool calls found in response, model must make a tool call")
- tool_call = response.choices[0].message.tool_calls[0]
- if response.choices[0].message.reasoning_content:
- reasoning = [
- ReasoningContent(
- reasoning=response.choices[0].message.reasoning_content,
- is_native=True,
- signature=response.choices[0].message.reasoning_content_signature,
- )
- ]
- elif response.choices[0].message.omitted_reasoning_content:
- reasoning = [OmittedReasoningContent()]
- elif response.choices[0].message.content:
- reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
- else:
- self.logger.info("No reasoning content found.")
- reasoning = None
-
+ if in_context_messages[-1].role == "approval":
+ approval_request_message = in_context_messages[-1]
+ step_metrics = await self.step_manager.get_step_metrics_async(step_id=approval_request_message.step_id, actor=self.actor)
  persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
- tool_call,
- valid_tool_names,
+ approval_request_message.tool_calls[0],
+ [], # TODO: update this
  agent_state,
  tool_rules_solver,
- response.usage,
- reasoning_content=reasoning,
- step_id=effective_step_id,
+ usage,
+ reasoning_content=approval_request_message.content,
+ step_id=approval_request_message.step_id,
  initial_messages=initial_messages,
- agent_step_span=agent_step_span,
  is_final_step=(i == max_steps - 1),
  step_metrics=step_metrics,
+ run_id=self.current_run_id,
+ is_approval=input_messages[0].approve,
+ is_denial=input_messages[0].approve == False,
+ denial_reason=input_messages[0].reason,
  )
- step_progression = StepProgression.STEP_LOGGED
-
- # Update step with actual usage now that we have it (if step was created)
- if logged_step:
- await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
-
- # TODO (cliandy): handle message contexts with larger refactor and dedupe logic
  new_message_idx = len(initial_messages) if initial_messages else 0
  self.response_messages.extend(persisted_messages[new_message_idx:])
  new_in_context_messages.extend(persisted_messages[new_message_idx:])
  initial_messages = None
- log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
-
- # log step time
- now = get_utc_timestamp_ns()
- step_ns = now - step_start
- agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
- agent_step_span.end()
-
- # Log LLM Trace
- if settings.track_provider_trace:
- await self.telemetry_manager.create_provider_trace_async(
- actor=self.actor,
- provider_trace_create=ProviderTraceCreate(
- request_json=request_data,
- response_json=response_data,
- step_id=step_id, # Use original step_id for telemetry
- organization_id=self.actor.organization_id,
- ),
- )
- step_progression = StepProgression.LOGGED_TRACE
+ in_context_messages = current_in_context_messages + new_in_context_messages

  # stream step
  # TODO: improve TTFT
- filter_user_messages = [m for m in persisted_messages if m.role != "user"]
+ filter_user_messages = [m for m in persisted_messages if m.role != "user" and m.role != "approval"]
  letta_messages = Message.to_letta_messages_from_list(
  filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
  )
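The hunk above makes the non-token streaming loop approval-aware: when the last in-context message is a pending approval request, the agent skips the LLM call entirely and replays the stored tool call through _handle_ai_response with the caller's verdict. A self-contained toy of that gate (names and signatures are illustrative, not the real ones):

from dataclasses import dataclass, field

@dataclass
class Msg:
    role: str
    tool_calls: list = field(default_factory=list)

@dataclass
class Verdict:
    approve: bool
    reason: str | None = None

def step_once(verdicts: list[Verdict], context: list[Msg]) -> str:
    last = context[-1]
    if last.role == "approval":
        # Resume the paused tool call with the caller's decision instead of
        # issuing a new LLM request; the real code reuses the stored step_id.
        v = verdicts[0]
        if v.approve:
            return f"executing stored tool call: {last.tool_calls[0]}"
        return f"tool call denied: {v.reason or 'no reason given'}"
    return "normal path: build request and call the LLM"

print(step_once([Verdict(approve=True)], [Msg("approval", ["send_email"])]))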
@@ -371,104 +269,262 @@ class LettaAgent(BaseAgent):
  for message in letta_messages:
  if include_return_message_types is None or message.message_type in include_return_message_types:
  yield f"data: {message.model_dump_json()}\n\n"
+ else:
+ # Check for job cancellation at the start of each step
+ if await self._check_run_cancellation():
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+ logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+ yield f"data: {stop_reason.model_dump_json()}\n\n"
+ break
+
+ step_id = generate_step_id()
+ step_start = get_utc_timestamp_ns()
+ agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+ agent_step_span.set_attributes({"step_id": step_id})
+
+ step_progression = StepProgression.START
+ should_continue = False
+ step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
+
+ # Create step early with PENDING status
+ logged_step = await self.step_manager.log_step_async(
+ actor=self.actor,
+ agent_id=agent_state.id,
+ provider_name=agent_state.llm_config.model_endpoint_type,
+ provider_category=agent_state.llm_config.provider_category or "base",
+ model=agent_state.llm_config.model,
+ model_endpoint=agent_state.llm_config.model_endpoint,
+ context_window_limit=agent_state.llm_config.context_window,
+ usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+ provider_id=None,
+ job_id=self.current_run_id if self.current_run_id else None,
+ step_id=step_id,
+ project_id=agent_state.project_id,
+ status=StepStatus.PENDING,
+ )
+ # Only use step_id in messages if step was actually created
+ effective_step_id = step_id if logged_step else None
+
+ try:
+ (
+ request_data,
+ response_data,
+ current_in_context_messages,
+ new_in_context_messages,
+ valid_tool_names,
+ ) = await self._build_and_request_from_llm(
+ current_in_context_messages,
+ new_in_context_messages,
+ agent_state,
+ llm_client,
+ tool_rules_solver,
+ agent_step_span,
+ step_metrics,
+ )
+ in_context_messages = current_in_context_messages + new_in_context_messages

- MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
- step_progression = StepProgression.FINISHED
+ step_progression = StepProgression.RESPONSE_RECEIVED
+ log_event("agent.stream_no_tokens.llm_response.received") # [3^]

- # Record step metrics for successful completion
- if logged_step and step_metrics:
- # Set the step_ns that was already calculated
- step_metrics.step_ns = step_ns
- await self._record_step_metrics(
- step_id=step_id,
- agent_state=agent_state,
- step_metrics=step_metrics,
+ try:
+ response = llm_client.convert_response_to_chat_completion(
+ response_data, in_context_messages, agent_state.llm_config
+ )
+ except ValueError as e:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
+ raise e
+
+ # update usage
+ usage.step_count += 1
+ usage.completion_tokens += response.usage.completion_tokens
+ usage.prompt_tokens += response.usage.prompt_tokens
+ usage.total_tokens += response.usage.total_tokens
+ MetricRegistry().message_output_tokens.record(
+ response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
  )

- except Exception as e:
- # Handle any unexpected errors during step processing
- self.logger.error(f"Error during step processing: {e}")
- job_update_metadata = {"error": str(e)}
-
- # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
- if not stop_reason:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
- elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
- self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
- elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
- raise ValueError(f"Invalid Stop Reason: {stop_reason}")
-
- # Send error stop reason to client and re-raise
- yield f"data: {stop_reason.model_dump_json()}\n\n", 500
- raise
-
- # Update step if it needs to be updated
- finally:
- if step_progression == StepProgression.FINISHED and should_continue:
- continue
-
- self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
- self.logger.info("Running final update. Step Progression: %s", step_progression)
- try:
- if step_progression == StepProgression.FINISHED and not should_continue:
- # Successfully completed - update with final usage and stop reason
- if stop_reason is None:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
- # Note: step already updated with success status after _handle_ai_response
- if logged_step:
- await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
- break
-
- # Handle error cases
- if step_progression < StepProgression.STEP_LOGGED:
- # Error occurred before step was fully logged
- import traceback
-
- if logged_step:
- await self.step_manager.update_step_error_async(
- actor=self.actor,
- step_id=step_id, # Use original step_id for telemetry
- error_type=type(e).__name__ if "e" in locals() else "Unknown",
- error_message=str(e) if "e" in locals() else "Unknown error",
- error_traceback=traceback.format_exc(),
- stop_reason=stop_reason,
+ if not response.choices[0].message.tool_calls:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+ raise ValueError("No tool calls found in response, model must make a tool call")
+ tool_call = response.choices[0].message.tool_calls[0]
+ if response.choices[0].message.reasoning_content:
+ reasoning = [
+ ReasoningContent(
+ reasoning=response.choices[0].message.reasoning_content,
+ is_native=True,
+ signature=response.choices[0].message.reasoning_content_signature,
  )
-
- if step_progression <= StepProgression.RESPONSE_RECEIVED:
- # TODO (cliandy): persist response if we get it back
- if settings.track_errored_messages and initial_messages:
- for message in initial_messages:
- message.is_err = True
- message.step_id = effective_step_id
- await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
- elif step_progression <= StepProgression.LOGGED_TRACE:
- if stop_reason is None:
- self.logger.error("Error in step after logging step")
- stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
- if logged_step:
- await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ ]
+ elif response.choices[0].message.omitted_reasoning_content:
+ reasoning = [OmittedReasoningContent()]
+ elif response.choices[0].message.content:
+ reasoning = [
+ TextContent(text=response.choices[0].message.content)
+ ] # reasoning placed into content for legacy reasons
  else:
- self.logger.error("Invalid StepProgression value")
+ self.logger.info("No reasoning content found.")
+ reasoning = None

- if settings.track_stop_reason:
- await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+ tool_call,
+ valid_tool_names,
+ agent_state,
+ tool_rules_solver,
+ response.usage,
+ reasoning_content=reasoning,
+ step_id=effective_step_id,
+ initial_messages=initial_messages,
+ agent_step_span=agent_step_span,
+ is_final_step=(i == max_steps - 1),
+ step_metrics=step_metrics,
+ )
+ step_progression = StepProgression.STEP_LOGGED
+
+ # Update step with actual usage now that we have it (if step was created)
+ if logged_step:
+ await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
+
+ # TODO (cliandy): handle message contexts with larger refactor and dedupe logic
+ new_message_idx = len(initial_messages) if initial_messages else 0
+ self.response_messages.extend(persisted_messages[new_message_idx:])
+ new_in_context_messages.extend(persisted_messages[new_message_idx:])
+ initial_messages = None
+ log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
+
+ # log step time
+ now = get_utc_timestamp_ns()
+ step_ns = now - step_start
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+ agent_step_span.end()
+
+ # Log LLM Trace
+ if settings.track_provider_trace:
+ await self.telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json=response_data,
+ step_id=step_id, # Use original step_id for telemetry
+ organization_id=self.actor.organization_id,
+ ),
+ )
+ step_progression = StepProgression.LOGGED_TRACE
+
+ # stream step
+ # TODO: improve TTFT
+ filter_user_messages = [m for m in persisted_messages if m.role != "user"]
+ letta_messages = Message.to_letta_messages_from_list(
+ filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
+ )
+ letta_messages = [m for m in letta_messages if m.message_type != "approval_response_message"]
+
+ for message in letta_messages:
+ if include_return_message_types is None or message.message_type in include_return_message_types:
+ yield f"data: {message.model_dump_json()}\n\n"
+
+ MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+ step_progression = StepProgression.FINISHED

- # Record partial step metrics on failure (capture whatever timing data we have)
- if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
- # Calculate total step time up to the failure point
- step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+ # Record step metrics for successful completion
+ if logged_step and step_metrics:
+ # Set the step_ns that was already calculated
+ step_metrics.step_ns = step_ns
  await self._record_step_metrics(
  step_id=step_id,
  agent_state=agent_state,
  step_metrics=step_metrics,
- job_id=locals().get("run_id", self.current_run_id),
  )

  except Exception as e:
- self.logger.error("Failed to update step: %s", e)
+ # Handle any unexpected errors during step processing
+ self.logger.error(f"Error during step processing: {e}")
+ job_update_metadata = {"error": str(e)}
+
+ # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+ if not stop_reason:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+ self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+ elif stop_reason.stop_reason not in (
+ StopReasonType.no_tool_call,
+ StopReasonType.invalid_tool_call,
+ StopReasonType.invalid_llm_response,
+ ):
+ self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", stop_reason.stop_reason)
+
+ # Send error stop reason to client and re-raise
+ yield f"data: {stop_reason.model_dump_json()}\n\n", 500
+ raise

- if not should_continue:
- break
+ # Update step if it needs to be updated
+ finally:
+ if step_progression == StepProgression.FINISHED and should_continue:
+ continue
+
+ self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
+ self.logger.info("Running final update. Step Progression: %s", step_progression)
+ try:
+ if step_progression == StepProgression.FINISHED and not should_continue:
+ # Successfully completed - update with final usage and stop reason
+ if stop_reason is None:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ # Note: step already updated with success status after _handle_ai_response
+ if logged_step:
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ break
+
+ # Handle error cases
+ if step_progression < StepProgression.STEP_LOGGED:
+ # Error occurred before step was fully logged
+ import traceback
+
+ if logged_step:
+ await self.step_manager.update_step_error_async(
+ actor=self.actor,
+ step_id=step_id, # Use original step_id for telemetry
+ error_type=type(e).__name__ if "e" in locals() else "Unknown",
+ error_message=str(e) if "e" in locals() else "Unknown error",
+ error_traceback=traceback.format_exc(),
+ stop_reason=stop_reason,
+ )
+
+ if step_progression <= StepProgression.RESPONSE_RECEIVED:
+ # TODO (cliandy): persist response if we get it back
+ if settings.track_errored_messages and initial_messages:
+ for message in initial_messages:
+ message.is_err = True
+ message.step_id = effective_step_id
+ await self.message_manager.create_many_messages_async(
+ initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
+ )
+ elif step_progression <= StepProgression.LOGGED_TRACE:
+ if stop_reason is None:
+ self.logger.error("Error in step after logging step")
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ if logged_step:
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ else:
+ self.logger.error("Invalid StepProgression value")
+
+ if settings.track_stop_reason:
+ await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+
+ # Record partial step metrics on failure (capture whatever timing data we have)
+ if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
+ # Calculate total step time up to the failure point
+ step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+ await self._record_step_metrics(
+ step_id=step_id,
+ agent_state=agent_state,
+ step_metrics=step_metrics,
+ job_id=locals().get("run_id", self.current_run_id),
+ )
+
+ except Exception as e:
+ self.logger.error("Failed to update step: %s", e)
+
+ if not should_continue:
+ break

  # Extend the in context message ids
  if not agent_state.message_buffer_autoclear:
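The relocated error handling above leans on StepProgression as an ordered checkpoint marker: the finally block compares how far the step got against named stages to decide how much cleanup is owed. A toy sketch of that pattern; the member names and the ordered (<, <=) comparisons appear in this diff, but the exact numbering is an assumption (the streaming variant also has a STREAM_RECEIVED stage):

from enum import IntEnum

class StepProgression(IntEnum):
    START = 0
    RESPONSE_RECEIVED = 1
    STEP_LOGGED = 2
    LOGGED_TRACE = 3
    FINISHED = 4

def cleanup_action(progression: StepProgression) -> str:
    # Mirrors the ordering checks in the finally block above.
    if progression < StepProgression.STEP_LOGGED:
        return "mark step errored; optionally persist the errored input messages"
    if progression <= StepProgression.LOGGED_TRACE:
        return "record an error stop reason on the already-logged step"
    return "step finished; only final stop-reason bookkeeping remains"

print(cleanup_action(StepProgression.RESPONSE_RECEIVED))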
@@ -489,7 +545,7 @@ class LettaAgent(BaseAgent):
  async def _step(
  self,
- input_messages: list[MessageCreate],
+ input_messages: list[MessageCreateBase],
  max_steps: int = DEFAULT_MAX_STEPS,
  run_id: str | None = None,
  request_start_timestamp_ns: int | None = None,
@@ -506,6 +562,7 @@ class LettaAgent(BaseAgent):
  input_messages, agent_state, self.message_manager, self.actor
  )
  initial_messages = new_in_context_messages
+ in_context_messages = current_in_context_messages
  tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
  llm_client = LLMClient.create(
  provider_type=agent_state.llm_config.model_endpoint_type,
@@ -521,53 +578,83 @@ class LettaAgent(BaseAgent):
  job_update_metadata = None
  usage = LettaUsageStatistics()
  for i in range(max_steps):
- # If dry run, build request data and return it without making LLM call
- if dry_run:
- request_data, valid_tool_names = await self._create_llm_request_data_async(
- llm_client=llm_client,
- in_context_messages=current_in_context_messages + new_in_context_messages,
- agent_state=agent_state,
- tool_rules_solver=tool_rules_solver,
+ if in_context_messages[-1].role == "approval":
+ approval_request_message = in_context_messages[-1]
+ step_metrics = await self.step_manager.get_step_metrics_async(step_id=approval_request_message.step_id, actor=self.actor)
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+ approval_request_message.tool_calls[0],
+ [], # TODO: update this
+ agent_state,
+ tool_rules_solver,
+ usage,
+ reasoning_content=approval_request_message.content,
+ step_id=approval_request_message.step_id,
+ initial_messages=initial_messages,
+ is_final_step=(i == max_steps - 1),
+ step_metrics=step_metrics,
+ run_id=run_id or self.current_run_id,
+ is_approval=input_messages[0].approve,
+ is_denial=input_messages[0].approve == False,
+ denial_reason=input_messages[0].reason,
  )
- return request_data
+ new_message_idx = len(initial_messages) if initial_messages else 0
+ self.response_messages.extend(persisted_messages[new_message_idx:])
+ new_in_context_messages.extend(persisted_messages[new_message_idx:])
+ initial_messages = None
+ in_context_messages = current_in_context_messages + new_in_context_messages
+ else:
+ # If dry run, build request data and return it without making LLM call
+ if dry_run:
+ request_data, valid_tool_names = await self._create_llm_request_data_async(
+ llm_client=llm_client,
+ in_context_messages=current_in_context_messages + new_in_context_messages,
+ agent_state=agent_state,
+ tool_rules_solver=tool_rules_solver,
+ )
+ return request_data

- # Check for job cancellation at the start of each step
- if await self._check_run_cancellation():
- stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
- logger.info(f"Agent execution cancelled for run {self.current_run_id}")
- break
+ # Check for job cancellation at the start of each step
+ if await self._check_run_cancellation():
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+ logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+ break

- step_id = generate_step_id()
- step_start = get_utc_timestamp_ns()
- agent_step_span = tracer.start_span("agent_step", start_time=step_start)
- agent_step_span.set_attributes({"step_id": step_id})
+ step_id = generate_step_id()
+ step_start = get_utc_timestamp_ns()
+ agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+ agent_step_span.set_attributes({"step_id": step_id})

- step_progression = StepProgression.START
- should_continue = False
- step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking
+ step_progression = StepProgression.START
+ should_continue = False
+ step_metrics = StepMetrics(id=step_id) # Initialize metrics tracking

- # Create step early with PENDING status
- logged_step = await self.step_manager.log_step_async(
- actor=self.actor,
- agent_id=agent_state.id,
- provider_name=agent_state.llm_config.model_endpoint_type,
- provider_category=agent_state.llm_config.provider_category or "base",
- model=agent_state.llm_config.model,
- model_endpoint=agent_state.llm_config.model_endpoint,
- context_window_limit=agent_state.llm_config.context_window,
- usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
- provider_id=None,
- job_id=run_id if run_id else self.current_run_id,
- step_id=step_id,
- project_id=agent_state.project_id,
- status=StepStatus.PENDING,
- )
- # Only use step_id in messages if step was actually created
- effective_step_id = step_id if logged_step else None
+ # Create step early with PENDING status
+ logged_step = await self.step_manager.log_step_async(
+ actor=self.actor,
+ agent_id=agent_state.id,
+ provider_name=agent_state.llm_config.model_endpoint_type,
+ provider_category=agent_state.llm_config.provider_category or "base",
+ model=agent_state.llm_config.model,
+ model_endpoint=agent_state.llm_config.model_endpoint,
+ context_window_limit=agent_state.llm_config.context_window,
+ usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+ provider_id=None,
+ job_id=run_id if run_id else self.current_run_id,
+ step_id=step_id,
+ project_id=agent_state.project_id,
+ status=StepStatus.PENDING,
+ )
+ # Only use step_id in messages if step was actually created
+ effective_step_id = step_id if logged_step else None

- try:
- request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
- await self._build_and_request_from_llm(
+ try:
+ (
+ request_data,
+ response_data,
+ current_in_context_messages,
+ new_in_context_messages,
+ valid_tool_names,
+ ) = await self._build_and_request_from_llm(
  current_in_context_messages,
  new_in_context_messages,
  agent_state,
@@ -576,180 +663,193 @@ class LettaAgent(BaseAgent):
  agent_step_span,
  step_metrics,
  )
- )
- in_context_messages = current_in_context_messages + new_in_context_messages
-
- step_progression = StepProgression.RESPONSE_RECEIVED
- log_event("agent.step.llm_response.received") # [3^]
-
- response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
+ in_context_messages = current_in_context_messages + new_in_context_messages

- usage.step_count += 1
- usage.completion_tokens += response.usage.completion_tokens
- usage.prompt_tokens += response.usage.prompt_tokens
- usage.total_tokens += response.usage.total_tokens
- usage.run_ids = [run_id] if run_id else None
- MetricRegistry().message_output_tokens.record(
- response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
- )
+ step_progression = StepProgression.RESPONSE_RECEIVED
+ log_event("agent.step.llm_response.received") # [3^]

- if not response.choices[0].message.tool_calls:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
- raise ValueError("No tool calls found in response, model must make a tool call")
- tool_call = response.choices[0].message.tool_calls[0]
- if response.choices[0].message.reasoning_content:
- reasoning = [
- ReasoningContent(
- reasoning=response.choices[0].message.reasoning_content,
- is_native=True,
- signature=response.choices[0].message.reasoning_content_signature,
+ try:
+ response = llm_client.convert_response_to_chat_completion(
+ response_data, in_context_messages, agent_state.llm_config
  )
- ]
- elif response.choices[0].message.content:
- reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
- elif response.choices[0].message.omitted_reasoning_content:
- reasoning = [OmittedReasoningContent()]
- else:
- self.logger.info("No reasoning content found.")
- reasoning = None
-
- persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
- tool_call,
- valid_tool_names,
- agent_state,
- tool_rules_solver,
- response.usage,
- reasoning_content=reasoning,
- step_id=effective_step_id,
- initial_messages=initial_messages,
- agent_step_span=agent_step_span,
- is_final_step=(i == max_steps - 1),
- run_id=run_id,
- step_metrics=step_metrics,
- )
- step_progression = StepProgression.STEP_LOGGED
-
- # Update step with actual usage now that we have it (if step was created)
- if logged_step:
- await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
-
- new_message_idx = len(initial_messages) if initial_messages else 0
- self.response_messages.extend(persisted_messages[new_message_idx:])
- new_in_context_messages.extend(persisted_messages[new_message_idx:])
-
- initial_messages = None
- log_event("agent.step.llm_response.processed") # [4^]
-
- # log step time
- now = get_utc_timestamp_ns()
- step_ns = now - step_start
- agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
- agent_step_span.end()
-
- # Log LLM Trace
- if settings.track_provider_trace:
- await self.telemetry_manager.create_provider_trace_async(
- actor=self.actor,
- provider_trace_create=ProviderTraceCreate(
- request_json=request_data,
- response_json=response_data,
- step_id=step_id, # Use original step_id for telemetry
- organization_id=self.actor.organization_id,
- ),
+ except ValueError as e:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
+ raise e
+
+ usage.step_count += 1
+ usage.completion_tokens += response.usage.completion_tokens
+ usage.prompt_tokens += response.usage.prompt_tokens
+ usage.total_tokens += response.usage.total_tokens
+ usage.run_ids = [run_id] if run_id else None
+ MetricRegistry().message_output_tokens.record(
+ response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
  )
- step_progression = StepProgression.LOGGED_TRACE

- MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
- step_progression = StepProgression.FINISHED
+ if not response.choices[0].message.tool_calls:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+ raise ValueError("No tool calls found in response, model must make a tool call")
+ tool_call = response.choices[0].message.tool_calls[0]
+ if response.choices[0].message.reasoning_content:
+ reasoning = [
+ ReasoningContent(
+ reasoning=response.choices[0].message.reasoning_content,
+ is_native=True,
+ signature=response.choices[0].message.reasoning_content_signature,
+ )
+ ]
+ elif response.choices[0].message.content:
+ reasoning = [
+ TextContent(text=response.choices[0].message.content)
+ ] # reasoning placed into content for legacy reasons
+ elif response.choices[0].message.omitted_reasoning_content:
+ reasoning = [OmittedReasoningContent()]
+ else:
+ self.logger.info("No reasoning content found.")
+ reasoning = None

- # Record step metrics for successful completion
- if logged_step and step_metrics:
- # Set the step_ns that was already calculated
- step_metrics.step_ns = step_ns
- await self._record_step_metrics(
- step_id=step_id,
- agent_state=agent_state,
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+ tool_call,
+ valid_tool_names,
+ agent_state,
+ tool_rules_solver,
+ response.usage,
+ reasoning_content=reasoning,
+ step_id=effective_step_id,
+ initial_messages=initial_messages,
+ agent_step_span=agent_step_span,
+ is_final_step=(i == max_steps - 1),
+ run_id=run_id,
  step_metrics=step_metrics,
- job_id=run_id if run_id else self.current_run_id,
  )
-
- except Exception as e:
- # Handle any unexpected errors during step processing
- self.logger.error(f"Error during step processing: {e}")
- job_update_metadata = {"error": str(e)}
-
- # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
- if not stop_reason:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
- elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
- self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
- elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
- raise ValueError(f"Invalid Stop Reason: {stop_reason}")
- raise
-
- # Update step if it needs to be updated
- finally:
- if step_progression == StepProgression.FINISHED and should_continue:
- continue
-
- self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
- self.logger.info("Running final update. Step Progression: %s", step_progression)
- try:
- if step_progression == StepProgression.FINISHED and not should_continue:
- # Successfully completed - update with final usage and stop reason
- if stop_reason is None:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
- if logged_step:
- await self.step_manager.update_step_success_async(self.actor, step_id, usage, stop_reason)
- break
-
- # Handle error cases
- if step_progression < StepProgression.STEP_LOGGED:
- # Error occurred before step was fully logged
- import traceback
-
- if logged_step:
- await self.step_manager.update_step_error_async(
- actor=self.actor,
+ step_progression = StepProgression.STEP_LOGGED
+
+ # Update step with actual usage now that we have it (if step was created)
+ if logged_step:
+ await self.step_manager.update_step_success_async(self.actor, step_id, response.usage, stop_reason)
+
+ new_message_idx = len(initial_messages) if initial_messages else 0
+ self.response_messages.extend(persisted_messages[new_message_idx:])
+ new_in_context_messages.extend(persisted_messages[new_message_idx:])
+
+ initial_messages = None
+ log_event("agent.step.llm_response.processed") # [4^]
+
+ # log step time
+ now = get_utc_timestamp_ns()
+ step_ns = now - step_start
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+ agent_step_span.end()
+
+ # Log LLM Trace
+ if settings.track_provider_trace:
+ await self.telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json=response_data,
  step_id=step_id, # Use original step_id for telemetry
- error_type=type(e).__name__ if "e" in locals() else "Unknown",
- error_message=str(e) if "e" in locals() else "Unknown error",
- error_traceback=traceback.format_exc(),
- stop_reason=stop_reason,
- )
-
- if step_progression <= StepProgression.RESPONSE_RECEIVED:
- # TODO (cliandy): persist response if we get it back
- if settings.track_errored_messages and initial_messages:
- for message in initial_messages:
- message.is_err = True
- message.step_id = effective_step_id
- await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
- elif step_progression <= StepProgression.LOGGED_TRACE:
- if stop_reason is None:
- self.logger.error("Error in step after logging step")
- stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
- if logged_step:
- await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
- else:
- self.logger.error("Invalid StepProgression value")
+ organization_id=self.actor.organization_id,
+ ),
+ )
+ step_progression = StepProgression.LOGGED_TRACE

- if settings.track_stop_reason:
- await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+ MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+ step_progression = StepProgression.FINISHED

- # Record partial step metrics on failure (capture whatever timing data we have)
- if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
- # Calculate total step time up to the failure point
- step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+ # Record step metrics for successful completion
+ if logged_step and step_metrics:
+ # Set the step_ns that was already calculated
+ step_metrics.step_ns = step_ns
  await self._record_step_metrics(
  step_id=step_id,
  agent_state=agent_state,
  step_metrics=step_metrics,
- job_id=locals().get("run_id", self.current_run_id),
+ job_id=run_id if run_id else self.current_run_id,
  )

  except Exception as e:
- self.logger.error("Failed to update step: %s", e)
+ # Handle any unexpected errors during step processing
+ self.logger.error(f"Error during step processing: {e}")
+ job_update_metadata = {"error": str(e)}
+
+ # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+ if not stop_reason:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+ self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+ elif stop_reason.stop_reason not in (
+ StopReasonType.no_tool_call,
+ StopReasonType.invalid_tool_call,
+ StopReasonType.invalid_llm_response,
+ ):
+ self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", stop_reason.stop_reason)
+ raise
+
+ # Update step if it needs to be updated
+ finally:
+ if step_progression == StepProgression.FINISHED and should_continue:
+ continue
+
+ self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
+ self.logger.info("Running final update. Step Progression: %s", step_progression)
+ try:
+ if step_progression == StepProgression.FINISHED and not should_continue:
+ # Successfully completed - update with final usage and stop reason
+ if stop_reason is None:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ if logged_step:
+ await self.step_manager.update_step_success_async(self.actor, step_id, usage, stop_reason)
+ break
+
+ # Handle error cases
+ if step_progression < StepProgression.STEP_LOGGED:
+ # Error occurred before step was fully logged
+ import traceback
+
+ if logged_step:
+ await self.step_manager.update_step_error_async(
+ actor=self.actor,
+ step_id=step_id, # Use original step_id for telemetry
+ error_type=type(e).__name__ if "e" in locals() else "Unknown",
+ error_message=str(e) if "e" in locals() else "Unknown error",
+ error_traceback=traceback.format_exc(),
+ stop_reason=stop_reason,
+ )
+
+ if step_progression <= StepProgression.RESPONSE_RECEIVED:
+ # TODO (cliandy): persist response if we get it back
+ if settings.track_errored_messages and initial_messages:
+ for message in initial_messages:
+ message.is_err = True
+ message.step_id = effective_step_id
+ await self.message_manager.create_many_messages_async(
+ initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
+ )
+ elif step_progression <= StepProgression.LOGGED_TRACE:
+ if stop_reason is None:
+ self.logger.error("Error in step after logging step")
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ if logged_step:
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ else:
+ self.logger.error("Invalid StepProgression value")
+
+ if settings.track_stop_reason:
+ await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+
+ # Record partial step metrics on failure (capture whatever timing data we have)
+ if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
+ # Calculate total step time up to the failure point
+ step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+ await self._record_step_metrics(
+ step_id=step_id,
+ agent_state=agent_state,
+ step_metrics=step_metrics,
+ job_id=locals().get("run_id", self.current_run_id),
+ )
+
+ except Exception as e:
+ self.logger.error("Failed to update step: %s", e)

  if not should_continue:
  break
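A recurring change in both step variants above: convert_response_to_chat_completion is now wrapped so that a malformed provider payload is tagged with the new invalid_llm_response stop reason before the exception propagates, instead of falling through to the generic error reason. A toy version of that mapping (only the stop-reason value comes from this diff; the rest is illustrative):

def convert(payload: dict) -> dict:
    # Stand-in for llm_client.convert_response_to_chat_completion.
    if "choices" not in payload:
        raise ValueError("malformed provider response")
    return payload

def step_once(payload: dict) -> dict:
    stop_reason = None
    try:
        response = convert(payload)
    except ValueError:
        stop_reason = "invalid_llm_response"  # StopReasonType.invalid_llm_response
        raise
    finally:
        if stop_reason is not None:
            print(f"stop reason recorded: {stop_reason}")
    return response

print(step_once({"choices": []}))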
@@ -783,7 +883,7 @@ class LettaAgent(BaseAgent):
  @trace_method
  async def step_stream(
  self,
- input_messages: list[MessageCreate],
+ input_messages: list[MessageCreateBase],
  max_steps: int = DEFAULT_MAX_STEPS,
  use_assistant_message: bool = True,
  request_start_timestamp_ns: int | None = None,
@@ -806,6 +906,7 @@ class LettaAgent(BaseAgent):
  input_messages, agent_state, self.message_manager, self.actor
  )
  initial_messages = new_in_context_messages
+ in_context_messages = current_in_context_messages

  tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
  llm_client = LLMClient.create(
@@ -822,219 +923,30 @@ class LettaAgent(BaseAgent):
         request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
 
         for i in range(max_steps):
-            step_id = generate_step_id()
-            # Check for job cancellation at the start of each step
-            if await self._check_run_cancellation():
-                stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
-                logger.info(f"Agent execution cancelled for run {self.current_run_id}")
-                yield f"data: {stop_reason.model_dump_json()}\n\n"
-                break
-
-            step_start = get_utc_timestamp_ns()
-            agent_step_span = tracer.start_span("agent_step", start_time=step_start)
-            agent_step_span.set_attributes({"step_id": step_id})
-
-            step_progression = StepProgression.START
-            should_continue = False
-            step_metrics = StepMetrics(id=step_id)  # Initialize metrics tracking
-
-            # Create step early with PENDING status
-            logged_step = await self.step_manager.log_step_async(
-                actor=self.actor,
-                agent_id=agent_state.id,
-                provider_name=agent_state.llm_config.model_endpoint_type,
-                provider_category=agent_state.llm_config.provider_category or "base",
-                model=agent_state.llm_config.model,
-                model_endpoint=agent_state.llm_config.model_endpoint,
-                context_window_limit=agent_state.llm_config.context_window,
-                usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
-                provider_id=None,
-                job_id=self.current_run_id if self.current_run_id else None,
-                step_id=step_id,
-                project_id=agent_state.project_id,
-                status=StepStatus.PENDING,
-            )
-            # Only use step_id in messages if step was actually created
-            effective_step_id = step_id if logged_step else None
-
-            try:
-                (
-                    request_data,
-                    stream,
-                    current_in_context_messages,
-                    new_in_context_messages,
-                    valid_tool_names,
-                    provider_request_start_timestamp_ns,
-                ) = await self._build_and_request_from_llm_streaming(
-                    first_chunk,
-                    agent_step_span,
-                    request_start_timestamp_ns,
-                    current_in_context_messages,
-                    new_in_context_messages,
-                    agent_state,
-                    llm_client,
-                    tool_rules_solver,
-                )
-
-                step_progression = StepProgression.STREAM_RECEIVED
-                log_event("agent.stream.llm_response.received")  # [3^]
-
-                # TODO: THIS IS INCREDIBLY UGLY
-                # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
-                if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
-                    interface = AnthropicStreamingInterface(
-                        use_assistant_message=use_assistant_message,
-                        put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
-                    )
-                elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
-                    interface = OpenAIStreamingInterface(
-                        use_assistant_message=use_assistant_message,
-                        is_openai_proxy=agent_state.llm_config.provider_name == "lmstudio_openai",
-                        messages=current_in_context_messages + new_in_context_messages,
-                        tools=request_data.get("tools", []),
-                    )
-                else:
-                    raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
-
-                async for chunk in interface.process(
-                    stream,
-                    ttft_span=request_span,
-                ):
-                    # Measure TTFT (trace, metric, and db). This should be consolidated.
-                    if first_chunk and request_span is not None:
-                        now = get_utc_timestamp_ns()
-                        ttft_ns = now - request_start_timestamp_ns
-
-                        request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
-                        metric_attributes = get_ctx_attributes()
-                        metric_attributes["model.name"] = agent_state.llm_config.model
-                        MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
-
-                        if self.current_run_id and self.job_manager:
-                            await self.job_manager.record_ttft(self.current_run_id, ttft_ns, self.actor)
-
-                        first_chunk = False
-
-                    if include_return_message_types is None or chunk.message_type in include_return_message_types:
-                        # filter down returned data
-                        yield f"data: {chunk.model_dump_json()}\n\n"
-
-                stream_end_time_ns = get_utc_timestamp_ns()
-
-                # Some providers that rely on the OpenAI client currently e.g. LMStudio don't get usage metrics back on the last streaming chunk, fall back to manual values
-                if isinstance(interface, OpenAIStreamingInterface) and not interface.input_tokens and not interface.output_tokens:
-                    logger.warning(
-                        f"No token usage metrics received from OpenAI streaming interface for {agent_state.llm_config.model}, falling back to estimated values. Input tokens: {interface.fallback_input_tokens}, Output tokens: {interface.fallback_output_tokens}"
-                    )
-                    interface.input_tokens = interface.fallback_input_tokens
-                    interface.output_tokens = interface.fallback_output_tokens
-
-                usage.step_count += 1
-                usage.completion_tokens += interface.output_tokens
-                usage.prompt_tokens += interface.input_tokens
-                usage.total_tokens += interface.input_tokens + interface.output_tokens
-                MetricRegistry().message_output_tokens.record(
-                    usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
-                )
-
-                # log LLM request time
-                llm_request_ns = stream_end_time_ns - provider_request_start_timestamp_ns
-                step_metrics.llm_request_ns = llm_request_ns
-
-                llm_request_ms = ns_to_ms(llm_request_ns)
-                agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
-                MetricRegistry().llm_execution_time_ms_histogram.record(
-                    llm_request_ms,
-                    dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
-                )
-
-                # Process resulting stream content
-                try:
-                    tool_call = interface.get_tool_call_object()
-                except ValueError as e:
-                    stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
-                    raise e
-                except Exception as e:
-                    stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
-                    raise e
-                reasoning_content = interface.get_reasoning_content()
+            if in_context_messages[-1].role == "approval":
+                approval_request_message = in_context_messages[-1]
+                step_metrics = await self.step_manager.get_step_metrics_async(step_id=approval_request_message.step_id, actor=self.actor)
                 persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
-                    tool_call,
-                    valid_tool_names,
+                    approval_request_message.tool_calls[0],
+                    [],  # TODO: update this
                     agent_state,
                     tool_rules_solver,
-                    UsageStatistics(
-                        completion_tokens=usage.completion_tokens,
-                        prompt_tokens=usage.prompt_tokens,
-                        total_tokens=usage.total_tokens,
-                    ),
-                    reasoning_content=reasoning_content,
-                    pre_computed_assistant_message_id=interface.letta_message_id,
-                    step_id=effective_step_id,
-                    initial_messages=initial_messages,
-                    agent_step_span=agent_step_span,
+                    usage,
+                    reasoning_content=approval_request_message.content,
+                    step_id=approval_request_message.step_id,
+                    initial_messages=new_in_context_messages,
                     is_final_step=(i == max_steps - 1),
                     step_metrics=step_metrics,
+                    run_id=self.current_run_id,
+                    is_approval=input_messages[0].approve,
+                    is_denial=input_messages[0].approve == False,
+                    denial_reason=input_messages[0].reason,
                 )
-                step_progression = StepProgression.STEP_LOGGED
-
-                # Update step with actual usage now that we have it (if step was created)
-                if logged_step:
-                    await self.step_manager.update_step_success_async(
-                        self.actor,
-                        step_id,
-                        UsageStatistics(
-                            completion_tokens=usage.completion_tokens,
-                            prompt_tokens=usage.prompt_tokens,
-                            total_tokens=usage.total_tokens,
-                        ),
-                        stop_reason,
-                    )
-
                 new_message_idx = len(initial_messages) if initial_messages else 0
                 self.response_messages.extend(persisted_messages[new_message_idx:])
                 new_in_context_messages.extend(persisted_messages[new_message_idx:])
-
                 initial_messages = None
-
-                # log total step time
-                now = get_utc_timestamp_ns()
-                step_ns = now - step_start
-                agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
-                agent_step_span.end()
-
-                # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
-                # log_event("agent.stream.llm_response.processed")  # [4^]
-
-                # Log LLM Trace
-                # We are piecing together the streamed response here.
-                # Content here does not match the actual response schema as streams come in chunks.
-                if settings.track_provider_trace:
-                    await self.telemetry_manager.create_provider_trace_async(
-                        actor=self.actor,
-                        provider_trace_create=ProviderTraceCreate(
-                            request_json=request_data,
-                            response_json={
-                                "content": {
-                                    "tool_call": tool_call.model_dump_json(),
-                                    "reasoning": [content.model_dump_json() for content in reasoning_content],
-                                },
-                                "id": interface.message_id,
-                                "model": interface.model,
-                                "role": "assistant",
-                                # "stop_reason": "",
-                                # "stop_sequence": None,
-                                "type": "message",
-                                "usage": {
-                                    "input_tokens": usage.prompt_tokens,
-                                    "output_tokens": usage.completion_tokens,
-                                },
-                            },
-                            step_id=step_id,  # Use original step_id for telemetry
-                            organization_id=self.actor.organization_id,
-                        ),
-                    )
-                step_progression = StepProgression.LOGGED_TRACE
+                in_context_messages = current_in_context_messages + new_in_context_messages
 
                 # yields tool response as this is handled from Letta and not the response from the LLM provider
                 tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
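
The branch above resumes a paused step: when the newest in-context message is an approval request, the loop skips the LLM call and replays that message's recorded tool call through `_handle_ai_response`, flagging it as an approval or denial. A simplified, self-contained sketch of the detection; the real `Message` objects are richer than this stand-in:

```python
from dataclasses import dataclass, field

@dataclass
class Msg:
    role: str
    step_id: str | None = None
    tool_calls: list = field(default_factory=list)

def pending_approval(in_context_messages: list[Msg]) -> Msg | None:
    # The agent previously persisted an approval request and stopped; a new
    # request resumes from that message instead of issuing a fresh LLM step.
    last = in_context_messages[-1] if in_context_messages else None
    return last if last is not None and last.role == "approval" else None

history = [Msg("user"), Msg("approval", step_id="step-1", tool_calls=["call-1"])]
assert pending_approval(history) is history[-1]
```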
@@ -1042,103 +954,238 @@ class LettaAgent(BaseAgent):
                 # Apply message type filtering if specified
                 if include_return_message_types is None or tool_return.message_type in include_return_message_types:
                     yield f"data: {tool_return.model_dump_json()}\n\n"
+            else:
+                step_id = generate_step_id()
+                # Check for job cancellation at the start of each step
+                if await self._check_run_cancellation():
+                    stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+                    logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+                    yield f"data: {stop_reason.model_dump_json()}\n\n"
+                    break
+
+                step_start = get_utc_timestamp_ns()
+                agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+                agent_step_span.set_attributes({"step_id": step_id})
+
+                step_progression = StepProgression.START
+                should_continue = False
+                step_metrics = StepMetrics(id=step_id)  # Initialize metrics tracking
+
+                # Create step early with PENDING status
+                logged_step = await self.step_manager.log_step_async(
+                    actor=self.actor,
+                    agent_id=agent_state.id,
+                    provider_name=agent_state.llm_config.model_endpoint_type,
+                    provider_category=agent_state.llm_config.provider_category or "base",
+                    model=agent_state.llm_config.model,
+                    model_endpoint=agent_state.llm_config.model_endpoint,
+                    context_window_limit=agent_state.llm_config.context_window,
+                    usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+                    provider_id=None,
+                    job_id=self.current_run_id if self.current_run_id else None,
+                    step_id=step_id,
+                    project_id=agent_state.project_id,
+                    status=StepStatus.PENDING,
+                )
+                # Only use step_id in messages if step was actually created
+                effective_step_id = step_id if logged_step else None
 
-            # TODO (cliandy): consolidate and expand with trace
-            MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
-            step_progression = StepProgression.FINISHED
+                try:
+                    (
+                        request_data,
+                        stream,
+                        current_in_context_messages,
+                        new_in_context_messages,
+                        valid_tool_names,
+                        provider_request_start_timestamp_ns,
+                    ) = await self._build_and_request_from_llm_streaming(
+                        first_chunk,
+                        agent_step_span,
+                        request_start_timestamp_ns,
+                        current_in_context_messages,
+                        new_in_context_messages,
+                        agent_state,
+                        llm_client,
+                        tool_rules_solver,
+                    )
 
-            # Record step metrics for successful completion
-            if logged_step and step_metrics:
-                try:
-                    # Set the step_ns that was already calculated
-                    step_metrics.step_ns = step_ns
+                    step_progression = StepProgression.STREAM_RECEIVED
+                    log_event("agent.stream.llm_response.received")  # [3^]
+
+                    # TODO: THIS IS INCREDIBLY UGLY
+                    # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
+                    if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
+                        interface = AnthropicStreamingInterface(
+                            use_assistant_message=use_assistant_message,
+                            put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+                        )
+                    elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
+                        interface = OpenAIStreamingInterface(
+                            use_assistant_message=use_assistant_message,
+                            is_openai_proxy=agent_state.llm_config.provider_name == "lmstudio_openai",
+                            messages=current_in_context_messages + new_in_context_messages,
+                            tools=request_data.get("tools", []),
+                            put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+                        )
+                    else:
+                        raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
 
-                    # Get context attributes for project and template IDs
-                    ctx_attrs = get_ctx_attributes()
+                    async for chunk in interface.process(
+                        stream,
+                        ttft_span=request_span,
+                    ):
+                        # Measure TTFT (trace, metric, and db). This should be consolidated.
+                        if first_chunk and request_span is not None:
+                            now = get_utc_timestamp_ns()
+                            ttft_ns = now - request_start_timestamp_ns
 
-                    await self._record_step_metrics(
-                        step_id=step_id,
-                        agent_state=agent_state,
-                        step_metrics=step_metrics,
-                        ctx_attrs=ctx_attrs,
-                        job_id=self.current_run_id,
+                            request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
+                            metric_attributes = get_ctx_attributes()
+                            metric_attributes["model.name"] = agent_state.llm_config.model
+                            MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
+
+                            if self.current_run_id and self.job_manager:
+                                await self.job_manager.record_ttft(self.current_run_id, ttft_ns, self.actor)
+
+                            first_chunk = False
+
+                        if include_return_message_types is None or chunk.message_type in include_return_message_types:
+                            # filter down returned data
+                            yield f"data: {chunk.model_dump_json()}\n\n"
+
+                    stream_end_time_ns = get_utc_timestamp_ns()
+
+                    # Some providers that rely on the OpenAI client currently e.g. LMStudio don't get usage metrics back on the last streaming chunk, fall back to manual values
+                    if isinstance(interface, OpenAIStreamingInterface) and not interface.input_tokens and not interface.output_tokens:
+                        logger.warning(
+                            f"No token usage metrics received from OpenAI streaming interface for {agent_state.llm_config.model}, falling back to estimated values. Input tokens: {interface.fallback_input_tokens}, Output tokens: {interface.fallback_output_tokens}"
                         )
-                except Exception as metrics_error:
-                    self.logger.warning(f"Failed to record step metrics: {metrics_error}")
+                        interface.input_tokens = interface.fallback_input_tokens
+                        interface.output_tokens = interface.fallback_output_tokens
+
+                    usage.step_count += 1
+                    usage.completion_tokens += interface.output_tokens
+                    usage.prompt_tokens += interface.input_tokens
+                    usage.total_tokens += interface.input_tokens + interface.output_tokens
+                    MetricRegistry().message_output_tokens.record(
+                        usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+                    )
 
-            except Exception as e:
-                # Handle any unexpected errors during step processing
-                self.logger.error(f"Error during step processing: {e}")
-                job_update_metadata = {"error": str(e)}
-
-                # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
-                if not stop_reason:
-                    stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
-                elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
-                    self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
-                elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
-                    raise ValueError(f"Invalid Stop Reason: {stop_reason}")
-
-                # Send error stop reason to client and re-raise with expected response code
-                yield f"data: {stop_reason.model_dump_json()}\n\n", 500
-                raise
-
-            # Update step if it needs to be updated
-            finally:
-                if step_progression == StepProgression.FINISHED and should_continue:
-                    continue
-
-                self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
-                self.logger.info("Running final update. Step Progression: %s", step_progression)
-                try:
-                    if step_progression == StepProgression.FINISHED and not should_continue:
-                        # Successfully completed - update with final usage and stop reason
-                        if stop_reason is None:
-                            stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
-                        # Note: step already updated with success status after _handle_ai_response
-                        if logged_step:
-                            await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
-                        break
-
-                    # Handle error cases
-                    if step_progression < StepProgression.STEP_LOGGED:
-                        # Error occurred before step was fully logged
-                        import traceback
-
-                        if logged_step:
-                            await self.step_manager.update_step_error_async(
-                                actor=self.actor,
+                    # log LLM request time
+                    llm_request_ns = stream_end_time_ns - provider_request_start_timestamp_ns
+                    step_metrics.llm_request_ns = llm_request_ns
+
+                    llm_request_ms = ns_to_ms(llm_request_ns)
+                    agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
+                    MetricRegistry().llm_execution_time_ms_histogram.record(
+                        llm_request_ms,
+                        dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
+                    )
+
+                    # Process resulting stream content
+                    try:
+                        tool_call = interface.get_tool_call_object()
+                    except ValueError as e:
+                        stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+                        raise e
+                    except Exception as e:
+                        stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
+                        raise e
+                    reasoning_content = interface.get_reasoning_content()
+                    persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+                        tool_call,
+                        valid_tool_names,
+                        agent_state,
+                        tool_rules_solver,
+                        UsageStatistics(
+                            completion_tokens=usage.completion_tokens,
+                            prompt_tokens=usage.prompt_tokens,
+                            total_tokens=usage.total_tokens,
+                        ),
+                        reasoning_content=reasoning_content,
+                        pre_computed_assistant_message_id=interface.letta_message_id,
+                        step_id=effective_step_id,
+                        initial_messages=initial_messages,
+                        agent_step_span=agent_step_span,
+                        is_final_step=(i == max_steps - 1),
+                        step_metrics=step_metrics,
+                    )
+                    step_progression = StepProgression.STEP_LOGGED
+
+                    # Update step with actual usage now that we have it (if step was created)
+                    if logged_step:
+                        await self.step_manager.update_step_success_async(
+                            self.actor,
+                            step_id,
+                            UsageStatistics(
+                                completion_tokens=usage.completion_tokens,
+                                prompt_tokens=usage.prompt_tokens,
+                                total_tokens=usage.total_tokens,
+                            ),
+                            stop_reason,
+                        )
+
+                    new_message_idx = len(initial_messages) if initial_messages else 0
+                    self.response_messages.extend(persisted_messages[new_message_idx:])
+                    new_in_context_messages.extend(persisted_messages[new_message_idx:])
+
+                    initial_messages = None
+
+                    # log total step time
+                    now = get_utc_timestamp_ns()
+                    step_ns = now - step_start
+                    agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+                    agent_step_span.end()
+
+                    # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
+                    # log_event("agent.stream.llm_response.processed")  # [4^]
+
+                    # Log LLM Trace
+                    # We are piecing together the streamed response here.
+                    # Content here does not match the actual response schema as streams come in chunks.
+                    if settings.track_provider_trace:
+                        await self.telemetry_manager.create_provider_trace_async(
+                            actor=self.actor,
+                            provider_trace_create=ProviderTraceCreate(
+                                request_json=request_data,
+                                response_json={
+                                    "content": {
+                                        "tool_call": tool_call.model_dump_json(),
+                                        "reasoning": [content.model_dump_json() for content in reasoning_content],
+                                    },
+                                    "id": interface.message_id,
+                                    "model": interface.model,
+                                    "role": "assistant",
+                                    # "stop_reason": "",
+                                    # "stop_sequence": None,
+                                    "type": "message",
+                                    "usage": {
+                                        "input_tokens": usage.prompt_tokens,
+                                        "output_tokens": usage.completion_tokens,
+                                    },
+                                },
                                 step_id=step_id,  # Use original step_id for telemetry
-                                error_type=type(e).__name__ if "e" in locals() else "Unknown",
-                                error_message=str(e) if "e" in locals() else "Unknown error",
-                                error_traceback=traceback.format_exc(),
-                                stop_reason=stop_reason,
-                            )
+                                organization_id=self.actor.organization_id,
+                            ),
+                        )
+                    step_progression = StepProgression.LOGGED_TRACE
 
-                    if step_progression <= StepProgression.STREAM_RECEIVED:
-                        if first_chunk and settings.track_errored_messages and initial_messages:
-                            for message in initial_messages:
-                                message.is_err = True
-                                message.step_id = effective_step_id
-                            await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
-                    elif step_progression <= StepProgression.LOGGED_TRACE:
-                        if stop_reason is None:
-                            self.logger.error("Error in step after logging step")
-                            stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
-                        if logged_step:
-                            await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
-                    else:
-                        self.logger.error("Invalid StepProgression value")
+                    # yields tool response as this is handled from Letta and not the response from the LLM provider
+                    tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
+                    if not (use_assistant_message and tool_return.name == "send_message"):
+                        # Apply message type filtering if specified
+                        if include_return_message_types is None or tool_return.message_type in include_return_message_types:
+                            yield f"data: {tool_return.model_dump_json()}\n\n"
 
-                    # Do tracking for failure cases. Can consolidate with success conditions later.
-                    if settings.track_stop_reason:
-                        await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+                    # TODO (cliandy): consolidate and expand with trace
+                    MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+                    step_progression = StepProgression.FINISHED
 
-                    # Record partial step metrics on failure (capture whatever timing data we have)
-                    if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
+                    # Record step metrics for successful completion
+                    if logged_step and step_metrics:
                         try:
-                            # Calculate total step time up to the failure point
-                            step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+                            # Set the step_ns that was already calculated
+                            step_metrics.step_ns = step_ns
 
                             # Get context attributes for project and template IDs
                             ctx_attrs = get_ctx_attributes()
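
The streaming loop above measures time-to-first-token exactly once, on the first chunk, relative to the request start. A minimal sketch of the same bookkeeping with a stand-in clock (`time.monotonic_ns` here; the package uses its own `get_utc_timestamp_ns` helper):

```python
import time

request_start_ns = time.monotonic_ns()
first_chunk = True
ttft_ms = None

for chunk in ("data: {}\n\n", "data: {}\n\n"):  # stand-in for the SSE stream
    if first_chunk:
        # Only the first chunk closes the TTFT window.
        ttft_ms = (time.monotonic_ns() - request_start_ns) / 1e6
        first_chunk = False
    # ...the real loop yields the chunk to the client here...

print(f"ttft_ms={ttft_ms:.3f}")
```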
@@ -1148,16 +1195,109 @@ class LettaAgent(BaseAgent):
                                 agent_state=agent_state,
                                 step_metrics=step_metrics,
                                 ctx_attrs=ctx_attrs,
-                                job_id=locals().get("run_id", self.current_run_id),
+                                job_id=self.current_run_id,
                             )
                         except Exception as metrics_error:
                             self.logger.warning(f"Failed to record step metrics: {metrics_error}")
 
                 except Exception as e:
-                    self.logger.error("Failed to update step: %s", e)
+                    # Handle any unexpected errors during step processing
+                    self.logger.error(f"Error during step processing: {e}")
+                    job_update_metadata = {"error": str(e)}
+
+                    # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+                    if not stop_reason:
+                        stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+                    elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+                        self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+                    elif stop_reason.stop_reason not in (
+                        StopReasonType.no_tool_call,
+                        StopReasonType.invalid_tool_call,
+                        StopReasonType.invalid_llm_response,
+                    ):
+                        self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", stop_reason.stop_reason)
+
+                    # Send error stop reason to client and re-raise with expected response code
+                    yield f"data: {stop_reason.model_dump_json()}\n\n", 500
+                    raise
 
-        if not should_continue:
-            break
+                # Update step if it needs to be updated
+                finally:
+                    if step_progression == StepProgression.FINISHED and should_continue:
+                        continue
+
+                    self.logger.debug("Running cleanup for agent loop run: %s", self.current_run_id)
+                    self.logger.info("Running final update. Step Progression: %s", step_progression)
+                    try:
+                        if step_progression == StepProgression.FINISHED and not should_continue:
+                            # Successfully completed - update with final usage and stop reason
+                            if stop_reason is None:
+                                stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+                            # Note: step already updated with success status after _handle_ai_response
+                            if logged_step:
+                                await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+                            break
+
+                        # Handle error cases
+                        if step_progression < StepProgression.STEP_LOGGED:
+                            # Error occurred before step was fully logged
+                            import traceback
+
+                            if logged_step:
+                                await self.step_manager.update_step_error_async(
+                                    actor=self.actor,
+                                    step_id=step_id,  # Use original step_id for telemetry
+                                    error_type=type(e).__name__ if "e" in locals() else "Unknown",
+                                    error_message=str(e) if "e" in locals() else "Unknown error",
+                                    error_traceback=traceback.format_exc(),
+                                    stop_reason=stop_reason,
+                                )
+
+                        if step_progression <= StepProgression.STREAM_RECEIVED:
+                            if first_chunk and settings.track_errored_messages and initial_messages:
+                                for message in initial_messages:
+                                    message.is_err = True
+                                    message.step_id = effective_step_id
+                                await self.message_manager.create_many_messages_async(
+                                    initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
+                                )
+                        elif step_progression <= StepProgression.LOGGED_TRACE:
+                            if stop_reason is None:
+                                self.logger.error("Error in step after logging step")
+                                stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+                            if logged_step:
+                                await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+                        else:
+                            self.logger.error("Invalid StepProgression value")
+
+                        # Do tracking for failure cases. Can consolidate with success conditions later.
+                        if settings.track_stop_reason:
+                            await self._log_request(request_start_timestamp_ns, request_span, job_update_metadata, is_error=True)
+
+                        # Record partial step metrics on failure (capture whatever timing data we have)
+                        if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
+                            try:
+                                # Calculate total step time up to the failure point
+                                step_metrics.step_ns = get_utc_timestamp_ns() - step_start
+
+                                # Get context attributes for project and template IDs
+                                ctx_attrs = get_ctx_attributes()
+
+                                await self._record_step_metrics(
+                                    step_id=step_id,
+                                    agent_state=agent_state,
+                                    step_metrics=step_metrics,
+                                    ctx_attrs=ctx_attrs,
+                                    job_id=locals().get("run_id", self.current_run_id),
+                                )
+                            except Exception as metrics_error:
+                                self.logger.warning(f"Failed to record step metrics: {metrics_error}")
+
+                    except Exception as e:
+                        self.logger.error("Failed to update step: %s", e)
+
+            if not should_continue:
+                break
 
         # Extend the in context message ids
         if not agent_state.message_buffer_autoclear:
             await self._rebuild_context_window(
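
On failure the generator above yields a `(payload, status)` tuple (`yield f"data: ...", 500`) rather than a bare SSE string, so the HTTP layer can surface an error code even though streaming has already begun. A hedged sketch of how a consumer might distinguish the two cases; the consumer shape is assumed, not taken from the package:

```python
import asyncio
import json

def error_event(stop_reason: str) -> tuple[str, int]:
    # Mirrors the failure path: an SSE payload plus an HTTP status hint.
    return f"data: {json.dumps({'stop_reason': stop_reason})}\n\n", 500

async def stream():
    yield 'data: {"message_type": "assistant_message"}\n\n'
    yield error_event("error")

async def consume():
    async for item in stream():
        sse, status = item if isinstance(item, tuple) else (item, 200)
        print(status, sse.strip())

asyncio.run(consume())
```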
@@ -1494,14 +1634,46 @@ class LettaAgent(BaseAgent):
         is_final_step: bool | None = None,
         run_id: str | None = None,
         step_metrics: StepMetrics = None,
+        is_approval: bool | None = None,
+        is_denial: bool | None = None,
+        denial_reason: str | None = None,
     ) -> tuple[list[Message], bool, LettaStopReason | None]:
         """
         Handle the final AI response once streaming completes, execute / validate the
         tool call, decide whether we should keep stepping, and persist state.
         """
+        tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+
+        if is_denial:
+            continue_stepping = True
+            stop_reason = None
+            tool_call_messages = create_letta_messages_from_llm_response(
+                agent_id=agent_state.id,
+                model=agent_state.llm_config.model,
+                function_name="",
+                function_arguments={},
+                tool_execution_result=ToolExecutionResult(status="error"),
+                tool_call_id=tool_call_id,
+                function_call_success=False,
+                function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
+                timezone=agent_state.timezone,
+                actor=self.actor,
+                continue_stepping=continue_stepping,
+                heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
+                reasoning_content=None,
+                pre_computed_assistant_message_id=None,
+                step_id=step_id,
+                is_approval_response=True,
+            )
+            messages_to_persist = (initial_messages or []) + tool_call_messages
+            persisted_messages = await self.message_manager.create_many_messages_async(
+                messages_to_persist, actor=self.actor, embedding_config=agent_state.embedding_config
+            )
+            return persisted_messages, continue_stepping, stop_reason
+
         # 1. Parse and validate the tool-call envelope
         tool_call_name: str = tool_call.function.name
-        tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+
         tool_args = _safe_load_tool_call_str(tool_call.function.arguments)
         request_heartbeat: bool = _pop_heartbeat(tool_args)
         tool_args.pop(INNER_THOUGHTS_KWARG, None)
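
The `is_denial` fast path above never executes the tool: it fabricates a failed tool result whose response text carries the user's reason, persists it, and keeps stepping so the model can react. A small sketch of the synthesized strings; `NON_USER_MSG_PREFIX` exists in the package, but its literal value here is an assumption:

```python
NON_USER_MSG_PREFIX = "[system] "  # assumed literal for illustration

def denial_function_response(denial_reason: str | None) -> str:
    # What the model sees as the "tool output" for the denied call.
    return f"Error: request to call tool denied. User reason: {denial_reason}"

def denial_heartbeat() -> str:
    # Why the agent keeps stepping after a denial.
    return f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool."

print(denial_function_response("too risky"))
print(denial_heartbeat())
```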
@@ -1515,77 +1687,99 @@ class LettaAgent(BaseAgent):
             request_heartbeat=request_heartbeat,
         )
 
-        # 2. Execute the tool (or synthesize an error result if disallowed)
-        tool_rule_violated = tool_call_name not in valid_tool_names
-        if tool_rule_violated:
-            tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
-        else:
-            # Track tool execution time
-            tool_start_time = get_utc_timestamp_ns()
-            tool_execution_result = await self._execute_tool(
-                tool_name=tool_call_name,
-                tool_args=tool_args,
-                agent_state=agent_state,
-                agent_step_span=agent_step_span,
+        if not is_approval and tool_rules_solver.is_requires_approval_tool(tool_call_name):
+            approval_message = create_approval_request_message_from_llm_response(
+                agent_id=agent_state.id,
+                model=agent_state.llm_config.model,
+                function_name=tool_call_name,
+                function_arguments=tool_args,
+                tool_call_id=tool_call_id,
+                actor=self.actor,
+                continue_stepping=request_heartbeat,
+                reasoning_content=reasoning_content,
+                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                 step_id=step_id,
             )
-            tool_end_time = get_utc_timestamp_ns()
+            messages_to_persist = (initial_messages or []) + [approval_message]
+            continue_stepping = False
+            stop_reason = LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
+        else:
+            # 2. Execute the tool (or synthesize an error result if disallowed)
+            tool_rule_violated = tool_call_name not in valid_tool_names and not is_approval
+            if tool_rule_violated:
+                tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
+            else:
+                # Track tool execution time
+                tool_start_time = get_utc_timestamp_ns()
+                tool_execution_result = await self._execute_tool(
+                    tool_name=tool_call_name,
+                    tool_args=tool_args,
+                    agent_state=agent_state,
+                    agent_step_span=agent_step_span,
+                    step_id=step_id,
+                )
+                tool_end_time = get_utc_timestamp_ns()
 
-            # Store tool execution time in metrics
-            step_metrics.tool_execution_ns = tool_end_time - tool_start_time
+                # Store tool execution time in metrics
+                step_metrics.tool_execution_ns = tool_end_time - tool_start_time
 
-        log_telemetry(
-            self.logger, "_handle_ai_response execute tool finish", tool_execution_result=tool_execution_result, tool_call_id=tool_call_id
-        )
+            log_telemetry(
+                self.logger,
+                "_handle_ai_response execute tool finish",
+                tool_execution_result=tool_execution_result,
+                tool_call_id=tool_call_id,
+            )
 
-        # 3. Prepare the function-response payload
-        truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
-        return_char_limit = next(
-            (t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
-            None,
-        )
-        function_response_string = validate_function_response(
-            tool_execution_result.func_return,
-            return_char_limit=return_char_limit,
-            truncate=truncate,
-        )
-        self.last_function_response = package_function_response(
-            was_success=tool_execution_result.success_flag,
-            response_string=function_response_string,
-            timezone=agent_state.timezone,
-        )
+            # 3. Prepare the function-response payload
+            truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
+            return_char_limit = next(
+                (t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
+                None,
+            )
+            function_response_string = validate_function_response(
+                tool_execution_result.func_return,
+                return_char_limit=return_char_limit,
+                truncate=truncate,
+            )
+            self.last_function_response = package_function_response(
+                was_success=tool_execution_result.success_flag,
+                response_string=function_response_string,
+                timezone=agent_state.timezone,
+            )
 
-        # 4. Decide whether to keep stepping (<<< focal section simplified)
-        continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
-            agent_state=agent_state,
-            request_heartbeat=request_heartbeat,
-            tool_call_name=tool_call_name,
-            tool_rule_violated=tool_rule_violated,
-            tool_rules_solver=tool_rules_solver,
-            is_final_step=is_final_step,
-        )
+            # 4. Decide whether to keep stepping (focal section simplified)
+            continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
+                agent_state=agent_state,
+                request_heartbeat=request_heartbeat,
+                tool_call_name=tool_call_name,
+                tool_rule_violated=tool_rule_violated,
+                tool_rules_solver=tool_rules_solver,
+                is_final_step=is_final_step,
+            )
 
-        # 5. Create messages (step was already created at the beginning)
-        tool_call_messages = create_letta_messages_from_llm_response(
-            agent_id=agent_state.id,
-            model=agent_state.llm_config.model,
-            function_name=tool_call_name,
-            function_arguments=tool_args,
-            tool_execution_result=tool_execution_result,
-            tool_call_id=tool_call_id,
-            function_call_success=tool_execution_result.success_flag,
-            function_response=function_response_string,
-            timezone=agent_state.timezone,
-            actor=self.actor,
-            continue_stepping=continue_stepping,
-            heartbeat_reason=heartbeat_reason,
-            reasoning_content=reasoning_content,
-            pre_computed_assistant_message_id=pre_computed_assistant_message_id,
-            step_id=step_id,
-        )
+            # 5. Create messages (step was already created at the beginning)
+            tool_call_messages = create_letta_messages_from_llm_response(
+                agent_id=agent_state.id,
+                model=agent_state.llm_config.model,
+                function_name=tool_call_name,
+                function_arguments=tool_args,
+                tool_execution_result=tool_execution_result,
+                tool_call_id=tool_call_id,
+                function_call_success=tool_execution_result.success_flag,
+                function_response=function_response_string,
+                timezone=agent_state.timezone,
+                actor=self.actor,
+                continue_stepping=continue_stepping,
+                heartbeat_reason=heartbeat_reason,
+                reasoning_content=reasoning_content,
+                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
+                step_id=step_id,
+                is_approval_response=is_approval or is_denial,
+            )
+            messages_to_persist = (initial_messages or []) + tool_call_messages
 
         persisted_messages = await self.message_manager.create_many_messages_async(
-            (initial_messages or []) + tool_call_messages, actor=self.actor
+            messages_to_persist, actor=self.actor, embedding_config=agent_state.embedding_config
         )
 
         if run_id:
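
The gate above checks `tool_rules_solver.is_requires_approval_tool(...)` before execution: a flagged tool short-circuits into a persisted approval request and a `requires_approval` stop reason, unless the current call is already an approval response. A reduced sketch of that decision; the real `ToolRulesSolver` is rule-driven rather than a plain set:

```python
class ApprovalGate:
    def __init__(self, requires_approval: set[str]):
        self.requires_approval = requires_approval

    def is_requires_approval_tool(self, tool_name: str) -> bool:
        return tool_name in self.requires_approval

gate = ApprovalGate({"delete_archive"})
is_approval = False  # this turn is not itself an approval response

if not is_approval and gate.is_requires_approval_tool("delete_archive"):
    # The agent persists an approval request message and stops stepping.
    print("stop_reason=requires_approval")
```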
@@ -1606,7 +1800,6 @@ class LettaAgent(BaseAgent):
         tool_rules_solver: ToolRulesSolver,
         is_final_step: bool | None,
     ) -> tuple[bool, str | None, LettaStopReason | None]:
-
         continue_stepping = request_heartbeat
         heartbeat_reason: str | None = None
         stop_reason: LettaStopReason | None = None
@@ -1638,9 +1831,7 @@ class LettaAgent(BaseAgent):
             uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
             if not continue_stepping and uncalled:
                 continue_stepping = True
-                heartbeat_reason = (
-                    f"{NON_USER_MSG_PREFIX}Continuing, user expects these tools: [" f"{', '.join(uncalled)}] to be called still."
-                )
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing, user expects these tools: [{', '.join(uncalled)}] to be called still."
 
                 stop_reason = None  # reset – we’re still going
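
The consolidated f-string above feeds the continuation rule: when required tools remain uncalled, the solver forces another step and explains why in the heartbeat reason. A direct, self-contained restatement of that rule (the `NON_USER_MSG_PREFIX` literal is an assumed placeholder):

```python
NON_USER_MSG_PREFIX = "[system] "  # assumed literal for illustration

def force_required_tools(continue_stepping: bool, uncalled: list[str]) -> tuple[bool, str | None]:
    heartbeat_reason = None
    if not continue_stepping and uncalled:
        # Required tools are still outstanding: keep stepping and say why.
        continue_stepping = True
        heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing, user expects these tools: [{', '.join(uncalled)}] to be called still."
    return continue_stepping, heartbeat_reason

print(force_required_tools(False, ["send_message"]))
```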