letta-nightly 0.8.15.dev20250719104256__py3-none-any.whl → 0.8.16.dev20250721070720__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (99)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +27 -11
  3. letta/agents/helpers.py +1 -1
  4. letta/agents/letta_agent.py +518 -322
  5. letta/agents/letta_agent_batch.py +1 -2
  6. letta/agents/voice_agent.py +15 -17
  7. letta/client/client.py +3 -3
  8. letta/constants.py +5 -0
  9. letta/embeddings.py +0 -2
  10. letta/errors.py +8 -0
  11. letta/functions/function_sets/base.py +3 -3
  12. letta/functions/helpers.py +2 -3
  13. letta/groups/sleeptime_multi_agent.py +0 -1
  14. letta/helpers/composio_helpers.py +2 -2
  15. letta/helpers/converters.py +1 -1
  16. letta/helpers/pinecone_utils.py +8 -0
  17. letta/helpers/tool_rule_solver.py +13 -18
  18. letta/llm_api/aws_bedrock.py +16 -2
  19. letta/llm_api/cohere.py +1 -1
  20. letta/llm_api/openai_client.py +1 -1
  21. letta/local_llm/grammars/gbnf_grammar_generator.py +1 -1
  22. letta/local_llm/llm_chat_completion_wrappers/zephyr.py +14 -14
  23. letta/local_llm/utils.py +1 -2
  24. letta/orm/agent.py +3 -3
  25. letta/orm/block.py +4 -4
  26. letta/orm/files_agents.py +0 -1
  27. letta/orm/identity.py +2 -0
  28. letta/orm/mcp_server.py +0 -2
  29. letta/orm/message.py +140 -14
  30. letta/orm/organization.py +5 -5
  31. letta/orm/passage.py +4 -4
  32. letta/orm/source.py +1 -1
  33. letta/orm/sqlalchemy_base.py +61 -39
  34. letta/orm/step.py +2 -0
  35. letta/otel/db_pool_monitoring.py +308 -0
  36. letta/otel/metric_registry.py +94 -1
  37. letta/otel/sqlalchemy_instrumentation.py +548 -0
  38. letta/otel/sqlalchemy_instrumentation_integration.py +124 -0
  39. letta/otel/tracing.py +37 -1
  40. letta/schemas/agent.py +0 -3
  41. letta/schemas/agent_file.py +283 -0
  42. letta/schemas/block.py +0 -3
  43. letta/schemas/file.py +28 -26
  44. letta/schemas/letta_message.py +15 -4
  45. letta/schemas/memory.py +1 -1
  46. letta/schemas/message.py +31 -26
  47. letta/schemas/openai/chat_completion_response.py +0 -1
  48. letta/schemas/providers.py +20 -0
  49. letta/schemas/source.py +11 -13
  50. letta/schemas/step.py +12 -0
  51. letta/schemas/tool.py +0 -4
  52. letta/serialize_schemas/marshmallow_agent.py +14 -1
  53. letta/serialize_schemas/marshmallow_block.py +23 -1
  54. letta/serialize_schemas/marshmallow_message.py +1 -3
  55. letta/serialize_schemas/marshmallow_tool.py +23 -1
  56. letta/server/db.py +110 -6
  57. letta/server/rest_api/app.py +85 -73
  58. letta/server/rest_api/routers/v1/agents.py +68 -53
  59. letta/server/rest_api/routers/v1/blocks.py +2 -2
  60. letta/server/rest_api/routers/v1/jobs.py +3 -0
  61. letta/server/rest_api/routers/v1/organizations.py +2 -2
  62. letta/server/rest_api/routers/v1/sources.py +18 -2
  63. letta/server/rest_api/routers/v1/tools.py +11 -12
  64. letta/server/rest_api/routers/v1/users.py +1 -1
  65. letta/server/rest_api/streaming_response.py +13 -5
  66. letta/server/rest_api/utils.py +8 -25
  67. letta/server/server.py +11 -4
  68. letta/server/ws_api/server.py +2 -2
  69. letta/services/agent_file_manager.py +616 -0
  70. letta/services/agent_manager.py +133 -46
  71. letta/services/block_manager.py +38 -17
  72. letta/services/file_manager.py +106 -21
  73. letta/services/file_processor/file_processor.py +93 -0
  74. letta/services/files_agents_manager.py +28 -0
  75. letta/services/group_manager.py +4 -5
  76. letta/services/helpers/agent_manager_helper.py +57 -9
  77. letta/services/identity_manager.py +22 -0
  78. letta/services/job_manager.py +210 -91
  79. letta/services/llm_batch_manager.py +9 -6
  80. letta/services/mcp/stdio_client.py +1 -2
  81. letta/services/mcp_manager.py +0 -1
  82. letta/services/message_manager.py +49 -26
  83. letta/services/passage_manager.py +0 -1
  84. letta/services/provider_manager.py +1 -1
  85. letta/services/source_manager.py +114 -5
  86. letta/services/step_manager.py +36 -4
  87. letta/services/telemetry_manager.py +9 -2
  88. letta/services/tool_executor/builtin_tool_executor.py +5 -1
  89. letta/services/tool_executor/core_tool_executor.py +3 -3
  90. letta/services/tool_manager.py +95 -20
  91. letta/services/user_manager.py +4 -12
  92. letta/settings.py +23 -6
  93. letta/system.py +1 -1
  94. letta/utils.py +26 -2
  95. {letta_nightly-0.8.15.dev20250719104256.dist-info → letta_nightly-0.8.16.dev20250721070720.dist-info}/METADATA +3 -2
  96. {letta_nightly-0.8.15.dev20250719104256.dist-info → letta_nightly-0.8.16.dev20250721070720.dist-info}/RECORD +99 -94
  97. {letta_nightly-0.8.15.dev20250719104256.dist-info → letta_nightly-0.8.16.dev20250721070720.dist-info}/LICENSE +0 -0
  98. {letta_nightly-0.8.15.dev20250719104256.dist-info → letta_nightly-0.8.16.dev20250721070720.dist-info}/WHEEL +0 -0
  99. {letta_nightly-0.8.15.dev20250719104256.dist-info → letta_nightly-0.8.16.dev20250721070720.dist-info}/entry_points.txt +0 -0
letta/agents/letta_agent.py
@@ -1,4 +1,3 @@
- import asyncio
  import json
  import uuid
  from collections.abc import AsyncGenerator
@@ -44,6 +43,7 @@ from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message, MessageCreate
  from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
  from letta.schemas.provider_trace import ProviderTraceCreate
+ from letta.schemas.step import StepProgression
  from letta.schemas.tool_execution_result import ToolExecutionResult
  from letta.schemas.usage import LettaUsageStatistics
  from letta.schemas.user import User
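
Note: `StepProgression` is compared with `<`, `<=`, and `==` against checkpoints such as `STEP_LOGGED` and `LOGGED_TRACE` in the hunks below, so it must be an ordered enum. A minimal sketch of a compatible definition, assuming an `IntEnum` base; the member names all appear in this diff, but the concrete values are illustrative (the real definition lives in `letta/schemas/step.py`):

    # Illustrative only: an ordered step-lifecycle enum compatible with the
    # comparisons used in letta_agent.py below. The integer values are assumptions.
    from enum import IntEnum

    class StepProgression(IntEnum):
        START = 0
        RESPONSE_RECEIVED = 1  # non-streaming path
        STREAM_RECEIVED = 2    # streaming path
        STEP_LOGGED = 3
        LOGGED_TRACE = 4
        FINISHED = 5

    # IntEnum members compare as plain integers, which is what the
    # finally blocks below rely on:
    assert StepProgression.START < StepProgression.STEP_LOGGED
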
@@ -239,100 +239,164 @@ class LettaAgent(BaseAgent):
  agent_step_span = tracer.start_span("agent_step", start_time=step_start)
  agent_step_span.set_attributes({"step_id": step_id})

- request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
- await self._build_and_request_from_llm(
- current_in_context_messages,
- new_in_context_messages,
- agent_state,
- llm_client,
- tool_rules_solver,
- agent_step_span,
+ step_progression = StepProgression.START
+ should_continue = False
+ try:
+ request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
+ await self._build_and_request_from_llm(
+ current_in_context_messages,
+ new_in_context_messages,
+ agent_state,
+ llm_client,
+ tool_rules_solver,
+ agent_step_span,
+ )
  )
- )
- in_context_messages = current_in_context_messages + new_in_context_messages
-
- log_event("agent.stream_no_tokens.llm_response.received") # [3^]
-
- response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
-
- # update usage
- usage.step_count += 1
- usage.completion_tokens += response.usage.completion_tokens
- usage.prompt_tokens += response.usage.prompt_tokens
- usage.total_tokens += response.usage.total_tokens
- MetricRegistry().message_output_tokens.record(
- response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
- )
+ in_context_messages = current_in_context_messages + new_in_context_messages

- if not response.choices[0].message.tool_calls:
- # TODO: make into a real error
- raise ValueError("No tool calls found in response, model must make a tool call")
- tool_call = response.choices[0].message.tool_calls[0]
- if response.choices[0].message.reasoning_content:
- reasoning = [
- ReasoningContent(
- reasoning=response.choices[0].message.reasoning_content,
- is_native=True,
- signature=response.choices[0].message.reasoning_content_signature,
- )
- ]
- elif response.choices[0].message.omitted_reasoning_content:
- reasoning = [OmittedReasoningContent()]
- elif response.choices[0].message.content:
- reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
- else:
- self.logger.info("No reasoning content found.")
- reasoning = None
-
- persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
- tool_call,
- valid_tool_names,
- agent_state,
- tool_rules_solver,
- response.usage,
- reasoning_content=reasoning,
- step_id=step_id,
- initial_messages=initial_messages,
- agent_step_span=agent_step_span,
- is_final_step=(i == max_steps - 1),
- )
+ step_progression = StepProgression.RESPONSE_RECEIVED
+ log_event("agent.stream_no_tokens.llm_response.received") # [3^]

- # TODO (cliandy): handle message contexts with larger refactor and dedupe logic
- new_message_idx = len(initial_messages) if initial_messages else 0
- self.response_messages.extend(persisted_messages[new_message_idx:])
- new_in_context_messages.extend(persisted_messages[new_message_idx:])
- initial_messages = None
- log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
+ response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)

- # log step time
- now = get_utc_timestamp_ns()
- step_ns = now - step_start
- agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
- agent_step_span.end()
+ # update usage
+ usage.step_count += 1
+ usage.completion_tokens += response.usage.completion_tokens
+ usage.prompt_tokens += response.usage.prompt_tokens
+ usage.total_tokens += response.usage.total_tokens
+ MetricRegistry().message_output_tokens.record(
+ response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+ )

- # Log LLM Trace
- await self.telemetry_manager.create_provider_trace_async(
- actor=self.actor,
- provider_trace_create=ProviderTraceCreate(
- request_json=request_data,
- response_json=response_data,
+ if not response.choices[0].message.tool_calls:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+ raise ValueError("No tool calls found in response, model must make a tool call")
+ tool_call = response.choices[0].message.tool_calls[0]
+ if response.choices[0].message.reasoning_content:
+ reasoning = [
+ ReasoningContent(
+ reasoning=response.choices[0].message.reasoning_content,
+ is_native=True,
+ signature=response.choices[0].message.reasoning_content_signature,
+ )
+ ]
+ elif response.choices[0].message.omitted_reasoning_content:
+ reasoning = [OmittedReasoningContent()]
+ elif response.choices[0].message.content:
+ reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+ else:
+ self.logger.info("No reasoning content found.")
+ reasoning = None
+
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+ tool_call,
+ valid_tool_names,
+ agent_state,
+ tool_rules_solver,
+ response.usage,
+ reasoning_content=reasoning,
  step_id=step_id,
- organization_id=self.actor.organization_id,
- ),
- )
+ initial_messages=initial_messages,
+ agent_step_span=agent_step_span,
+ is_final_step=(i == max_steps - 1),
+ )
+ step_progression = StepProgression.STEP_LOGGED
+
+ # TODO (cliandy): handle message contexts with larger refactor and dedupe logic
+ new_message_idx = len(initial_messages) if initial_messages else 0
+ self.response_messages.extend(persisted_messages[new_message_idx:])
+ new_in_context_messages.extend(persisted_messages[new_message_idx:])
+ initial_messages = None
+ log_event("agent.stream_no_tokens.llm_response.processed") # [4^]
+
+ # log step time
+ now = get_utc_timestamp_ns()
+ step_ns = now - step_start
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+ agent_step_span.end()
+
+ # Log LLM Trace
+ await self.telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json=response_data,
+ step_id=step_id,
+ organization_id=self.actor.organization_id,
+ ),
+ )
+ step_progression = StepProgression.LOGGED_TRACE

- # stream step
- # TODO: improve TTFT
- filter_user_messages = [m for m in persisted_messages if m.role != "user"]
- letta_messages = Message.to_letta_messages_from_list(
- filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
- )
+ # stream step
+ # TODO: improve TTFT
+ filter_user_messages = [m for m in persisted_messages if m.role != "user"]
+ letta_messages = Message.to_letta_messages_from_list(
+ filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
+ )

- for message in letta_messages:
- if include_return_message_types is None or message.message_type in include_return_message_types:
- yield f"data: {message.model_dump_json()}\n\n"
+ for message in letta_messages:
+ if include_return_message_types is None or message.message_type in include_return_message_types:
+ yield f"data: {message.model_dump_json()}\n\n"

- MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+ MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+ step_progression = StepProgression.FINISHED
+ except Exception as e:
+ # Handle any unexpected errors during step processing
+ self.logger.error(f"Error during step processing: {e}")
+
+ # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+ if not stop_reason:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+ self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+ elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
+ raise ValueError(f"Invalid Stop Reason: {stop_reason}")
+
+ # Send error stop reason to client and re-raise
+ yield f"data: {stop_reason.model_dump_json()}\n\n", 500
+ raise
+
+ # Update step if it needs to be updated
+ finally:
+ if settings.track_stop_reason:
+ self.logger.info("Running final update. Step Progression: %s", step_progression)
+ try:
+ if step_progression < StepProgression.STEP_LOGGED:
+ await self.step_manager.log_step_async(
+ actor=self.actor,
+ agent_id=agent_state.id,
+ provider_name=agent_state.llm_config.model_endpoint_type,
+ provider_category=agent_state.llm_config.provider_category or "base",
+ model=agent_state.llm_config.model,
+ model_endpoint=agent_state.llm_config.model_endpoint,
+ context_window_limit=agent_state.llm_config.context_window,
+ usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+ provider_id=None,
+ job_id=self.current_run_id if self.current_run_id else None,
+ step_id=step_id,
+ project_id=agent_state.project_id,
+ stop_reason=stop_reason,
+ )
+ if step_progression <= StepProgression.RESPONSE_RECEIVED:
+ # TODO (cliandy): persist response if we get it back
+ if settings.track_errored_messages:
+ for message in initial_messages:
+ message.is_err = True
+ message.step_id = step_id
+ await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
+ elif step_progression <= StepProgression.LOGGED_TRACE:
+ if stop_reason is None:
+ self.logger.error("Error in step after logging step")
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ elif step_progression == StepProgression.FINISHED and not should_continue:
+ if stop_reason is None:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ else:
+ self.logger.error("Invalid StepProgression value")
+ except Exception as e:
+ self.logger.error("Failed to update step: %s", e)

  if not should_continue:
  break
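
The `finally` block above is effectively a decision table keyed on how far the step progressed before failing (or finishing). A self-contained sketch that mirrors its branch structure for the non-streaming path; the enum repeats the illustrative values from the sketch above, and `finalize_actions` is a hypothetical helper, not part of this diff:

    from enum import IntEnum

    class StepProgression(IntEnum):  # illustrative values, as sketched above
        START = 0
        RESPONSE_RECEIVED = 1
        STREAM_RECEIVED = 2
        STEP_LOGGED = 3
        LOGGED_TRACE = 4
        FINISHED = 5

    def finalize_actions(progression: StepProgression, should_continue: bool) -> list[str]:
        """Mirrors the branch structure of the finally block above."""
        actions = []
        if progression < StepProgression.STEP_LOGGED:
            # Step row was never written: log a stub step with zeroed usage.
            actions.append("log_step_async with zeroed UsageStatistics")
        if progression <= StepProgression.RESPONSE_RECEIVED:
            # Failed at or before the LLM response: optionally persist the
            # input messages flagged with is_err for later inspection.
            actions.append("persist initial_messages with is_err=True (if tracking enabled)")
        elif progression <= StepProgression.LOGGED_TRACE:
            actions.append("update_step_stop_reason (defaulting stop_reason to error)")
        elif progression == StepProgression.FINISHED and not should_continue:
            actions.append("update_step_stop_reason (defaulting stop_reason to end_turn)")
        else:
            actions.append("log 'Invalid StepProgression value'")
        return actions

    print(finalize_actions(StepProgression.START, False))
    # ['log_step_async with zeroed UsageStatistics',
    #  'persist initial_messages with is_err=True (if tracking enabled)']
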
@@ -397,17 +461,6 @@ class LettaAgent(BaseAgent):
  stop_reason = None
  usage = LettaUsageStatistics()
  for i in range(max_steps):
- # Check for job cancellation at the start of each step
- if await self._check_run_cancellation():
- stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
- logger.info(f"Agent execution cancelled for run {self.current_run_id}")
- break
-
- step_id = generate_step_id()
- step_start = get_utc_timestamp_ns()
- agent_step_span = tracer.start_span("agent_step", start_time=step_start)
- agent_step_span.set_attributes({"step_id": step_id})
-
  # If dry run, build request data and return it without making LLM call
  if dry_run:
  request_data, valid_tool_names = await self._create_llm_request_data_async(
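
This hunk moves the cancellation check, `step_id` generation, and span creation so they run after the `dry_run` early return (the relocated block appears as added lines in the next hunk). Presumably this avoids starting an `agent_step` span that is never ended when `dry_run` returns early; spans here are ended manually via `agent_step_span.end()`, so an early exit between start and end would leak the span. A toy illustration of that hazard and the fixed ordering (the `Span` class is hypothetical; the real code uses the OpenTelemetry tracer):

    # Toy illustration: with manual start/end, early returns must come first.
    class Span:
        def __init__(self, name: str):
            self.name, self.ended = name, False
        def end(self):
            self.ended = True

    def step(dry_run: bool) -> Span | None:
        if dry_run:
            return None           # early exits first...
        span = Span("agent_step")  # ...then start instrumentation
        try:
            pass                   # ... do the actual step work ...
        finally:
            span.end()             # always ended, even if the work raises
        return span

    assert step(dry_run=True) is None
    assert step(dry_run=False).ended
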
@@ -418,84 +471,159 @@ class LettaAgent(BaseAgent):
  )
  return request_data

- request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
- await self._build_and_request_from_llm(
- current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver, agent_step_span
- )
- )
- in_context_messages = current_in_context_messages + new_in_context_messages
-
- log_event("agent.step.llm_response.received") # [3^]
+ # Check for job cancellation at the start of each step
+ if await self._check_run_cancellation():
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+ logger.info(f"Agent execution cancelled for run {self.current_run_id}")
+ break

- response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
+ step_id = generate_step_id()
+ step_start = get_utc_timestamp_ns()
+ agent_step_span = tracer.start_span("agent_step", start_time=step_start)
+ agent_step_span.set_attributes({"step_id": step_id})

- usage.step_count += 1
- usage.completion_tokens += response.usage.completion_tokens
- usage.prompt_tokens += response.usage.prompt_tokens
- usage.total_tokens += response.usage.total_tokens
- usage.run_ids = [run_id] if run_id else None
- MetricRegistry().message_output_tokens.record(
- response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
- )
+ step_progression = StepProgression.START
+ should_continue = False

- if not response.choices[0].message.tool_calls:
- # TODO: make into a real error
- raise ValueError("No tool calls found in response, model must make a tool call")
- tool_call = response.choices[0].message.tool_calls[0]
- if response.choices[0].message.reasoning_content:
- reasoning = [
- ReasoningContent(
- reasoning=response.choices[0].message.reasoning_content,
- is_native=True,
- signature=response.choices[0].message.reasoning_content_signature,
+ try:
+ request_data, response_data, current_in_context_messages, new_in_context_messages, valid_tool_names = (
+ await self._build_and_request_from_llm(
+ current_in_context_messages, new_in_context_messages, agent_state, llm_client, tool_rules_solver, agent_step_span
  )
- ]
- elif response.choices[0].message.content:
- reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
- elif response.choices[0].message.omitted_reasoning_content:
- reasoning = [OmittedReasoningContent()]
- else:
- self.logger.info("No reasoning content found.")
- reasoning = None
-
- persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
- tool_call,
- valid_tool_names,
- agent_state,
- tool_rules_solver,
- response.usage,
- reasoning_content=reasoning,
- step_id=step_id,
- initial_messages=initial_messages,
- agent_step_span=agent_step_span,
- is_final_step=(i == max_steps - 1),
- run_id=run_id,
- )
- new_message_idx = len(initial_messages) if initial_messages else 0
- self.response_messages.extend(persisted_messages[new_message_idx:])
- new_in_context_messages.extend(persisted_messages[new_message_idx:])
+ )
+ in_context_messages = current_in_context_messages + new_in_context_messages

- initial_messages = None
- log_event("agent.step.llm_response.processed") # [4^]
+ step_progression = StepProgression.RESPONSE_RECEIVED
+ log_event("agent.step.llm_response.received") # [3^]

- # log step time
- now = get_utc_timestamp_ns()
- step_ns = now - step_start
- agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
- agent_step_span.end()
+ response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)

- # Log LLM Trace
- await self.telemetry_manager.create_provider_trace_async(
- actor=self.actor,
- provider_trace_create=ProviderTraceCreate(
- request_json=request_data,
- response_json=response_data,
+ usage.step_count += 1
+ usage.completion_tokens += response.usage.completion_tokens
+ usage.prompt_tokens += response.usage.prompt_tokens
+ usage.total_tokens += response.usage.total_tokens
+ usage.run_ids = [run_id] if run_id else None
+ MetricRegistry().message_output_tokens.record(
+ response.usage.completion_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+ )
+
+ if not response.choices[0].message.tool_calls:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+ raise ValueError("No tool calls found in response, model must make a tool call")
+ tool_call = response.choices[0].message.tool_calls[0]
+ if response.choices[0].message.reasoning_content:
+ reasoning = [
+ ReasoningContent(
+ reasoning=response.choices[0].message.reasoning_content,
+ is_native=True,
+ signature=response.choices[0].message.reasoning_content_signature,
+ )
+ ]
+ elif response.choices[0].message.content:
+ reasoning = [TextContent(text=response.choices[0].message.content)] # reasoning placed into content for legacy reasons
+ elif response.choices[0].message.omitted_reasoning_content:
+ reasoning = [OmittedReasoningContent()]
+ else:
+ self.logger.info("No reasoning content found.")
+ reasoning = None
+
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+ tool_call,
+ valid_tool_names,
+ agent_state,
+ tool_rules_solver,
+ response.usage,
+ reasoning_content=reasoning,
  step_id=step_id,
- organization_id=self.actor.organization_id,
- ),
- )
+ initial_messages=initial_messages,
+ agent_step_span=agent_step_span,
+ is_final_step=(i == max_steps - 1),
+ run_id=run_id,
+ )
+ step_progression = StepProgression.STEP_LOGGED
+
+ new_message_idx = len(initial_messages) if initial_messages else 0
+ self.response_messages.extend(persisted_messages[new_message_idx:])
+ new_in_context_messages.extend(persisted_messages[new_message_idx:])
+
+ initial_messages = None
+ log_event("agent.step.llm_response.processed") # [4^]
+
+ # log step time
+ now = get_utc_timestamp_ns()
+ step_ns = now - step_start
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+ agent_step_span.end()
+
+ # Log LLM Trace
+ await self.telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json=response_data,
+ step_id=step_id,
+ organization_id=self.actor.organization_id,
+ ),
+ )

- MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+ step_progression = StepProgression.LOGGED_TRACE
+ MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+ step_progression = StepProgression.FINISHED
+
+ except Exception as e:
+ # Handle any unexpected errors during step processing
+ self.logger.error(f"Error during step processing: {e}")
+
+ # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+ if not stop_reason:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+ self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+ elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
+ raise ValueError(f"Invalid Stop Reason: {stop_reason}")
+ raise
+
+ # Update step if it needs to be updated
+ finally:
+ if settings.track_stop_reason:
+ self.logger.info("Running final update. Step Progression: %s", step_progression)
+ try:
+ if step_progression < StepProgression.STEP_LOGGED:
+ await self.step_manager.log_step_async(
+ actor=self.actor,
+ agent_id=agent_state.id,
+ provider_name=agent_state.llm_config.model_endpoint_type,
+ provider_category=agent_state.llm_config.provider_category or "base",
+ model=agent_state.llm_config.model,
+ model_endpoint=agent_state.llm_config.model_endpoint,
+ context_window_limit=agent_state.llm_config.context_window,
+ usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+ provider_id=None,
+ job_id=self.current_run_id if self.current_run_id else None,
+ step_id=step_id,
+ project_id=agent_state.project_id,
+ stop_reason=stop_reason,
+ )
+ if step_progression <= StepProgression.RESPONSE_RECEIVED:
+ # TODO (cliandy): persist response if we get it back
+ if settings.track_errored_messages:
+ for message in initial_messages:
+ message.is_err = True
+ message.step_id = step_id
+ await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
+ elif step_progression <= StepProgression.LOGGED_TRACE:
+ if stop_reason is None:
+ self.logger.error("Error in step after logging step")
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ elif step_progression == StepProgression.FINISHED and not should_continue:
+ if stop_reason is None:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ else:
+ self.logger.error("Invalid StepProgression value")
+ except Exception as e:
+ self.logger.error("Failed to update step: %s", e)

  if not should_continue:
  break
@@ -577,6 +705,7 @@ class LettaAgent(BaseAgent):
  request_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})

  for i in range(max_steps):
+ step_id = generate_step_id()
  # Check for job cancellation at the start of each step
  if await self._check_run_cancellation():
  stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
@@ -584,163 +713,230 @@ class LettaAgent(BaseAgent):
  yield f"data: {stop_reason.model_dump_json()}\n\n"
  break

- step_id = generate_step_id()
  step_start = get_utc_timestamp_ns()
  agent_step_span = tracer.start_span("agent_step", start_time=step_start)
  agent_step_span.set_attributes({"step_id": step_id})

- (
- request_data,
- stream,
- current_in_context_messages,
- new_in_context_messages,
- valid_tool_names,
- provider_request_start_timestamp_ns,
- ) = await self._build_and_request_from_llm_streaming(
- first_chunk,
- agent_step_span,
- request_start_timestamp_ns,
- current_in_context_messages,
- new_in_context_messages,
- agent_state,
- llm_client,
- tool_rules_solver,
- )
- log_event("agent.stream.llm_response.received") # [3^]
-
- # TODO: THIS IS INCREDIBLY UGLY
- # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
- if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
- interface = AnthropicStreamingInterface(
- use_assistant_message=use_assistant_message,
- put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
- )
- elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
- interface = OpenAIStreamingInterface(
- use_assistant_message=use_assistant_message,
- put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
- )
- else:
- raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
-
- async for chunk in interface.process(
- stream,
- ttft_span=request_span,
- provider_request_start_timestamp_ns=provider_request_start_timestamp_ns,
- ):
- # Measure time to first token
- if first_chunk and request_span is not None:
- now = get_utc_timestamp_ns()
- ttft_ns = now - request_start_timestamp_ns
- request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
- metric_attributes = get_ctx_attributes()
- metric_attributes["model.name"] = agent_state.llm_config.model
- MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
- first_chunk = False
-
- if include_return_message_types is None or chunk.message_type in include_return_message_types:
- # filter down returned data
- yield f"data: {chunk.model_dump_json()}\n\n"
-
- stream_end_time_ns = get_utc_timestamp_ns()
-
- # update usage
- usage.step_count += 1
- usage.completion_tokens += interface.output_tokens
- usage.prompt_tokens += interface.input_tokens
- usage.total_tokens += interface.input_tokens + interface.output_tokens
- MetricRegistry().message_output_tokens.record(
- interface.output_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
- )
-
- # log LLM request time
- llm_request_ms = ns_to_ms(stream_end_time_ns - provider_request_start_timestamp_ns)
- agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
- MetricRegistry().llm_execution_time_ms_histogram.record(
- llm_request_ms,
- dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
- )
-
- # Process resulting stream content
+ step_progression = StepProgression.START
+ should_continue = False
  try:
- tool_call = interface.get_tool_call_object()
- except ValueError as e:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
- yield f"data: {stop_reason.model_dump_json()}\n\n"
- raise e
- except Exception as e:
- stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
- yield f"data: {stop_reason.model_dump_json()}\n\n"
- raise e
- reasoning_content = interface.get_reasoning_content()
- persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
- tool_call,
- valid_tool_names,
- agent_state,
- tool_rules_solver,
- UsageStatistics(
- completion_tokens=interface.output_tokens,
- prompt_tokens=interface.input_tokens,
- total_tokens=interface.input_tokens + interface.output_tokens,
- ),
- reasoning_content=reasoning_content,
- pre_computed_assistant_message_id=interface.letta_message_id,
- step_id=step_id,
- initial_messages=initial_messages,
- agent_step_span=agent_step_span,
- is_final_step=(i == max_steps - 1),
- )
- new_message_idx = len(initial_messages) if initial_messages else 0
- self.response_messages.extend(persisted_messages[new_message_idx:])
- new_in_context_messages.extend(persisted_messages[new_message_idx:])
+ (
+ request_data,
+ stream,
+ current_in_context_messages,
+ new_in_context_messages,
+ valid_tool_names,
+ provider_request_start_timestamp_ns,
+ ) = await self._build_and_request_from_llm_streaming(
+ first_chunk,
+ agent_step_span,
+ request_start_timestamp_ns,
+ current_in_context_messages,
+ new_in_context_messages,
+ agent_state,
+ llm_client,
+ tool_rules_solver,
+ )

- initial_messages = None
+ step_progression = StepProgression.STREAM_RECEIVED
+ log_event("agent.stream.llm_response.received") # [3^]

- # log total step time
- now = get_utc_timestamp_ns()
- step_ns = now - step_start
- agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
- agent_step_span.end()
+ # TODO: THIS IS INCREDIBLY UGLY
+ # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
+ if agent_state.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
+ interface = AnthropicStreamingInterface(
+ use_assistant_message=use_assistant_message,
+ put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+ )
+ elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
+ interface = OpenAIStreamingInterface(
+ use_assistant_message=use_assistant_message,
+ put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+ )
+ else:
+ raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
+
+ async for chunk in interface.process(
+ stream,
+ ttft_span=request_span,
+ provider_request_start_timestamp_ns=provider_request_start_timestamp_ns,
+ ):
+ # Measure time to first token
+ if first_chunk and request_span is not None:
+ now = get_utc_timestamp_ns()
+ ttft_ns = now - request_start_timestamp_ns
+ request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
+ metric_attributes = get_ctx_attributes()
+ metric_attributes["model.name"] = agent_state.llm_config.model
+ MetricRegistry().ttft_ms_histogram.record(ns_to_ms(ttft_ns), metric_attributes)
+ first_chunk = False
+
+ if include_return_message_types is None or chunk.message_type in include_return_message_types:
+ # filter down returned data
+ yield f"data: {chunk.model_dump_json()}\n\n"
+
+ stream_end_time_ns = get_utc_timestamp_ns()
+
+ # update usage
+ usage.step_count += 1
+ usage.completion_tokens += interface.output_tokens
+ usage.prompt_tokens += interface.input_tokens
+ usage.total_tokens += interface.input_tokens + interface.output_tokens
+ MetricRegistry().message_output_tokens.record(
+ interface.output_tokens, dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model})
+ )

- # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
- # log_event("agent.stream.llm_response.processed") # [4^]
+ # log LLM request time
+ llm_request_ms = ns_to_ms(stream_end_time_ns - provider_request_start_timestamp_ns)
+ agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": llm_request_ms})
+ MetricRegistry().llm_execution_time_ms_histogram.record(
+ llm_request_ms,
+ dict(get_ctx_attributes(), **{"model.name": agent_state.llm_config.model}),
+ )

- # Log LLM Trace
- # TODO (cliandy): we are piecing together the streamed response here. Content here does not match the actual response schema.
- await self.telemetry_manager.create_provider_trace_async(
- actor=self.actor,
- provider_trace_create=ProviderTraceCreate(
- request_json=request_data,
- response_json={
- "content": {
- "tool_call": tool_call.model_dump_json(),
- "reasoning": [content.model_dump_json() for content in reasoning_content],
- },
- "id": interface.message_id,
- "model": interface.model,
- "role": "assistant",
- # "stop_reason": "",
- # "stop_sequence": None,
- "type": "message",
- "usage": {"input_tokens": interface.input_tokens, "output_tokens": interface.output_tokens},
- },
+ # Process resulting stream content
+ try:
+ tool_call = interface.get_tool_call_object()
+ except ValueError as e:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+ raise e
+ except Exception as e:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
+ raise e
+ reasoning_content = interface.get_reasoning_content()
+ persisted_messages, should_continue, stop_reason = await self._handle_ai_response(
+ tool_call,
+ valid_tool_names,
+ agent_state,
+ tool_rules_solver,
+ UsageStatistics(
+ completion_tokens=interface.output_tokens,
+ prompt_tokens=interface.input_tokens,
+ total_tokens=interface.input_tokens + interface.output_tokens,
+ ),
+ reasoning_content=reasoning_content,
+ pre_computed_assistant_message_id=interface.letta_message_id,
  step_id=step_id,
- organization_id=self.actor.organization_id,
- ),
- )
+ initial_messages=initial_messages,
+ agent_step_span=agent_step_span,
+ is_final_step=(i == max_steps - 1),
+ )
+ step_progression = StepProgression.STEP_LOGGED
+
+ new_message_idx = len(initial_messages) if initial_messages else 0
+ self.response_messages.extend(persisted_messages[new_message_idx:])
+ new_in_context_messages.extend(persisted_messages[new_message_idx:])
+
+ initial_messages = None
+
+ # log total step time
+ now = get_utc_timestamp_ns()
+ step_ns = now - step_start
+ agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+ agent_step_span.end()
+
+ # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
+ # log_event("agent.stream.llm_response.processed") # [4^]
+
+ # Log LLM Trace
+ # We are piecing together the streamed response here.
+ # Content here does not match the actual response schema as streams come in chunks.
+ await self.telemetry_manager.create_provider_trace_async(
+ actor=self.actor,
+ provider_trace_create=ProviderTraceCreate(
+ request_json=request_data,
+ response_json={
+ "content": {
+ "tool_call": tool_call.model_dump_json(),
+ "reasoning": [content.model_dump_json() for content in reasoning_content],
+ },
+ "id": interface.message_id,
+ "model": interface.model,
+ "role": "assistant",
+ # "stop_reason": "",
+ # "stop_sequence": None,
+ "type": "message",
+ "usage": {
+ "input_tokens": interface.input_tokens,
+ "output_tokens": interface.output_tokens,
+ },
+ },
+ step_id=step_id,
+ organization_id=self.actor.organization_id,
+ ),
+ )
+ step_progression = StepProgression.LOGGED_TRACE

- tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
- if not (use_assistant_message and tool_return.name == "send_message"):
- # Apply message type filtering if specified
- if include_return_message_types is None or tool_return.message_type in include_return_message_types:
- yield f"data: {tool_return.model_dump_json()}\n\n"
+ # yields tool response as this is handled from Letta and not the response from the LLM provider
+ tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
+ if not (use_assistant_message and tool_return.name == "send_message"):
+ # Apply message type filtering if specified
+ if include_return_message_types is None or tool_return.message_type in include_return_message_types:
+ yield f"data: {tool_return.model_dump_json()}\n\n"

- # TODO (cliandy): consolidate and expand with trace
- MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+ # TODO (cliandy): consolidate and expand with trace
+ MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
+ step_progression = StepProgression.FINISHED
+
+ except Exception as e:
+ # Handle any unexpected errors during step processing
+ self.logger.error(f"Error during step processing: {e}")
+
+ # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+ if not stop_reason:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ elif stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+ self.logger.error("Error occurred during step processing, with valid stop reason: %s", stop_reason.stop_reason)
+ elif stop_reason.stop_reason not in (StopReasonType.no_tool_call, StopReasonType.invalid_tool_call):
+ raise ValueError(f"Invalid Stop Reason: {stop_reason}")
+
+ # Send error stop reason to client and re-raise with expected response code
+ yield f"data: {stop_reason.model_dump_json()}\n\n", 500
+ raise
+
+ # Update step if it needs to be updated
+ finally:
+ if settings.track_stop_reason:
+ self.logger.info("Running final update. Step Progression: %s", step_progression)
+ try:
+ if step_progression < StepProgression.STEP_LOGGED:
+ await self.step_manager.log_step_async(
+ actor=self.actor,
+ agent_id=agent_state.id,
+ provider_name=agent_state.llm_config.model_endpoint_type,
+ provider_category=agent_state.llm_config.provider_category or "base",
+ model=agent_state.llm_config.model,
+ model_endpoint=agent_state.llm_config.model_endpoint,
+ context_window_limit=agent_state.llm_config.context_window,
+ usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+ provider_id=None,
+ job_id=self.current_run_id if self.current_run_id else None,
+ step_id=step_id,
+ project_id=agent_state.project_id,
+ stop_reason=stop_reason,
+ )
+ if step_progression <= StepProgression.STREAM_RECEIVED:
+ if first_chunk and settings.track_errored_messages:
+ for message in initial_messages:
+ message.is_err = True
+ message.step_id = step_id
+ await self.message_manager.create_many_messages_async(initial_messages, actor=self.actor)
+ elif step_progression <= StepProgression.LOGGED_TRACE:
+ if stop_reason is None:
+ self.logger.error("Error in step after logging step")
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ elif step_progression == StepProgression.FINISHED and not should_continue:
+ if stop_reason is None:
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, stop_reason.stop_reason)
+ else:
+ self.logger.error("Invalid StepProgression value")
+ except Exception as e:
+ self.logger.error("Failed to update step: %s", e)

  if not should_continue:
  break
-
  # Extend the in context message ids
  if not agent_state.message_buffer_autoclear:
  await self._rebuild_context_window(
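
Every `yield f"data: {...}\n\n"` above emits one Server-Sent Events frame, and the error path now yields a `(frame, 500)` tuple, presumably so the streaming layer (`letta/server/rest_api/streaming_response.py` also changed in this release) can attach an HTTP status to the final event. A minimal sketch of consuming the `data:` framing on the client side; the parser is illustrative only, and a real client should use a proper SSE library:

    # Minimal parser for the 'data: {json}\n\n' SSE framing used above.
    import json

    def iter_sse_payloads(buffered_body: str):
        """Yield decoded JSON payloads from a fully buffered SSE body."""
        for frame in buffered_body.split("\n\n"):
            if frame.startswith("data: "):
                yield json.loads(frame[len("data: "):])

    body = (
        'data: {"message_type": "tool_return_message"}\n\n'
        'data: {"stop_reason": "end_turn"}\n\n'
    )
    for payload in iter_sse_payloads(body):
        print(payload)
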
@@ -919,7 +1115,7 @@ class LettaAgent(BaseAgent):
  in_context_messages=in_context_messages,
  new_letta_messages=new_letta_messages,
  )
- await self.agent_manager.set_in_context_messages_async(
+ await self.agent_manager.update_message_ids_async(
  agent_id=self.agent_id,
  message_ids=[m.id for m in new_in_context_messages],
  actor=self.actor,
@@ -936,7 +1132,7 @@ class LettaAgent(BaseAgent):
  new_in_context_messages, updated = await self.summarizer.summarize(
  in_context_messages=in_context_messages, new_letta_messages=[], force=True
  )
- return await self.agent_manager.set_in_context_messages_async(
+ return await self.agent_manager.update_message_ids_async(
  agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
  )

@@ -948,18 +1144,17 @@ class LettaAgent(BaseAgent):
  agent_state: AgentState,
  tool_rules_solver: ToolRulesSolver,
  ) -> tuple[dict, list[str]]:
- self.num_messages, self.num_archival_memories = await asyncio.gather(
- (
- self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
- if self.num_messages is None
- else asyncio.sleep(0, result=self.num_messages)
- ),
- (
- self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
- if self.num_archival_memories is None
- else asyncio.sleep(0, result=self.num_archival_memories)
- ),
- )
+ if not self.num_messages:
+ self.num_messages = await self.message_manager.size_async(
+ agent_id=agent_state.id,
+ actor=self.actor,
+ )
+ if not self.num_archival_memories:
+ self.num_archival_memories = await self.passage_manager.agent_passage_size_async(
+ agent_id=agent_state.id,
+ actor=self.actor,
+ )
+
  in_context_messages = await self._rebuild_memory_async(
  in_context_messages,
  agent_state,
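
This hunk replaces an `asyncio.gather` over two conditional coroutines, where already-cached counts were wrapped in `asyncio.sleep(0, result=...)` just to stay awaitable, with two plain cached lookups. A small sketch contrasting the two shapes; `fetch_count` is a stand-in for the real size queries:

    import asyncio

    async def fetch_count() -> int:
        return 42  # stand-in for a DB-backed size query

    async def old_shape(cached: int | None) -> int:
        # Pre-change: always gather, using sleep(0, result=...) as a
        # no-op awaitable that just returns the cached value.
        (value,) = await asyncio.gather(
            fetch_count() if cached is None else asyncio.sleep(0, result=cached)
        )
        return value

    async def new_shape(cached: int | None) -> int:
        # Post-change: only await when the cache is empty.
        if not cached:
            cached = await fetch_count()
        return cached

    print(asyncio.run(old_shape(None)), asyncio.run(new_shape(7)))  # 42 7

Note the guard also changed from `is None` to a truthiness check, so a cached count of 0 is re-fetched after this change, a subtle behavioral difference.
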
@@ -1108,6 +1303,7 @@ class LettaAgent(BaseAgent):
  job_id=run_id if run_id else self.current_run_id,
  step_id=step_id,
  project_id=agent_state.project_id,
+ stop_reason=stop_reason,
  )

  tool_call_messages = create_letta_messages_from_llm_response(