letta-nightly 0.12.1.dev20251024104217__py3-none-any.whl → 0.13.0.dev20251025104015__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (159)
  1. letta/__init__.py +2 -3
  2. letta/adapters/letta_llm_adapter.py +1 -0
  3. letta/adapters/simple_llm_request_adapter.py +8 -5
  4. letta/adapters/simple_llm_stream_adapter.py +22 -6
  5. letta/agents/agent_loop.py +10 -3
  6. letta/agents/base_agent.py +4 -1
  7. letta/agents/helpers.py +41 -9
  8. letta/agents/letta_agent.py +11 -10
  9. letta/agents/letta_agent_v2.py +47 -37
  10. letta/agents/letta_agent_v3.py +395 -300
  11. letta/agents/voice_agent.py +8 -6
  12. letta/agents/voice_sleeptime_agent.py +3 -3
  13. letta/constants.py +30 -7
  14. letta/errors.py +20 -0
  15. letta/functions/function_sets/base.py +55 -3
  16. letta/functions/mcp_client/types.py +33 -57
  17. letta/functions/schema_generator.py +135 -23
  18. letta/groups/sleeptime_multi_agent_v3.py +6 -11
  19. letta/groups/sleeptime_multi_agent_v4.py +227 -0
  20. letta/helpers/converters.py +78 -4
  21. letta/helpers/crypto_utils.py +6 -2
  22. letta/interfaces/anthropic_parallel_tool_call_streaming_interface.py +9 -11
  23. letta/interfaces/anthropic_streaming_interface.py +3 -4
  24. letta/interfaces/gemini_streaming_interface.py +4 -6
  25. letta/interfaces/openai_streaming_interface.py +63 -28
  26. letta/llm_api/anthropic_client.py +7 -4
  27. letta/llm_api/deepseek_client.py +6 -4
  28. letta/llm_api/google_ai_client.py +3 -12
  29. letta/llm_api/google_vertex_client.py +1 -1
  30. letta/llm_api/helpers.py +90 -61
  31. letta/llm_api/llm_api_tools.py +4 -1
  32. letta/llm_api/openai.py +12 -12
  33. letta/llm_api/openai_client.py +53 -16
  34. letta/local_llm/constants.py +4 -3
  35. letta/local_llm/json_parser.py +5 -2
  36. letta/local_llm/utils.py +2 -3
  37. letta/log.py +171 -7
  38. letta/orm/agent.py +43 -9
  39. letta/orm/archive.py +4 -0
  40. letta/orm/custom_columns.py +15 -0
  41. letta/orm/identity.py +11 -11
  42. letta/orm/mcp_server.py +9 -0
  43. letta/orm/message.py +6 -1
  44. letta/orm/run_metrics.py +7 -2
  45. letta/orm/sqlalchemy_base.py +2 -2
  46. letta/orm/tool.py +3 -0
  47. letta/otel/tracing.py +2 -0
  48. letta/prompts/prompt_generator.py +7 -2
  49. letta/schemas/agent.py +41 -10
  50. letta/schemas/agent_file.py +3 -0
  51. letta/schemas/archive.py +4 -2
  52. letta/schemas/block.py +2 -1
  53. letta/schemas/enums.py +36 -3
  54. letta/schemas/file.py +3 -3
  55. letta/schemas/folder.py +2 -1
  56. letta/schemas/group.py +2 -1
  57. letta/schemas/identity.py +18 -9
  58. letta/schemas/job.py +3 -1
  59. letta/schemas/letta_message.py +71 -12
  60. letta/schemas/letta_request.py +7 -3
  61. letta/schemas/letta_stop_reason.py +0 -25
  62. letta/schemas/llm_config.py +8 -2
  63. letta/schemas/mcp.py +80 -83
  64. letta/schemas/mcp_server.py +349 -0
  65. letta/schemas/memory.py +20 -8
  66. letta/schemas/message.py +212 -67
  67. letta/schemas/providers/anthropic.py +13 -6
  68. letta/schemas/providers/azure.py +6 -4
  69. letta/schemas/providers/base.py +8 -4
  70. letta/schemas/providers/bedrock.py +6 -2
  71. letta/schemas/providers/cerebras.py +7 -3
  72. letta/schemas/providers/deepseek.py +2 -1
  73. letta/schemas/providers/google_gemini.py +15 -6
  74. letta/schemas/providers/groq.py +2 -1
  75. letta/schemas/providers/lmstudio.py +9 -6
  76. letta/schemas/providers/mistral.py +2 -1
  77. letta/schemas/providers/openai.py +7 -2
  78. letta/schemas/providers/together.py +9 -3
  79. letta/schemas/providers/xai.py +7 -3
  80. letta/schemas/run.py +7 -2
  81. letta/schemas/run_metrics.py +2 -1
  82. letta/schemas/sandbox_config.py +2 -2
  83. letta/schemas/secret.py +3 -158
  84. letta/schemas/source.py +2 -2
  85. letta/schemas/step.py +2 -2
  86. letta/schemas/tool.py +24 -1
  87. letta/schemas/usage.py +0 -1
  88. letta/server/rest_api/app.py +123 -7
  89. letta/server/rest_api/dependencies.py +3 -0
  90. letta/server/rest_api/interface.py +7 -4
  91. letta/server/rest_api/redis_stream_manager.py +16 -1
  92. letta/server/rest_api/routers/v1/__init__.py +7 -0
  93. letta/server/rest_api/routers/v1/agents.py +332 -322
  94. letta/server/rest_api/routers/v1/archives.py +127 -40
  95. letta/server/rest_api/routers/v1/blocks.py +54 -6
  96. letta/server/rest_api/routers/v1/chat_completions.py +146 -0
  97. letta/server/rest_api/routers/v1/folders.py +27 -35
  98. letta/server/rest_api/routers/v1/groups.py +23 -35
  99. letta/server/rest_api/routers/v1/identities.py +24 -10
  100. letta/server/rest_api/routers/v1/internal_runs.py +107 -0
  101. letta/server/rest_api/routers/v1/internal_templates.py +162 -179
  102. letta/server/rest_api/routers/v1/jobs.py +15 -27
  103. letta/server/rest_api/routers/v1/mcp_servers.py +309 -0
  104. letta/server/rest_api/routers/v1/messages.py +23 -34
  105. letta/server/rest_api/routers/v1/organizations.py +6 -27
  106. letta/server/rest_api/routers/v1/providers.py +35 -62
  107. letta/server/rest_api/routers/v1/runs.py +30 -43
  108. letta/server/rest_api/routers/v1/sandbox_configs.py +6 -4
  109. letta/server/rest_api/routers/v1/sources.py +26 -42
  110. letta/server/rest_api/routers/v1/steps.py +16 -29
  111. letta/server/rest_api/routers/v1/tools.py +17 -13
  112. letta/server/rest_api/routers/v1/users.py +5 -17
  113. letta/server/rest_api/routers/v1/voice.py +18 -27
  114. letta/server/rest_api/streaming_response.py +5 -2
  115. letta/server/rest_api/utils.py +187 -25
  116. letta/server/server.py +27 -22
  117. letta/server/ws_api/server.py +5 -4
  118. letta/services/agent_manager.py +148 -26
  119. letta/services/agent_serialization_manager.py +6 -1
  120. letta/services/archive_manager.py +168 -15
  121. letta/services/block_manager.py +14 -4
  122. letta/services/file_manager.py +33 -29
  123. letta/services/group_manager.py +10 -0
  124. letta/services/helpers/agent_manager_helper.py +65 -11
  125. letta/services/identity_manager.py +105 -4
  126. letta/services/job_manager.py +11 -1
  127. letta/services/mcp/base_client.py +2 -2
  128. letta/services/mcp/oauth_utils.py +33 -8
  129. letta/services/mcp_manager.py +174 -78
  130. letta/services/mcp_server_manager.py +1331 -0
  131. letta/services/message_manager.py +109 -4
  132. letta/services/organization_manager.py +4 -4
  133. letta/services/passage_manager.py +9 -25
  134. letta/services/provider_manager.py +91 -15
  135. letta/services/run_manager.py +72 -15
  136. letta/services/sandbox_config_manager.py +45 -3
  137. letta/services/source_manager.py +15 -8
  138. letta/services/step_manager.py +24 -1
  139. letta/services/streaming_service.py +581 -0
  140. letta/services/summarizer/summarizer.py +1 -1
  141. letta/services/tool_executor/core_tool_executor.py +111 -0
  142. letta/services/tool_executor/files_tool_executor.py +5 -3
  143. letta/services/tool_executor/sandbox_tool_executor.py +2 -2
  144. letta/services/tool_executor/tool_execution_manager.py +1 -1
  145. letta/services/tool_manager.py +10 -3
  146. letta/services/tool_sandbox/base.py +61 -1
  147. letta/services/tool_sandbox/local_sandbox.py +1 -3
  148. letta/services/user_manager.py +2 -2
  149. letta/settings.py +49 -5
  150. letta/system.py +14 -5
  151. letta/utils.py +73 -1
  152. letta/validators.py +105 -0
  153. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/METADATA +4 -2
  154. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/RECORD +157 -151
  155. letta/schemas/letta_ping.py +0 -28
  156. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  157. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/WHEEL +0 -0
  158. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/entry_points.txt +0 -0
  159. {letta_nightly-0.12.1.dev20251024104217.dist-info → letta_nightly-0.13.0.dev20251025104015.dist-info}/licenses/LICENSE +0 -0
@@ -1,5 +1,7 @@
+import asyncio
+import json
 import uuid
-from typing import AsyncGenerator, Optional
+from typing import Any, AsyncGenerator, Dict, Optional
 
 from opentelemetry.trace import Span
 
@@ -10,6 +12,7 @@ from letta.agents.helpers import (
     _build_rule_violation_result,
     _load_last_function_response,
     _maybe_get_approval_messages,
+    _maybe_get_pending_tool_call_message,
     _prepare_in_context_messages_no_persist_async,
     _safe_load_tool_call_str,
     generate_step_id,
@@ -19,27 +22,36 @@ from letta.agents.letta_agent_v2 import LettaAgentV2
 from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX, REQUEST_HEARTBEAT_PARAM
 from letta.errors import ContextWindowExceededError, LLMError
 from letta.helpers import ToolRulesSolver
-from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
 from letta.helpers.tool_execution_helper import enable_strict_mode
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 from letta.otel.tracing import trace_method
 from letta.schemas.agent import AgentState
-from letta.schemas.letta_message import LettaMessage, MessageType
+from letta.schemas.enums import MessageRole
+from letta.schemas.letta_message import ApprovalReturn, LettaMessage, MessageType
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
-from letta.schemas.message import Message, MessageCreate
-from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
+from letta.schemas.message import Message, MessageCreate, ToolReturn
+from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall, UsageStatistics
 from letta.schemas.step import StepProgression
 from letta.schemas.step_metrics import StepMetrics
 from letta.schemas.tool_execution_result import ToolExecutionResult
-from letta.server.rest_api.utils import create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response
+from letta.server.rest_api.utils import (
+    create_approval_request_message_from_llm_response,
+    create_letta_messages_from_llm_response,
+    create_parallel_tool_messages_from_llm_response,
+)
 from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
 from letta.settings import settings, summarizer_settings
 from letta.system import package_function_response
 from letta.utils import log_telemetry, validate_function_response
 
 
+class ToolCallDenial(ToolCall):
+    reason: Optional[str] = None
+
+
 class LettaAgentV3(LettaAgentV2):
     """
     Similar to V2, but stripped down / simplified, while also generalized:
@@ -212,10 +224,10 @@ class LettaAgentV3(LettaAgentV2):
                     force=False,
                 )
 
-        except:
-            if self.stop_reason and not first_chunk:
-                yield f"data: {self.stop_reason.model_dump_json()}\n\n"
-            raise
+        except Exception as e:
+            self.logger.warning(f"Error during agent stream: {e}", exc_info=True)
+            if first_chunk:
+                raise  # only raise if first chunk has not been streamed yet
 
         if run_id:
             letta_messages = Message.to_letta_messages_from_list(
@@ -248,6 +260,7 @@ class LettaAgentV3(LettaAgentV2):
         request_start_timestamp_ns: int | None = None,
         remaining_turns: int = -1,
         dry_run: bool = False,
+        enforce_run_id_set: bool = True,
     ) -> AsyncGenerator[LettaMessage | dict, None]:
         """
         Execute a single agent step (one LLM call and tool execution).
@@ -269,9 +282,12 @@
         Yields:
             LettaMessage or dict: Chunks for streaming mode, or request data for dry_run
         """
+        if enforce_run_id_set and run_id is None:
+            raise AssertionError("run_id is required when enforce_run_id_set is True")
+
         step_progression = StepProgression.START
         # TODO(@caren): clean this up
-        tool_call, content, agent_step_span, first_chunk, step_id, logged_step, step_start_ns, step_metrics = (
+        tool_calls, content, agent_step_span, first_chunk, step_id, logged_step, step_start_ns, step_metrics = (
            None,
            None,
            None,
@@ -294,14 +310,38 @@
             self._require_tool_call = require_tool_call
 
             approval_request, approval_response = _maybe_get_approval_messages(messages)
+            tool_call_denials, tool_returns = [], []
             if approval_request and approval_response:
-                tool_call = approval_request.tool_calls[0]
                 content = approval_request.content
+
+                # Get tool calls that are pending
+                backfill_tool_call_id = approval_request.tool_calls[0].id  # legacy case
+                approved_tool_call_ids = {
+                    backfill_tool_call_id if a.tool_call_id.startswith("message-") else a.tool_call_id
+                    for a in approval_response.approvals
+                    if isinstance(a, ApprovalReturn) and a.approve
+                }
+                tool_calls = [tool_call for tool_call in approval_request.tool_calls if tool_call.id in approved_tool_call_ids]
+                pending_tool_call_message = _maybe_get_pending_tool_call_message(messages)
+                if pending_tool_call_message:
+                    tool_calls.extend(pending_tool_call_message.tool_calls)
+
+                # Get tool calls that were denied
+                denies = {d.tool_call_id: d for d in approval_response.approvals if isinstance(d, ApprovalReturn) and not d.approve}
+                tool_call_denials = [
+                    ToolCallDenial(**t.model_dump(), reason=denies.get(t.id).reason) for t in approval_request.tool_calls if t.id in denies
+                ]
+
+                # Get tool calls that were executed client side
+                if approval_response.approvals:
+                    tool_returns = [r for r in approval_response.approvals if isinstance(r, ToolReturn)]
+
                 step_id = approval_request.step_id
                 step_metrics = await self.step_manager.get_step_metrics_async(step_id=step_id, actor=self.actor)
             else:
                 # Check for job cancellation at the start of each step
                 if run_id and await self._check_run_cancellation(run_id):
+                    self.should_continue = False
                     self.stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
                     self.logger.info(f"Agent execution cancelled for run {run_id}")
                     return
@@ -323,6 +363,27 @@
                 force_tool_call=force_tool_call,
                 requires_subsequent_tool_call=self._require_tool_call,
             )
+            # TODO: Extend to more providers, and also approval tool rules
+            # Enable Anthropic parallel tool use when no tool rules are attached
+            try:
+                if self.agent_state.llm_config.model_endpoint_type in ["anthropic", "bedrock"]:
+                    no_tool_rules = (
+                        not self.agent_state.tool_rules
+                        or len([t for t in self.agent_state.tool_rules if t.type != "requires_approval"]) == 0
+                    )
+                    if (
+                        isinstance(request_data.get("tool_choice"), dict)
+                        and "disable_parallel_tool_use" in request_data["tool_choice"]
+                    ):
+                        # Gate parallel tool use on both: no tool rules and toggled on
+                        if no_tool_rules and self.agent_state.llm_config.parallel_tool_calls:
+                            request_data["tool_choice"]["disable_parallel_tool_use"] = False
+                        else:
+                            # Explicitly disable when tool rules present or llm_config toggled off
+                            request_data["tool_choice"]["disable_parallel_tool_use"] = True
+            except Exception:
+                # if this fails, we simply don't enable parallel tool use
+                pass
             if dry_run:
                 yield request_data
                 return
@@ -363,6 +424,7 @@
                         force=True,
                     )
                 else:
+                    self.stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error.value)
                     raise e
 
             step_progression, step_metrics = self._step_checkpoint_llm_request_finish(
  step_progression, step_metrics = self._step_checkpoint_llm_request_finish(
@@ -371,12 +433,16 @@ class LettaAgentV3(LettaAgentV2):
371
433
 
372
434
  self._update_global_usage_stats(llm_adapter.usage)
373
435
 
374
- # Handle the AI response with the extracted data
375
- # NOTE: in v3 loop, no tool call is OK
376
- # if tool_call is None and llm_adapter.tool_call is None:
436
+ # Handle the AI response with the extracted data (supports multiple tool calls)
437
+ # Gather tool calls. Approval paths specify a single tool call.
438
+ if hasattr(llm_adapter, "tool_calls") and llm_adapter.tool_calls:
439
+ tool_calls = llm_adapter.tool_calls
440
+ elif llm_adapter.tool_call is not None:
441
+ tool_calls = [llm_adapter.tool_call]
377
442
 
443
+ aggregated_persisted: list[Message] = []
378
444
  persisted_messages, self.should_continue, self.stop_reason = await self._handle_ai_response(
379
- tool_call=tool_call or llm_adapter.tool_call,
445
+ tool_calls=tool_calls,
380
446
  valid_tool_names=[tool["name"] for tool in valid_tools],
381
447
  agent_state=self.agent_state,
382
448
  tool_rules_solver=self.tool_rules_solver,
@@ -385,7 +451,6 @@
                     prompt_tokens=self.usage.prompt_tokens,
                     total_tokens=self.usage.total_tokens,
                 ),
-                # reasoning_content=reasoning_content or llm_adapter.reasoning_content,
                 content=content or llm_adapter.content,
                 pre_computed_assistant_message_id=llm_adapter.message_id,
                 step_id=step_id,
@@ -394,26 +459,28 @@
                 is_final_step=(remaining_turns == 0),
                 run_id=run_id,
                 step_metrics=step_metrics,
-                is_approval=approval_response.approve if approval_response is not None else False,
-                is_denial=(approval_response.approve == False) if approval_response is not None else False,
-                denial_reason=approval_response.denial_reason if approval_response is not None else None,
+                is_approval_response=approval_response is not None,
+                tool_call_denials=tool_call_denials,
+                tool_returns=tool_returns,
             )
+            aggregated_persisted.extend(persisted_messages)
             # NOTE: there is an edge case where persisted_messages is empty (the LLM did a "no-op")
 
             new_message_idx = len(input_messages_to_persist) if input_messages_to_persist else 0
-            self.response_messages.extend(persisted_messages[new_message_idx:])
+            self.response_messages.extend(aggregated_persisted[new_message_idx:])
 
             if llm_adapter.supports_token_streaming():
-                # Stream the tool return if a tool was actually executed.
-                # In the normal streaming path, the tool call is surfaced via the streaming interface
-                # (llm_adapter.tool_call), so don't rely solely on the local `tool_call` variable.
-                has_tool_return = any(m.role == "tool" for m in persisted_messages)
-                if len(persisted_messages) > 0 and persisted_messages[-1].role != "approval" and has_tool_return:
-                    tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
-                    if include_return_message_types is None or tool_return.message_type in include_return_message_types:
-                        yield tool_return
+                # Stream each tool return if tools were executed
+                response_tool_returns = [msg for msg in aggregated_persisted if msg.role == "tool"]
+                for tr in response_tool_returns:
+                    # Skip streaming for aggregated parallel tool returns (no per-call tool_call_id)
+                    if tr.tool_call_id is None and tr.tool_returns:
+                        continue
+                    tool_return_letta = tr.to_letta_messages()[0]
+                    if include_return_message_types is None or tool_return_letta.message_type in include_return_message_types:
+                        yield tool_return_letta
             else:
-                filter_user_messages = [m for m in persisted_messages[new_message_idx:] if m.role != "user"]
+                filter_user_messages = [m for m in aggregated_persisted[new_message_idx:] if m.role != "user"]
                 letta_messages = Message.to_letta_messages_from_list(
                     filter_user_messages,
                     use_assistant_message=False,  # NOTE: set to false
@@ -439,25 +506,21 @@
             # TODO should we be logging this even if persisted_messages is empty? Technically, there still was an LLM call
             step_progression, step_metrics = await self._step_checkpoint_finish(step_metrics, agent_step_span, logged_step)
         except Exception as e:
-            import traceback
-
-            self.logger.error(f"Error during step processing: {e}")
-            self.logger.error(f"Error traceback: {traceback.format_exc()}")
-            # self.logger.error(f"Error during step processing: {e}")
+            self.logger.warning(f"Error during step processing: {e}")
             self.job_update_metadata = {"error": str(e)}
 
             # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
             if not self.stop_reason:
                 self.stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
             elif self.stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
-                self.logger.error("Error occurred during step processing, with valid stop reason: %s", self.stop_reason.stop_reason)
+                self.logger.warning("Error occurred during step processing, with valid stop reason: %s", self.stop_reason.stop_reason)
             elif self.stop_reason.stop_reason not in (
                 StopReasonType.no_tool_call,
                 StopReasonType.invalid_tool_call,
                 StopReasonType.invalid_llm_response,
                 StopReasonType.llm_api_error,
             ):
-                self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", self.stop_reason.stop_reason)
+                self.logger.warning("Error occurred during step processing, with unexpected stop reason: %s", self.stop_reason.stop_reason)
             raise e
         finally:
             self.logger.debug("Running cleanup for agent loop run: %s", run_id)
@@ -498,12 +561,12 @@
                     )
                 elif step_progression <= StepProgression.LOGGED_TRACE:
                     if self.stop_reason is None:
-                        self.logger.error("Error in step after logging step")
+                        self.logger.warning("Error in step after logging step")
                         self.stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
                     if logged_step:
                         await self.step_manager.update_step_stop_reason(self.actor, step_id, self.stop_reason.stop_reason)
                 else:
-                    self.logger.error("Invalid StepProgression value")
+                    self.logger.warning("Invalid StepProgression value")
 
                 # Do tracking for failure cases. Can consolidate with success conditions later.
                 if settings.track_stop_reason:
@@ -520,17 +583,15 @@
                         run_id=run_id,
                     )
                 except Exception as e:
-                    self.logger.error(f"Error during post-completion step tracking: {e}")
+                    self.logger.warning(f"Error during post-completion step tracking: {e}")
 
     @trace_method
     async def _handle_ai_response(
         self,
-        tool_call: Optional[ToolCall],  # NOTE: should only be None for react agents
         valid_tool_names: list[str],
         agent_state: AgentState,
         tool_rules_solver: ToolRulesSolver,
         usage: UsageStatistics,
-        # reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
         content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
         pre_computed_assistant_message_id: str | None = None,
         step_id: str | None = None,
@@ -539,306 +600,340 @@
         is_final_step: bool | None = None,
         run_id: str | None = None,
         step_metrics: StepMetrics = None,
-        is_approval: bool | None = None,
-        is_denial: bool | None = None,
-        denial_reason: str | None = None,
+        is_approval_response: bool | None = None,
+        tool_calls: list[ToolCall] = [],
+        tool_call_denials: list[ToolCallDenial] = [],
+        tool_returns: list[ToolReturn] = [],
     ) -> tuple[list[Message], bool, LettaStopReason | None]:
         """
-        Handle the final AI response once streaming completes, execute / validate the
-        tool call, decide whether we should keep stepping, and persist state.
-        """
-        if tool_call is None:
-            # NOTE: in v3 loop, no tool call is OK
-            tool_call_id = None
-        else:
-            tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+        Handle the final AI response once streaming completes, execute / validate tool calls,
+        decide whether we should keep stepping, and persist state.
 
-        if is_denial:
-            continue_stepping = True
-            stop_reason = None
-            tool_call_messages = create_letta_messages_from_llm_response(
-                agent_id=agent_state.id,
-                model=agent_state.llm_config.model,
-                function_name=tool_call.function.name,
-                function_arguments={},
-                tool_execution_result=ToolExecutionResult(status="error"),
-                tool_call_id=tool_call_id,
-                function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
-                timezone=agent_state.timezone,
-                continue_stepping=continue_stepping,
-                # NOTE: we may need to change this to not have a "heartbeat" prefix for v3?
-                heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
-                reasoning_content=None,
-                pre_computed_assistant_message_id=None,
-                step_id=step_id,
-                run_id=run_id,
-                is_approval_response=True,
-                force_set_request_heartbeat=False,
-                add_heartbeat_on_continue=False,
-            )
-            messages_to_persist = (initial_messages or []) + tool_call_messages
-
-            # Set run_id on all messages before persisting
-            for message in messages_to_persist:
-                if message.run_id is None:
-                    message.run_id = run_id
-
-            persisted_messages = await self.message_manager.create_many_messages_async(
-                messages_to_persist,
-                actor=self.actor,
-                run_id=run_id,
-                project_id=agent_state.project_id,
-                template_id=agent_state.template_id,
-            )
-            return persisted_messages, continue_stepping, stop_reason
+        Unified approach: treats single and multi-tool calls uniformly to reduce code duplication.
+        """
+        # 1. Handle no-tool cases (content-only or no-op)
+        if not tool_calls and not tool_call_denials and not tool_returns:
+            # Case 1a: No tool call, no content (LLM no-op)
+            if content is None or len(content) == 0:
+                # Check if there are required-before-exit tools that haven't been called
+                uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
+                if uncalled:
+                    heartbeat_reason = (
+                        f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
+                    )
+                    from letta.server.rest_api.utils import create_heartbeat_system_message
+
+                    heartbeat_msg = create_heartbeat_system_message(
+                        agent_id=agent_state.id,
+                        model=agent_state.llm_config.model,
+                        function_call_success=True,
+                        timezone=agent_state.timezone,
+                        heartbeat_reason=heartbeat_reason,
+                        run_id=run_id,
+                    )
+                    messages_to_persist = (initial_messages or []) + [heartbeat_msg]
+                    continue_stepping, stop_reason = True, None
+                else:
+                    # No required tools remaining, end turn without persisting no-op
+                    continue_stepping = False
+                    stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+                    messages_to_persist = initial_messages or []
 
-        # -1. no tool call, no content
-        if tool_call is None and (content is None or len(content) == 0):
-            # Edge case is when there's also no content - basically, the LLM "no-op'd"
-            # If RequiredBeforeExitToolRule exists and not all required tools have been called,
-            # inject a rule-violation heartbeat to keep looping and inform the model.
-            uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
-            if uncalled:
-                # TODO: we may need to change this to not have a "heartbeat" prefix for v3?
-                heartbeat_reason = (
-                    f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
+            # Case 1b: No tool call but has content
+            else:
+                continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
+                    agent_state=agent_state,
+                    tool_call_name=None,
+                    tool_rule_violated=False,
+                    tool_rules_solver=tool_rules_solver,
+                    is_final_step=is_final_step,
                 )
-                from letta.server.rest_api.utils import create_heartbeat_system_message
-
-                heartbeat_msg = create_heartbeat_system_message(
+                assistant_message = create_letta_messages_from_llm_response(
                     agent_id=agent_state.id,
                     model=agent_state.llm_config.model,
-                    function_call_success=True,
+                    function_name=None,
+                    function_arguments=None,
+                    tool_execution_result=None,
+                    tool_call_id=None,
+                    function_response=None,
                     timezone=agent_state.timezone,
+                    continue_stepping=continue_stepping,
                     heartbeat_reason=heartbeat_reason,
+                    reasoning_content=content,
+                    pre_computed_assistant_message_id=pre_computed_assistant_message_id,
+                    step_id=step_id,
                     run_id=run_id,
+                    is_approval_response=is_approval_response,
+                    force_set_request_heartbeat=False,
+                    add_heartbeat_on_continue=bool(heartbeat_reason),
                 )
-                messages_to_persist = (initial_messages or []) + [heartbeat_msg]
-                continue_stepping, stop_reason = True, None
-            else:
-                # In this case, we actually do not want to persist the no-op message
-                continue_stepping, heartbeat_reason, stop_reason = False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
-                messages_to_persist = initial_messages or []
-
-        # 0. If there's no tool call, we can early exit
-        elif tool_call is None:
-            # TODO could just hardcode the line here instead of calling the function...
-            continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
-                agent_state=agent_state,
-                tool_call_name=None,
-                tool_rule_violated=False,
-                tool_rules_solver=tool_rules_solver,
-                is_final_step=is_final_step,
-            )
-            assistant_message = create_letta_messages_from_llm_response(
-                agent_id=agent_state.id,
-                model=agent_state.llm_config.model,
-                function_name=None,
-                function_arguments=None,
-                tool_execution_result=None,
-                tool_call_id=None,
-                function_response=None,
-                timezone=agent_state.timezone,
-                continue_stepping=continue_stepping,
-                heartbeat_reason=heartbeat_reason,
-                # NOTE: should probably rename this to `content`?
-                reasoning_content=content,
-                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
-                step_id=step_id,
-                run_id=run_id,
-                is_approval_response=is_approval or is_denial,
-                force_set_request_heartbeat=False,
-                # If we're continuing due to a required-before-exit rule, include a heartbeat to guide the model
-                add_heartbeat_on_continue=bool(heartbeat_reason),
-            )
-            messages_to_persist = (initial_messages or []) + assistant_message
+                messages_to_persist = (initial_messages or []) + assistant_message
 
-        else:
-            # 1. Parse and validate the tool-call envelope
-            tool_call_name: str = tool_call.function.name
-
-            tool_args = _safe_load_tool_call_str(tool_call.function.arguments)
-            # NOTE: these are failsafes - for v3, we should eventually be able to remove these
-            # request_heartbeat: bool = _pop_heartbeat(tool_args)
-            tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
-            tool_args.pop(INNER_THOUGHTS_KWARG, None)
-
-            log_telemetry(
-                self.logger,
-                "_handle_ai_response execute tool start",
-                tool_name=tool_call_name,
-                tool_args=tool_args,
-                tool_call_id=tool_call_id,
-                # request_heartbeat=request_heartbeat,
+            # Persist messages for no-tool cases
+            for message in messages_to_persist:
+                if message.run_id is None:
+                    message.run_id = run_id
+
+            persisted_messages = await self.message_manager.create_many_messages_async(
+                messages_to_persist, actor=self.actor, run_id=run_id, project_id=agent_state.project_id, template_id=agent_state.template_id
             )
+            return persisted_messages, continue_stepping, stop_reason
 
-            if not is_approval and tool_rules_solver.is_requires_approval_tool(tool_call_name):
-                approval_message = create_approval_request_message_from_llm_response(
+        # 2. Check whether tool call requires approval
+        if not is_approval_response:
+            requested_tool_calls = [t for t in tool_calls if tool_rules_solver.is_requires_approval_tool(t.function.name)]
+            allowed_tool_calls = [t for t in tool_calls if not tool_rules_solver.is_requires_approval_tool(t.function.name)]
+            if requested_tool_calls:
+                approval_messages = create_approval_request_message_from_llm_response(
                     agent_id=agent_state.id,
                     model=agent_state.llm_config.model,
-                    function_name=tool_call_name,
-                    function_arguments=tool_args,
-                    tool_call_id=tool_call_id,
-                    actor=self.actor,
-                    # continue_stepping=request_heartbeat,
-                    continue_stepping=True,
-                    # reasoning_content=reasoning_content,
+                    requested_tool_calls=requested_tool_calls,
+                    allowed_tool_calls=allowed_tool_calls,
                     reasoning_content=content,
                     pre_computed_assistant_message_id=pre_computed_assistant_message_id,
                     step_id=step_id,
                     run_id=run_id,
-                    append_request_heartbeat=False,
                 )
-                messages_to_persist = (initial_messages or []) + [approval_message]
-                continue_stepping = False
-                stop_reason = LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
-            else:
-                # 2. Execute the tool (or synthesize an error result if disallowed)
-                tool_rule_violated = tool_call_name not in valid_tool_names and not is_approval
-                if tool_rule_violated:
-                    tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
-                else:
-                    # Prefill + validate args if a rule provided them
-                    prefill_args = self.tool_rules_solver.last_prefilled_args_by_tool.get(tool_call_name)
-                    if prefill_args:
-                        # Find tool object for schema validation
-                        target_tool = next((t for t in agent_state.tools if t.name == tool_call_name), None)
-                        provenance = self.tool_rules_solver.last_prefilled_args_provenance.get(tool_call_name)
-                        try:
-                            tool_args = merge_and_validate_prefilled_args(
-                                tool=target_tool,
-                                llm_args=tool_args,
-                                prefilled_args=prefill_args,
-                            )
-                        except ValueError as ve:
-                            # Treat invalid prefilled args as user error and end the step
-                            error_prefix = "Invalid prefilled tool arguments from tool rules"
-                            prov_suffix = f" (source={provenance})" if provenance else ""
-                            err_msg = f"{error_prefix}{prov_suffix}: {str(ve)}"
-                            tool_execution_result = ToolExecutionResult(status="error", func_return=err_msg)
-
-                            # Create messages and early return persistence path below
-                            continue_stepping, heartbeat_reason, stop_reason = (
-                                False,
-                                None,
-                                LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value),
-                            )
-                            tool_call_messages = create_letta_messages_from_llm_response(
-                                agent_id=agent_state.id,
-                                model=agent_state.llm_config.model,
-                                function_name=tool_call_name,
-                                function_arguments=tool_args,
-                                tool_execution_result=tool_execution_result,
-                                tool_call_id=tool_call_id,
-                                function_response=tool_execution_result.func_return,
-                                timezone=agent_state.timezone,
-                                continue_stepping=continue_stepping,
-                                heartbeat_reason=None,
-                                reasoning_content=content,
-                                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
-                                step_id=step_id,
-                                run_id=run_id,
-                                is_approval_response=is_approval or is_denial,
-                                force_set_request_heartbeat=False,
-                                add_heartbeat_on_continue=False,
-                            )
-                            messages_to_persist = (initial_messages or []) + tool_call_messages
-
-                            # Set run_id on all messages before persisting
-                            for message in messages_to_persist:
-                                if message.run_id is None:
-                                    message.run_id = run_id
-
-                            persisted_messages = await self.message_manager.create_many_messages_async(
-                                messages_to_persist,
-                                actor=self.actor,
-                                run_id=run_id,
-                                project_id=agent_state.project_id,
-                                template_id=agent_state.template_id,
-                            )
-                            return persisted_messages, continue_stepping, stop_reason
-
-                # Track tool execution time
-                tool_start_time = get_utc_timestamp_ns()
-                tool_execution_result = await self._execute_tool(
-                    tool_name=tool_call_name,
-                    tool_args=tool_args,
-                    agent_state=agent_state,
-                    agent_step_span=agent_step_span,
-                    step_id=step_id,
-                )
-                tool_end_time = get_utc_timestamp_ns()
+                messages_to_persist = (initial_messages or []) + approval_messages
 
-                # Store tool execution time in metrics
-                step_metrics.tool_execution_ns = tool_end_time - tool_start_time
+                for message in messages_to_persist:
+                    if message.run_id is None:
+                        message.run_id = run_id
 
-                log_telemetry(
-                    self.logger,
-                    "_handle_ai_response execute tool finish",
-                    tool_execution_result=tool_execution_result,
-                    tool_call_id=tool_call_id,
+                persisted_messages = await self.message_manager.create_many_messages_async(
+                    messages_to_persist,
+                    actor=self.actor,
+                    run_id=run_id,
+                    project_id=agent_state.project_id,
+                    template_id=agent_state.template_id,
                 )
+                return persisted_messages, False, LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
+
+        result_tool_returns = []
 
-                # 3. Prepare the function-response payload
-                truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
-                return_char_limit = next(
-                    (t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
-                    None,
+        # 3. Handle client side tool execution
+        if tool_returns:
+            continue_stepping = True
+            stop_reason = None
+            result_tool_returns = tool_returns
+
+        # 4. Handle denial cases
+        if tool_call_denials:
+            for tool_call_denial in tool_call_denials:
+                tool_call_id = tool_call_denial.id or f"call_{uuid.uuid4().hex[:8]}"
+                packaged_function_response = package_function_response(
+                    was_success=False,
+                    response_string=f"Error: request to call tool denied. User reason: {tool_call_denial.reason}",
+                    timezone=agent_state.timezone,
                 )
-                function_response_string = validate_function_response(
-                    tool_execution_result.func_return,
-                    return_char_limit=return_char_limit,
-                    truncate=truncate,
+                tool_return = ToolReturn(
+                    tool_call_id=tool_call_id,
+                    func_response=packaged_function_response,
+                    status="error",
                 )
-                self.last_function_response = package_function_response(
-                    was_success=tool_execution_result.success_flag,
-                    response_string=function_response_string,
-                    timezone=agent_state.timezone,
+                result_tool_returns.append(tool_return)
+
+        # 5. Unified tool execution path (works for both single and multiple tools)
+
+        # 5a. Validate parallel tool calling constraints
+        if len(tool_calls) > 1:
+            # No parallel tool calls with tool rules
+            if agent_state.tool_rules and len([r for r in agent_state.tool_rules if r.type != "requires_approval"]) > 0:
+                raise ValueError(
+                    "Parallel tool calling is not allowed when tool rules are present. Disable tool rules to use parallel tool calls."
                 )
 
-            # 4. Decide whether to keep stepping (focal section simplified)
-            continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
+        # 5b. Prepare execution specs for all tools
+        exec_specs = []
+        for tc in tool_calls:
+            call_id = tc.id or f"call_{uuid.uuid4().hex[:8]}"
+            name = tc.function.name
+            args = _safe_load_tool_call_str(tc.function.arguments)
+            args.pop(REQUEST_HEARTBEAT_PARAM, None)
+            args.pop(INNER_THOUGHTS_KWARG, None)
+
+            # Validate against allowed tools
+            tool_rule_violated = name not in valid_tool_names and not is_approval_response
+
+            # Handle prefilled args if present
+            if not tool_rule_violated:
+                prefill_args = tool_rules_solver.last_prefilled_args_by_tool.get(name)
+                if prefill_args:
+                    target_tool = next((t for t in agent_state.tools if t.name == name), None)
+                    provenance = tool_rules_solver.last_prefilled_args_provenance.get(name)
+                    try:
+                        args = merge_and_validate_prefilled_args(
+                            tool=target_tool,
+                            llm_args=args,
+                            prefilled_args=prefill_args,
+                        )
+                    except ValueError as ve:
+                        # Invalid prefilled args - create error result
+                        error_prefix = "Invalid prefilled tool arguments from tool rules"
+                        prov_suffix = f" (source={provenance})" if provenance else ""
+                        err_msg = f"{error_prefix}{prov_suffix}: {str(ve)}"
+
+                        exec_specs.append(
+                            {
+                                "id": call_id,
+                                "name": name,
+                                "args": args,
+                                "violated": False,
+                                "error": err_msg,
+                            }
+                        )
+                        continue
+
+            exec_specs.append(
+                {
+                    "id": call_id,
+                    "name": name,
+                    "args": args,
+                    "violated": tool_rule_violated,
+                    "error": None,
+                }
+            )
+
+        # 5c. Execute tools (sequentially for single, parallel for multiple)
+        async def _run_one(spec: Dict[str, Any]):
+            if spec.get("error"):
+                return ToolExecutionResult(status="error", func_return=spec["error"]), 0
+            if spec["violated"]:
+                result = _build_rule_violation_result(spec["name"], valid_tool_names, tool_rules_solver)
+                return result, 0
+            t0 = get_utc_timestamp_ns()
+            target_tool = next((x for x in agent_state.tools if x.name == spec["name"]), None)
+            res = await self._execute_tool(
+                target_tool=target_tool,
+                tool_args=spec["args"],
+                agent_state=agent_state,
+                agent_step_span=agent_step_span,
+                step_id=step_id,
+            )
+            dt = get_utc_timestamp_ns() - t0
+            return res, dt
+
+        if len(exec_specs) == 1:
+            results = [await _run_one(exec_specs[0])]
+        else:
+            # separate tools by parallel execution capability
+            parallel_items = []
+            serial_items = []
+
+            for idx, spec in enumerate(exec_specs):
+                target_tool = next((x for x in agent_state.tools if x.name == spec["name"]), None)
+                if target_tool and target_tool.enable_parallel_execution:
+                    parallel_items.append((idx, spec))
+                else:
+                    serial_items.append((idx, spec))
+
+            # execute all parallel tools concurrently and all serial tools sequentially
+            results = [None] * len(exec_specs)
+
+            parallel_results = await asyncio.gather(*[_run_one(spec) for _, spec in parallel_items]) if parallel_items else []
+            for (idx, _), result in zip(parallel_items, parallel_results):
+                results[idx] = result
+
+            for idx, spec in serial_items:
+                results[idx] = await _run_one(spec)
+
+        # 5d. Update metrics with execution time
+        if step_metrics is not None and results:
+            step_metrics.tool_execution_ns = max(dt for _, dt in results)
+
+        # 5e. Process results and compute function responses
+        function_responses: list[Optional[str]] = []
+        persisted_continue_flags: list[bool] = []
+        persisted_stop_reasons: list[LettaStopReason | None] = []
+
+        for idx, spec in enumerate(exec_specs):
+            tool_execution_result, _ = results[idx]
+            has_prefill_error = bool(spec.get("error"))
+
+            # Validate and format function response
+            truncate = spec["name"] not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
+            return_char_limit = next((t.return_char_limit for t in agent_state.tools if t.name == spec["name"]), None)
+            function_response_string = validate_function_response(
+                tool_execution_result.func_return,
+                return_char_limit=return_char_limit,
+                truncate=truncate,
+            )
+            function_responses.append(function_response_string)
+
+            # Update last function response (for tool rules)
+            self.last_function_response = package_function_response(
+                was_success=tool_execution_result.success_flag,
+                response_string=function_response_string,
+                timezone=agent_state.timezone,
+            )
+
+            # Register successful tool call with solver
+            if not spec["violated"] and not has_prefill_error:
+                tool_rules_solver.register_tool_call(spec["name"])
+
+            # Decide continuation for this tool
+            if has_prefill_error:
+                cont = False
+                hb_reason = None
+                sr = LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value)
+            else:
+                cont, hb_reason, sr = self._decide_continuation(
                     agent_state=agent_state,
-                tool_call_name=tool_call_name,
-                tool_rule_violated=tool_rule_violated,
+                    tool_call_name=spec["name"],
+                    tool_rule_violated=spec["violated"],
                     tool_rules_solver=tool_rules_solver,
-                is_final_step=is_final_step,
+                    is_final_step=(is_final_step and idx == len(exec_specs) - 1),
                 )
+            persisted_continue_flags.append(cont)
+            persisted_stop_reasons.append(sr)
+
+        # 5f. Create messages using parallel message creation (works for both single and multi)
+        tool_call_specs = [{"name": s["name"], "arguments": s["args"], "id": s["id"]} for s in exec_specs]
+        tool_execution_results = [res for (res, _) in results]
+
+        # Use the parallel message creation function for both single and multiple tools
+        parallel_messages = create_parallel_tool_messages_from_llm_response(
+            agent_id=agent_state.id,
+            model=agent_state.llm_config.model,
+            tool_call_specs=tool_call_specs,
+            tool_execution_results=tool_execution_results,
+            function_responses=function_responses,
+            timezone=agent_state.timezone,
+            run_id=run_id,
+            step_id=step_id,
+            reasoning_content=content,
+            pre_computed_assistant_message_id=pre_computed_assistant_message_id,
+            is_approval_response=is_approval_response,
+            tool_returns=result_tool_returns,
+        )
 
-            # 5. Create messages (step was already created at the beginning)
-            tool_call_messages = create_letta_messages_from_llm_response(
-                agent_id=agent_state.id,
-                model=agent_state.llm_config.model,
-                function_name=tool_call_name,
-                function_arguments=tool_args,
-                tool_execution_result=tool_execution_result,
-                tool_call_id=tool_call_id,
-                function_response=function_response_string,
-                timezone=agent_state.timezone,
-                continue_stepping=continue_stepping,
-                # heartbeat_reason=heartbeat_reason,
-                heartbeat_reason=None,
-                # reasoning_content=reasoning_content,
-                reasoning_content=content,
-                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
-                step_id=step_id,
-                run_id=run_id,
-                is_approval_response=is_approval or is_denial,
-                force_set_request_heartbeat=False,
-                add_heartbeat_on_continue=False,
-            )
-            messages_to_persist = (initial_messages or []) + tool_call_messages
+        messages_to_persist: list[Message] = (initial_messages or []) + parallel_messages
 
         # Set run_id on all messages before persisting
         for message in messages_to_persist:
             if message.run_id is None:
                 message.run_id = run_id
 
+        # Persist all messages
         persisted_messages = await self.message_manager.create_many_messages_async(
-            messages_to_persist, actor=self.actor, run_id=run_id, project_id=agent_state.project_id, template_id=agent_state.template_id
+            messages_to_persist,
+            actor=self.actor,
+            run_id=run_id,
+            project_id=agent_state.project_id,
+            template_id=agent_state.template_id,
         )
 
-        return persisted_messages, continue_stepping, stop_reason
+        # 5g. Aggregate continuation decisions
+        # For multiple tools: continue if ANY says continue, use last non-None stop_reason
+        # For single tool: use its decision directly
+        aggregate_continue = any(persisted_continue_flags) if persisted_continue_flags else False
+        aggregate_continue = aggregate_continue or tool_call_denials or tool_returns  # continue if any tool call was denied or returned
+        aggregate_stop_reason = None
+        for sr in persisted_stop_reasons:
+            if sr is not None:
+                aggregate_stop_reason = sr
+
+        return persisted_messages, aggregate_continue, aggregate_stop_reason
 
     @trace_method
     def _decide_continuation(
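
Note on the new execution path: the rewritten _handle_ai_response splits tool calls into a parallel-capable group (run concurrently via asyncio.gather) and a serial group (run one at a time), then writes each result back to its original index. A minimal, self-contained sketch of that pattern follows; the ToolSpec, run_one, and execute_all names are illustrative stand-ins, not the actual Letta API.

import asyncio
from dataclasses import dataclass

@dataclass
class ToolSpec:
    name: str
    parallel_ok: bool  # stand-in for Tool.enable_parallel_execution in the diff

async def run_one(spec: ToolSpec) -> str:
    await asyncio.sleep(0.01)  # stand-in for real tool execution
    return f"{spec.name}: ok"

async def execute_all(specs: list[ToolSpec]) -> list[str | None]:
    results: list[str | None] = [None] * len(specs)
    parallel = [(i, s) for i, s in enumerate(specs) if s.parallel_ok]
    serial = [(i, s) for i, s in enumerate(specs) if not s.parallel_ok]
    # Parallel-capable tools run concurrently; zip pairs each gathered
    # result with its original index so output order matches input order.
    if parallel:
        done = await asyncio.gather(*(run_one(s) for _, s in parallel))
        for (i, _), r in zip(parallel, done):
            results[i] = r
    # Everything else runs sequentially, as in the diff's serial_items loop.
    for i, s in serial:
        results[i] = await run_one(s)
    return results

print(asyncio.run(execute_all([ToolSpec("a", True), ToolSpec("b", False), ToolSpec("c", True)])))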