letta-nightly 0.11.7.dev20251007104119__py3-none-any.whl → 0.12.0.dev20251009104148__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (151)
  1. letta/__init__.py +1 -1
  2. letta/adapters/letta_llm_adapter.py +1 -0
  3. letta/adapters/letta_llm_request_adapter.py +0 -1
  4. letta/adapters/letta_llm_stream_adapter.py +7 -2
  5. letta/adapters/simple_llm_request_adapter.py +88 -0
  6. letta/adapters/simple_llm_stream_adapter.py +192 -0
  7. letta/agents/agent_loop.py +6 -0
  8. letta/agents/ephemeral_summary_agent.py +2 -1
  9. letta/agents/helpers.py +142 -6
  10. letta/agents/letta_agent.py +13 -33
  11. letta/agents/letta_agent_batch.py +2 -4
  12. letta/agents/letta_agent_v2.py +87 -77
  13. letta/agents/letta_agent_v3.py +927 -0
  14. letta/agents/voice_agent.py +2 -6
  15. letta/constants.py +8 -4
  16. letta/database_utils.py +161 -0
  17. letta/errors.py +40 -0
  18. letta/functions/function_sets/base.py +84 -4
  19. letta/functions/function_sets/multi_agent.py +0 -3
  20. letta/functions/schema_generator.py +113 -71
  21. letta/groups/dynamic_multi_agent.py +3 -2
  22. letta/groups/helpers.py +1 -2
  23. letta/groups/round_robin_multi_agent.py +3 -2
  24. letta/groups/sleeptime_multi_agent.py +3 -2
  25. letta/groups/sleeptime_multi_agent_v2.py +1 -1
  26. letta/groups/sleeptime_multi_agent_v3.py +17 -17
  27. letta/groups/supervisor_multi_agent.py +84 -80
  28. letta/helpers/converters.py +3 -0
  29. letta/helpers/message_helper.py +4 -0
  30. letta/helpers/tool_rule_solver.py +92 -5
  31. letta/interfaces/anthropic_streaming_interface.py +409 -0
  32. letta/interfaces/gemini_streaming_interface.py +296 -0
  33. letta/interfaces/openai_streaming_interface.py +752 -1
  34. letta/llm_api/anthropic_client.py +127 -16
  35. letta/llm_api/bedrock_client.py +4 -2
  36. letta/llm_api/deepseek_client.py +4 -1
  37. letta/llm_api/google_vertex_client.py +124 -42
  38. letta/llm_api/groq_client.py +4 -1
  39. letta/llm_api/llm_api_tools.py +11 -4
  40. letta/llm_api/llm_client_base.py +6 -2
  41. letta/llm_api/openai.py +32 -2
  42. letta/llm_api/openai_client.py +423 -18
  43. letta/llm_api/xai_client.py +4 -1
  44. letta/main.py +9 -5
  45. letta/memory.py +1 -0
  46. letta/orm/__init__.py +2 -1
  47. letta/orm/agent.py +10 -0
  48. letta/orm/block.py +7 -16
  49. letta/orm/blocks_agents.py +8 -2
  50. letta/orm/files_agents.py +2 -0
  51. letta/orm/job.py +7 -5
  52. letta/orm/mcp_oauth.py +1 -0
  53. letta/orm/message.py +21 -6
  54. letta/orm/organization.py +2 -0
  55. letta/orm/provider.py +6 -2
  56. letta/orm/run.py +71 -0
  57. letta/orm/run_metrics.py +82 -0
  58. letta/orm/sandbox_config.py +7 -1
  59. letta/orm/sqlalchemy_base.py +0 -306
  60. letta/orm/step.py +6 -5
  61. letta/orm/step_metrics.py +5 -5
  62. letta/otel/tracing.py +28 -3
  63. letta/plugins/defaults.py +4 -4
  64. letta/prompts/system_prompts/__init__.py +2 -0
  65. letta/prompts/system_prompts/letta_v1.py +25 -0
  66. letta/schemas/agent.py +3 -2
  67. letta/schemas/agent_file.py +9 -3
  68. letta/schemas/block.py +23 -10
  69. letta/schemas/enums.py +21 -2
  70. letta/schemas/job.py +17 -4
  71. letta/schemas/letta_message_content.py +71 -2
  72. letta/schemas/letta_stop_reason.py +5 -5
  73. letta/schemas/llm_config.py +53 -3
  74. letta/schemas/memory.py +1 -1
  75. letta/schemas/message.py +564 -117
  76. letta/schemas/openai/responses_request.py +64 -0
  77. letta/schemas/providers/__init__.py +2 -0
  78. letta/schemas/providers/anthropic.py +16 -0
  79. letta/schemas/providers/ollama.py +115 -33
  80. letta/schemas/providers/openrouter.py +52 -0
  81. letta/schemas/providers/vllm.py +2 -1
  82. letta/schemas/run.py +48 -42
  83. letta/schemas/run_metrics.py +21 -0
  84. letta/schemas/step.py +2 -2
  85. letta/schemas/step_metrics.py +1 -1
  86. letta/schemas/tool.py +15 -107
  87. letta/schemas/tool_rule.py +88 -5
  88. letta/serialize_schemas/marshmallow_agent.py +1 -0
  89. letta/server/db.py +79 -408
  90. letta/server/rest_api/app.py +61 -10
  91. letta/server/rest_api/dependencies.py +14 -0
  92. letta/server/rest_api/redis_stream_manager.py +19 -8
  93. letta/server/rest_api/routers/v1/agents.py +364 -292
  94. letta/server/rest_api/routers/v1/blocks.py +14 -20
  95. letta/server/rest_api/routers/v1/identities.py +45 -110
  96. letta/server/rest_api/routers/v1/internal_templates.py +21 -0
  97. letta/server/rest_api/routers/v1/jobs.py +23 -6
  98. letta/server/rest_api/routers/v1/messages.py +1 -1
  99. letta/server/rest_api/routers/v1/runs.py +149 -99
  100. letta/server/rest_api/routers/v1/sandbox_configs.py +10 -19
  101. letta/server/rest_api/routers/v1/tools.py +281 -594
  102. letta/server/rest_api/routers/v1/voice.py +1 -1
  103. letta/server/rest_api/streaming_response.py +29 -29
  104. letta/server/rest_api/utils.py +122 -64
  105. letta/server/server.py +160 -887
  106. letta/services/agent_manager.py +236 -919
  107. letta/services/agent_serialization_manager.py +16 -0
  108. letta/services/archive_manager.py +0 -100
  109. letta/services/block_manager.py +211 -168
  110. letta/services/context_window_calculator/token_counter.py +1 -1
  111. letta/services/file_manager.py +1 -1
  112. letta/services/files_agents_manager.py +24 -33
  113. letta/services/group_manager.py +0 -142
  114. letta/services/helpers/agent_manager_helper.py +7 -2
  115. letta/services/helpers/run_manager_helper.py +69 -0
  116. letta/services/job_manager.py +96 -411
  117. letta/services/lettuce/__init__.py +6 -0
  118. letta/services/lettuce/lettuce_client_base.py +86 -0
  119. letta/services/mcp_manager.py +38 -6
  120. letta/services/message_manager.py +165 -362
  121. letta/services/organization_manager.py +0 -36
  122. letta/services/passage_manager.py +0 -345
  123. letta/services/provider_manager.py +0 -80
  124. letta/services/run_manager.py +364 -0
  125. letta/services/sandbox_config_manager.py +0 -234
  126. letta/services/step_manager.py +62 -39
  127. letta/services/summarizer/summarizer.py +9 -7
  128. letta/services/telemetry_manager.py +0 -16
  129. letta/services/tool_executor/builtin_tool_executor.py +35 -0
  130. letta/services/tool_executor/core_tool_executor.py +397 -2
  131. letta/services/tool_executor/files_tool_executor.py +3 -3
  132. letta/services/tool_executor/multi_agent_tool_executor.py +30 -15
  133. letta/services/tool_executor/tool_execution_manager.py +6 -8
  134. letta/services/tool_executor/tool_executor_base.py +3 -3
  135. letta/services/tool_manager.py +85 -339
  136. letta/services/tool_sandbox/base.py +24 -13
  137. letta/services/tool_sandbox/e2b_sandbox.py +16 -1
  138. letta/services/tool_schema_generator.py +123 -0
  139. letta/services/user_manager.py +0 -99
  140. letta/settings.py +20 -4
  141. letta/system.py +5 -1
  142. {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/METADATA +3 -5
  143. {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/RECORD +146 -135
  144. letta/agents/temporal/activities/__init__.py +0 -4
  145. letta/agents/temporal/activities/example_activity.py +0 -7
  146. letta/agents/temporal/activities/prepare_messages.py +0 -10
  147. letta/agents/temporal/temporal_agent_workflow.py +0 -56
  148. letta/agents/temporal/types.py +0 -25
  149. {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/WHEEL +0 -0
  150. {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/entry_points.txt +0 -0
  151. {letta_nightly-0.11.7.dev20251007104119.dist-info → letta_nightly-0.12.0.dev20251009104148.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,927 @@
+ import uuid
+ from typing import AsyncGenerator, Optional
+
+ from opentelemetry.trace import Span
+
+ from letta.adapters.letta_llm_adapter import LettaLLMAdapter
+ from letta.adapters.simple_llm_request_adapter import SimpleLLMRequestAdapter
+ from letta.adapters.simple_llm_stream_adapter import SimpleLLMStreamAdapter
+ from letta.agents.helpers import (
+ _build_rule_violation_result,
+ _load_last_function_response,
+ _maybe_get_approval_messages,
+ _prepare_in_context_messages_no_persist_async,
+ _safe_load_tool_call_str,
+ generate_step_id,
+ merge_and_validate_prefilled_args,
+ )
+ from letta.agents.letta_agent_v2 import LettaAgentV2
+ from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX, REQUEST_HEARTBEAT_PARAM
+ from letta.errors import ContextWindowExceededError, LLMError
+ from letta.helpers import ToolRulesSolver
+ from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+ from letta.helpers.tool_execution_helper import enable_strict_mode
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+ from letta.otel.tracing import trace_method
+ from letta.schemas.agent import AgentState
+ from letta.schemas.letta_message import LettaMessage, MessageType
+ from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
+ from letta.schemas.letta_response import LettaResponse
+ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
+ from letta.schemas.message import Message, MessageCreate
+ from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
+ from letta.schemas.step import StepProgression
+ from letta.schemas.step_metrics import StepMetrics
+ from letta.schemas.tool_execution_result import ToolExecutionResult
+ from letta.server.rest_api.utils import create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response
+ from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
+ from letta.settings import settings, summarizer_settings
+ from letta.system import package_function_response
+ from letta.utils import log_telemetry, validate_function_response
+
+
+ class LettaAgentV3(LettaAgentV2):
+ """
+ Similar to V2, but stripped down / simplified, while also generalized:
+ * Supports non-tool returns
+ * No inner thoughts in kwargs
+ * No heartbeats (loops happen on tool calls)
+
+ TODOs:
+ * Support tool rules
+ * Support Gemini / OpenAI client
+ """
+
+ def _initialize_state(self):
+ super()._initialize_state()
+ self._require_tool_call = False
+
+ @trace_method
+ async def step(
+ self,
+ input_messages: list[MessageCreate],
+ max_steps: int = DEFAULT_MAX_STEPS,
+ run_id: str | None = None,
+ use_assistant_message: bool = True, # NOTE: not used
+ include_return_message_types: list[MessageType] | None = None,
+ request_start_timestamp_ns: int | None = None,
+ ) -> LettaResponse:
+ """
+ Execute the agent loop in blocking mode, returning all messages at once.
+
+ Args:
+ input_messages: List of new messages to process
+ max_steps: Maximum number of agent steps to execute
+ run_id: Optional job/run ID for tracking
+ use_assistant_message: Whether to use assistant message format
+ include_return_message_types: Filter for which message types to return
+ request_start_timestamp_ns: Start time for tracking request duration
+
+ Returns:
+ LettaResponse: Complete response with all messages and metadata
+ """
+ self._initialize_state()
+ request_span = self._request_checkpoint_start(request_start_timestamp_ns=request_start_timestamp_ns)
+
+ in_context_messages, input_messages_to_persist = await _prepare_in_context_messages_no_persist_async(
+ input_messages, self.agent_state, self.message_manager, self.actor, run_id
+ )
+ in_context_messages = in_context_messages + input_messages_to_persist
+ response_letta_messages = []
+ for i in range(max_steps):
+ response = self._step(
+ messages=in_context_messages + self.response_messages,
+ input_messages_to_persist=input_messages_to_persist,
+ # TODO need to support non-streaming adapter too
+ llm_adapter=SimpleLLMRequestAdapter(llm_client=self.llm_client, llm_config=self.agent_state.llm_config),
+ run_id=run_id,
+ # use_assistant_message=use_assistant_message,
+ include_return_message_types=include_return_message_types,
+ request_start_timestamp_ns=request_start_timestamp_ns,
+ )
+
+ async for chunk in response:
+ response_letta_messages.append(chunk)
+
+ if not self.should_continue:
+ break
+
+ input_messages_to_persist = []
+
+ # Rebuild context window after stepping
+ if not self.agent_state.message_buffer_autoclear:
+ await self.summarize_conversation_history(
+ in_context_messages=in_context_messages,
+ new_letta_messages=self.response_messages,
+ total_tokens=self.usage.total_tokens,
+ force=False,
+ )
+
+ if self.stop_reason is None:
+ self.stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+
+ result = LettaResponse(messages=response_letta_messages, stop_reason=self.stop_reason, usage=self.usage)
+ if run_id:
+ if self.job_update_metadata is None:
+ self.job_update_metadata = {}
+ self.job_update_metadata["result"] = result.model_dump(mode="json")
+
+ await self._request_checkpoint_finish(
+ request_span=request_span, request_start_timestamp_ns=request_start_timestamp_ns, run_id=run_id
+ )
+ return result
+
+ @trace_method
+ async def stream(
+ self,
+ input_messages: list[MessageCreate],
+ max_steps: int = DEFAULT_MAX_STEPS,
+ stream_tokens: bool = False,
+ run_id: str | None = None,
+ use_assistant_message: bool = True, # NOTE: not used
+ include_return_message_types: list[MessageType] | None = None,
+ request_start_timestamp_ns: int | None = None,
+ ) -> AsyncGenerator[str, None]:
+ """
+ Execute the agent loop in streaming mode, yielding chunks as they become available.
+ If stream_tokens is True, individual tokens are streamed as they arrive from the LLM,
+ providing the lowest latency experience, otherwise each complete step (reasoning +
+ tool call + tool return) is yielded as it completes.
+
+ Args:
+ input_messages: List of new messages to process
+ max_steps: Maximum number of agent steps to execute
+ stream_tokens: Whether to stream back individual tokens. Not all llm
+ providers offer native token streaming functionality; in these cases,
+ this api streams back steps rather than individual tokens.
+ run_id: Optional job/run ID for tracking
+ use_assistant_message: Whether to use assistant message format
+ include_return_message_types: Filter for which message types to return
+ request_start_timestamp_ns: Start time for tracking request duration
+
+ Yields:
+ str: JSON-formatted SSE data chunks for each completed step
+ """
+ self._initialize_state()
+ request_span = self._request_checkpoint_start(request_start_timestamp_ns=request_start_timestamp_ns)
+ first_chunk = True
+
+ if stream_tokens:
+ llm_adapter = SimpleLLMStreamAdapter(
+ llm_client=self.llm_client,
+ llm_config=self.agent_state.llm_config,
+ run_id=run_id,
+ )
+ else:
+ llm_adapter = SimpleLLMRequestAdapter(
+ llm_client=self.llm_client,
+ llm_config=self.agent_state.llm_config,
+ )
+
+ try:
+ in_context_messages, input_messages_to_persist = await _prepare_in_context_messages_no_persist_async(
+ input_messages, self.agent_state, self.message_manager, self.actor, run_id
+ )
+ in_context_messages = in_context_messages + input_messages_to_persist
+ for i in range(max_steps):
+ response = self._step(
+ messages=in_context_messages + self.response_messages,
+ input_messages_to_persist=input_messages_to_persist,
+ llm_adapter=llm_adapter,
+ run_id=run_id,
+ # use_assistant_message=use_assistant_message,
+ include_return_message_types=include_return_message_types,
+ request_start_timestamp_ns=request_start_timestamp_ns,
+ )
+ async for chunk in response:
+ if first_chunk:
+ request_span = self._request_checkpoint_ttft(request_span, request_start_timestamp_ns)
+ yield f"data: {chunk.model_dump_json()}\n\n"
+ first_chunk = False
+
+ if not self.should_continue:
+ break
+
+ input_messages_to_persist = []
+
+ if not self.agent_state.message_buffer_autoclear:
+ await self.summarize_conversation_history(
+ in_context_messages=in_context_messages,
+ new_letta_messages=self.response_messages,
+ total_tokens=self.usage.total_tokens,
+ force=False,
+ )
+
+ except:
+ if self.stop_reason and not first_chunk:
+ yield f"data: {self.stop_reason.model_dump_json()}\n\n"
+ raise
+
+ if run_id:
+ letta_messages = Message.to_letta_messages_from_list(
+ self.response_messages,
+ use_assistant_message=False, # NOTE: set to false
+ reverse=False,
+ # text_is_assistant_message=(self.agent_state.agent_type == AgentType.react_agent),
+ text_is_assistant_message=True,
+ )
+ result = LettaResponse(messages=letta_messages, stop_reason=self.stop_reason, usage=self.usage)
+ if self.job_update_metadata is None:
+ self.job_update_metadata = {}
+ self.job_update_metadata["result"] = result.model_dump(mode="json")
+
+ await self._request_checkpoint_finish(
+ request_span=request_span, request_start_timestamp_ns=request_start_timestamp_ns, run_id=run_id
+ )
+ for finish_chunk in self.get_finish_chunks_for_stream(self.usage, self.stop_reason):
+ yield f"data: {finish_chunk}\n\n"
+
+ @trace_method
+ async def _step(
+ self,
+ messages: list[Message],
+ llm_adapter: LettaLLMAdapter,
+ input_messages_to_persist: list[Message] | None = None,
+ run_id: str | None = None,
+ # use_assistant_message: bool = True,
+ include_return_message_types: list[MessageType] | None = None,
+ request_start_timestamp_ns: int | None = None,
+ remaining_turns: int = -1,
+ dry_run: bool = False,
+ ) -> AsyncGenerator[LettaMessage | dict, None]:
+ """
+ Execute a single agent step (one LLM call and tool execution).
+
+ This is the core execution method that all public methods (step, stream_steps,
+ stream_tokens) funnel through. It handles the complete flow of making an LLM
+ request, processing the response, executing tools, and persisting messages.
+
+ Args:
+ messages: Current in-context messages
+ llm_adapter: Adapter for LLM interaction (blocking or streaming)
+ input_messages_to_persist: New messages to persist after execution
+ run_id: Optional job/run ID for tracking
+ include_return_message_types: Filter for which message types to yield
+ request_start_timestamp_ns: Start time for tracking request duration
+ remaining_turns: Number of turns remaining (for max_steps enforcement)
+ dry_run: If true, only build and return the request without executing
+
+ Yields:
+ LettaMessage or dict: Chunks for streaming mode, or request data for dry_run
+ """
+ step_progression = StepProgression.START
+ # TODO(@caren): clean this up
+ tool_call, content, agent_step_span, first_chunk, step_id, logged_step, step_start_ns, step_metrics = (
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ None,
+ )
+ try:
+ self.last_function_response = _load_last_function_response(messages)
+ valid_tools = await self._get_valid_tools()
+ require_tool_call = self.tool_rules_solver.should_force_tool_call()
+
+ if self._require_tool_call != require_tool_call:
+ if require_tool_call:
+ self.logger.info("switching to constrained mode (forcing tool call)")
+ else:
+ self.logger.info("switching to unconstrained mode (allowing non-tool responses)")
+ self._require_tool_call = require_tool_call
+
+ approval_request, approval_response = _maybe_get_approval_messages(messages)
+ if approval_request and approval_response:
+ tool_call = approval_request.tool_calls[0]
+ content = approval_request.content
+ step_id = approval_request.step_id
+ step_metrics = await self.step_manager.get_step_metrics_async(step_id=step_id, actor=self.actor)
+ else:
+ # Check for job cancellation at the start of each step
+ if run_id and await self._check_run_cancellation(run_id):
+ self.stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+ self.logger.info(f"Agent execution cancelled for run {run_id}")
+ return
+
+ step_id = generate_step_id()
+ step_progression, logged_step, step_metrics, agent_step_span = await self._step_checkpoint_start(
+ step_id=step_id, run_id=run_id
+ )
+
+ messages = await self._refresh_messages(messages)
+ force_tool_call = valid_tools[0]["name"] if len(valid_tools) == 1 and self._require_tool_call else None
+ for llm_request_attempt in range(summarizer_settings.max_summarizer_retries + 1):
+ try:
+ request_data = self.llm_client.build_request_data(
+ agent_type=self.agent_state.agent_type,
+ messages=messages,
+ llm_config=self.agent_state.llm_config,
+ tools=valid_tools,
+ force_tool_call=force_tool_call,
+ requires_subsequent_tool_call=self._require_tool_call,
+ )
+ if dry_run:
+ yield request_data
+ return
+
+ step_progression, step_metrics = self._step_checkpoint_llm_request_start(step_metrics, agent_step_span)
+
+ invocation = llm_adapter.invoke_llm(
+ request_data=request_data,
+ messages=messages,
+ tools=valid_tools,
+ use_assistant_message=False, # NOTE: set to false
+ requires_approval_tools=self.tool_rules_solver.get_requires_approval_tools(
+ set([t["name"] for t in valid_tools])
+ ),
+ step_id=step_id,
+ actor=self.actor,
+ )
+ async for chunk in invocation:
+ if llm_adapter.supports_token_streaming():
+ if include_return_message_types is None or chunk.message_type in include_return_message_types:
+ first_chunk = True
+ yield chunk
+ # If you've reached this point without an error, break out of retry loop
+ break
+ except ValueError as e:
+ self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
+ raise e
+ except LLMError as e:
+ self.stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error.value)
+ raise e
+ except Exception as e:
+ if isinstance(e, ContextWindowExceededError) and llm_request_attempt < summarizer_settings.max_summarizer_retries:
+ # Retry case
+ messages = await self.summarize_conversation_history(
+ in_context_messages=messages,
+ new_letta_messages=self.response_messages,
+ llm_config=self.agent_state.llm_config,
+ force=True,
+ )
+ else:
+ raise e
+
+ step_progression, step_metrics = self._step_checkpoint_llm_request_finish(
+ step_metrics, agent_step_span, llm_adapter.llm_request_finish_timestamp_ns
+ )
+
+ self._update_global_usage_stats(llm_adapter.usage)
+
+ # Handle the AI response with the extracted data
+ # NOTE: in v3 loop, no tool call is OK
+ # if tool_call is None and llm_adapter.tool_call is None:
+
+ persisted_messages, self.should_continue, self.stop_reason = await self._handle_ai_response(
+ tool_call=tool_call or llm_adapter.tool_call,
+ valid_tool_names=[tool["name"] for tool in valid_tools],
+ agent_state=self.agent_state,
+ tool_rules_solver=self.tool_rules_solver,
+ usage=UsageStatistics(
+ completion_tokens=self.usage.completion_tokens,
+ prompt_tokens=self.usage.prompt_tokens,
+ total_tokens=self.usage.total_tokens,
+ ),
+ # reasoning_content=reasoning_content or llm_adapter.reasoning_content,
+ content=content or llm_adapter.content,
+ pre_computed_assistant_message_id=llm_adapter.message_id,
+ step_id=step_id,
+ initial_messages=input_messages_to_persist,
+ agent_step_span=agent_step_span,
+ is_final_step=(remaining_turns == 0),
+ run_id=run_id,
+ step_metrics=step_metrics,
+ is_approval=approval_response.approve if approval_response is not None else False,
+ is_denial=(approval_response.approve == False) if approval_response is not None else False,
+ denial_reason=approval_response.denial_reason if approval_response is not None else None,
+ )
+ # NOTE: there is an edge case where persisted_messages is empty (the LLM did a "no-op")
+
+ new_message_idx = len(input_messages_to_persist) if input_messages_to_persist else 0
+ self.response_messages.extend(persisted_messages[new_message_idx:])
+
+ if llm_adapter.supports_token_streaming():
+ # Stream the tool return if a tool was actually executed.
+ # In the normal streaming path, the tool call is surfaced via the streaming interface
+ # (llm_adapter.tool_call), so don't rely solely on the local `tool_call` variable.
+ has_tool_return = any(m.role == "tool" for m in persisted_messages)
+ if len(persisted_messages) > 0 and persisted_messages[-1].role != "approval" and has_tool_return:
+ tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
+ if include_return_message_types is None or tool_return.message_type in include_return_message_types:
+ yield tool_return
+ else:
+ filter_user_messages = [m for m in persisted_messages[new_message_idx:] if m.role != "user"]
+ letta_messages = Message.to_letta_messages_from_list(
+ filter_user_messages,
+ use_assistant_message=False, # NOTE: set to false
+ reverse=False,
+ # text_is_assistant_message=(self.agent_state.agent_type == AgentType.react_agent),
+ text_is_assistant_message=True,
+ )
+ for message in letta_messages:
+ if include_return_message_types is None or message.message_type in include_return_message_types:
+ yield message
+
+ # Persist approval responses immediately to prevent agent from getting into a bad state
+ if (
+ len(input_messages_to_persist) == 1
+ and input_messages_to_persist[0].role == "approval"
+ and persisted_messages[0].role == "approval"
+ and persisted_messages[1].role == "tool"
+ ):
+ self.agent_state.message_ids = self.agent_state.message_ids + [m.id for m in persisted_messages[:2]]
+ await self.agent_manager.update_message_ids_async(
+ agent_id=self.agent_state.id, message_ids=self.agent_state.message_ids, actor=self.actor
+ )
+ # TODO should we be logging this even if persisted_messages is empty? Technically, there still was an LLM call
+ step_progression, step_metrics = await self._step_checkpoint_finish(step_metrics, agent_step_span, logged_step)
+ except Exception as e:
+ import traceback
+
+ self.logger.error(f"Error during step processing: {e}")
+ self.logger.error(f"Error traceback: {traceback.format_exc()}")
+ # self.logger.error(f"Error during step processing: {e}")
+ self.job_update_metadata = {"error": str(e)}
+
+ # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+ if not self.stop_reason:
+ self.stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ elif self.stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+ self.logger.error("Error occurred during step processing, with valid stop reason: %s", self.stop_reason.stop_reason)
+ elif self.stop_reason.stop_reason not in (
+ StopReasonType.no_tool_call,
+ StopReasonType.invalid_tool_call,
+ StopReasonType.invalid_llm_response,
+ StopReasonType.llm_api_error,
+ ):
+ self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", self.stop_reason.stop_reason)
+ raise e
+ finally:
+ self.logger.debug("Running cleanup for agent loop run: %s", run_id)
+ self.logger.info("Running final update. Step Progression: %s", step_progression)
+ try:
+ if step_progression == StepProgression.FINISHED:
+ if not self.should_continue:
+ if self.stop_reason is None:
+ self.stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ if logged_step and step_id:
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, self.stop_reason.stop_reason)
+ return
+ if step_progression < StepProgression.STEP_LOGGED:
+ # Error occurred before step was fully logged
+ import traceback
+
+ if logged_step:
+ await self.step_manager.update_step_error_async(
+ actor=self.actor,
+ step_id=step_id, # Use original step_id for telemetry
+ error_type=type(e).__name__ if "e" in locals() else "Unknown",
+ error_message=str(e) if "e" in locals() else "Unknown error",
+ error_traceback=traceback.format_exc(),
+ stop_reason=self.stop_reason,
+ )
+ if step_progression <= StepProgression.STREAM_RECEIVED:
+ if first_chunk and settings.track_errored_messages and input_messages_to_persist:
+ for message in input_messages_to_persist:
+ message.is_err = True
+ message.step_id = step_id
+ message.run_id = run_id
+ await self.message_manager.create_many_messages_async(
+ input_messages_to_persist,
+ actor=self.actor,
+ run_id=run_id,
+ project_id=self.agent_state.project_id,
+ template_id=self.agent_state.template_id,
+ )
+ elif step_progression <= StepProgression.LOGGED_TRACE:
+ if self.stop_reason is None:
+ self.logger.error("Error in step after logging step")
+ self.stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+ if logged_step:
+ await self.step_manager.update_step_stop_reason(self.actor, step_id, self.stop_reason.stop_reason)
+ else:
+ self.logger.error("Invalid StepProgression value")
+
+ # Do tracking for failure cases. Can consolidate with success conditions later.
+ if settings.track_stop_reason:
+ await self._log_request(request_start_timestamp_ns, None, self.job_update_metadata, is_error=True, run_id=run_id)
+
+ # Record partial step metrics on failure (capture whatever timing data we have)
+ if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
+ # Calculate total step time up to the failure point
+ step_metrics.step_ns = get_utc_timestamp_ns() - step_metrics.step_start_ns
+
+ await self._record_step_metrics(
+ step_id=step_id,
+ step_metrics=step_metrics,
+ run_id=run_id,
+ )
+ except Exception as e:
+ self.logger.error(f"Error during post-completion step tracking: {e}")
+
+ @trace_method
+ async def _handle_ai_response(
+ self,
+ tool_call: Optional[ToolCall], # NOTE: should only be None for react agents
+ valid_tool_names: list[str],
+ agent_state: AgentState,
+ tool_rules_solver: ToolRulesSolver,
+ usage: UsageStatistics,
+ # reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
+ content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
+ pre_computed_assistant_message_id: str | None = None,
+ step_id: str | None = None,
+ initial_messages: list[Message] | None = None,
+ agent_step_span: Span | None = None,
+ is_final_step: bool | None = None,
+ run_id: str | None = None,
+ step_metrics: StepMetrics = None,
+ is_approval: bool | None = None,
+ is_denial: bool | None = None,
+ denial_reason: str | None = None,
+ ) -> tuple[list[Message], bool, LettaStopReason | None]:
+ """
+ Handle the final AI response once streaming completes, execute / validate the
+ tool call, decide whether we should keep stepping, and persist state.
+ """
+ if tool_call is None:
+ # NOTE: in v3 loop, no tool call is OK
+ tool_call_id = None
+ else:
+ tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+
+ if is_denial:
+ continue_stepping = True
+ stop_reason = None
+ tool_call_messages = create_letta_messages_from_llm_response(
+ agent_id=agent_state.id,
+ model=agent_state.llm_config.model,
+ function_name=tool_call.function.name,
+ function_arguments={},
+ tool_execution_result=ToolExecutionResult(status="error"),
+ tool_call_id=tool_call_id,
+ function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
+ timezone=agent_state.timezone,
+ continue_stepping=continue_stepping,
+ # NOTE: we may need to change this to not have a "heartbeat" prefix for v3?
+ heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
+ reasoning_content=None,
+ pre_computed_assistant_message_id=None,
+ step_id=step_id,
+ run_id=run_id,
+ is_approval_response=True,
+ force_set_request_heartbeat=False,
+ add_heartbeat_on_continue=False,
+ )
+ messages_to_persist = (initial_messages or []) + tool_call_messages
+
+ # Set run_id on all messages before persisting
+ for message in messages_to_persist:
+ if message.run_id is None:
+ message.run_id = run_id
+
+ persisted_messages = await self.message_manager.create_many_messages_async(
+ messages_to_persist,
+ actor=self.actor,
+ run_id=run_id,
+ project_id=agent_state.project_id,
+ template_id=agent_state.template_id,
+ )
+ return persisted_messages, continue_stepping, stop_reason
+
+ # -1. no tool call, no content
+ if tool_call is None and (content is None or len(content) == 0):
+ # Edge case is when there's also no content - basically, the LLM "no-op'd"
+ # If RequiredBeforeExitToolRule exists and not all required tools have been called,
+ # inject a rule-violation heartbeat to keep looping and inform the model.
+ uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
+ if uncalled:
+ # TODO: we may need to change this to not have a "heartbeat" prefix for v3?
+ heartbeat_reason = (
+ f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
+ )
+ from letta.server.rest_api.utils import create_heartbeat_system_message
+
+ heartbeat_msg = create_heartbeat_system_message(
+ agent_id=agent_state.id,
+ model=agent_state.llm_config.model,
+ function_call_success=True,
+ timezone=agent_state.timezone,
+ heartbeat_reason=heartbeat_reason,
+ run_id=run_id,
+ )
+ messages_to_persist = (initial_messages or []) + [heartbeat_msg]
+ continue_stepping, stop_reason = True, None
+ else:
+ # In this case, we actually do not want to persist the no-op message
+ continue_stepping, heartbeat_reason, stop_reason = False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ messages_to_persist = initial_messages or []
+
+ # 0. If there's no tool call, we can early exit
+ elif tool_call is None:
+ # TODO could just hardcode the line here instead of calling the function...
+ continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
+ agent_state=agent_state,
+ tool_call_name=None,
+ tool_rule_violated=False,
+ tool_rules_solver=tool_rules_solver,
+ is_final_step=is_final_step,
+ )
+ assistant_message = create_letta_messages_from_llm_response(
+ agent_id=agent_state.id,
+ model=agent_state.llm_config.model,
+ function_name=None,
+ function_arguments=None,
+ tool_execution_result=None,
+ tool_call_id=None,
+ function_response=None,
+ timezone=agent_state.timezone,
+ continue_stepping=continue_stepping,
+ heartbeat_reason=heartbeat_reason,
+ # NOTE: should probably rename this to `content`?
+ reasoning_content=content,
+ pre_computed_assistant_message_id=pre_computed_assistant_message_id,
+ step_id=step_id,
+ run_id=run_id,
+ is_approval_response=is_approval or is_denial,
+ force_set_request_heartbeat=False,
+ # If we're continuing due to a required-before-exit rule, include a heartbeat to guide the model
+ add_heartbeat_on_continue=bool(heartbeat_reason),
+ )
+ messages_to_persist = (initial_messages or []) + assistant_message
+
+ else:
+ # 1. Parse and validate the tool-call envelope
+ tool_call_name: str = tool_call.function.name
+
+ tool_args = _safe_load_tool_call_str(tool_call.function.arguments)
+ # NOTE: these are failsafes - for v3, we should eventually be able to remove these
+ # request_heartbeat: bool = _pop_heartbeat(tool_args)
+ tool_args.pop(REQUEST_HEARTBEAT_PARAM, None)
+ tool_args.pop(INNER_THOUGHTS_KWARG, None)
+
+ log_telemetry(
+ self.logger,
+ "_handle_ai_response execute tool start",
+ tool_name=tool_call_name,
+ tool_args=tool_args,
+ tool_call_id=tool_call_id,
+ # request_heartbeat=request_heartbeat,
+ )
+
+ if not is_approval and tool_rules_solver.is_requires_approval_tool(tool_call_name):
+ approval_message = create_approval_request_message_from_llm_response(
+ agent_id=agent_state.id,
+ model=agent_state.llm_config.model,
+ function_name=tool_call_name,
+ function_arguments=tool_args,
+ tool_call_id=tool_call_id,
+ actor=self.actor,
+ # continue_stepping=request_heartbeat,
+ continue_stepping=True,
+ # reasoning_content=reasoning_content,
+ reasoning_content=content,
+ pre_computed_assistant_message_id=pre_computed_assistant_message_id,
+ step_id=step_id,
+ run_id=run_id,
+ append_request_heartbeat=False,
+ )
+ messages_to_persist = (initial_messages or []) + [approval_message]
+ continue_stepping = False
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
+ else:
+ # 2. Execute the tool (or synthesize an error result if disallowed)
+ tool_rule_violated = tool_call_name not in valid_tool_names and not is_approval
+ if tool_rule_violated:
+ tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
+ else:
+ # Prefill + validate args if a rule provided them
+ prefill_args = self.tool_rules_solver.last_prefilled_args_by_tool.get(tool_call_name)
+ if prefill_args:
+ # Find tool object for schema validation
+ target_tool = next((t for t in agent_state.tools if t.name == tool_call_name), None)
+ provenance = self.tool_rules_solver.last_prefilled_args_provenance.get(tool_call_name)
+ try:
+ tool_args = merge_and_validate_prefilled_args(
+ tool=target_tool,
+ llm_args=tool_args,
+ prefilled_args=prefill_args,
+ )
+ except ValueError as ve:
+ # Treat invalid prefilled args as user error and end the step
+ error_prefix = "Invalid prefilled tool arguments from tool rules"
+ prov_suffix = f" (source={provenance})" if provenance else ""
+ err_msg = f"{error_prefix}{prov_suffix}: {str(ve)}"
+ tool_execution_result = ToolExecutionResult(status="error", func_return=err_msg)
+
+ # Create messages and early return persistence path below
+ continue_stepping, heartbeat_reason, stop_reason = (
+ False,
+ None,
+ LettaStopReason(stop_reason=StopReasonType.invalid_tool_call.value),
+ )
+ tool_call_messages = create_letta_messages_from_llm_response(
+ agent_id=agent_state.id,
+ model=agent_state.llm_config.model,
+ function_name=tool_call_name,
+ function_arguments=tool_args,
+ tool_execution_result=tool_execution_result,
+ tool_call_id=tool_call_id,
+ function_response=tool_execution_result.func_return,
+ timezone=agent_state.timezone,
+ continue_stepping=continue_stepping,
+ heartbeat_reason=None,
+ reasoning_content=content,
+ pre_computed_assistant_message_id=pre_computed_assistant_message_id,
+ step_id=step_id,
+ run_id=run_id,
+ is_approval_response=is_approval or is_denial,
+ force_set_request_heartbeat=False,
+ add_heartbeat_on_continue=False,
+ )
+ messages_to_persist = (initial_messages or []) + tool_call_messages
+
+ # Set run_id on all messages before persisting
+ for message in messages_to_persist:
+ if message.run_id is None:
+ message.run_id = run_id
+
+ persisted_messages = await self.message_manager.create_many_messages_async(
+ messages_to_persist,
+ actor=self.actor,
+ run_id=run_id,
+ project_id=agent_state.project_id,
+ template_id=agent_state.template_id,
+ )
+ return persisted_messages, continue_stepping, stop_reason
+
+ # Track tool execution time
+ tool_start_time = get_utc_timestamp_ns()
+ tool_execution_result = await self._execute_tool(
+ tool_name=tool_call_name,
+ tool_args=tool_args,
+ agent_state=agent_state,
+ agent_step_span=agent_step_span,
+ step_id=step_id,
+ )
+ tool_end_time = get_utc_timestamp_ns()
+
+ # Store tool execution time in metrics
+ step_metrics.tool_execution_ns = tool_end_time - tool_start_time
+
+ log_telemetry(
+ self.logger,
+ "_handle_ai_response execute tool finish",
+ tool_execution_result=tool_execution_result,
+ tool_call_id=tool_call_id,
+ )
+
+ # 3. Prepare the function-response payload
+ truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
+ return_char_limit = next(
+ (t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
+ None,
+ )
+ function_response_string = validate_function_response(
+ tool_execution_result.func_return,
+ return_char_limit=return_char_limit,
+ truncate=truncate,
+ )
+ self.last_function_response = package_function_response(
+ was_success=tool_execution_result.success_flag,
+ response_string=function_response_string,
+ timezone=agent_state.timezone,
+ )
+
+ # 4. Decide whether to keep stepping (focal section simplified)
+ continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
+ agent_state=agent_state,
+ tool_call_name=tool_call_name,
+ tool_rule_violated=tool_rule_violated,
+ tool_rules_solver=tool_rules_solver,
+ is_final_step=is_final_step,
+ )
+
+ # 5. Create messages (step was already created at the beginning)
+ tool_call_messages = create_letta_messages_from_llm_response(
+ agent_id=agent_state.id,
+ model=agent_state.llm_config.model,
+ function_name=tool_call_name,
+ function_arguments=tool_args,
+ tool_execution_result=tool_execution_result,
+ tool_call_id=tool_call_id,
+ function_response=function_response_string,
+ timezone=agent_state.timezone,
+ continue_stepping=continue_stepping,
+ # heartbeat_reason=heartbeat_reason,
+ heartbeat_reason=None,
+ # reasoning_content=reasoning_content,
+ reasoning_content=content,
+ pre_computed_assistant_message_id=pre_computed_assistant_message_id,
+ step_id=step_id,
+ run_id=run_id,
+ is_approval_response=is_approval or is_denial,
+ force_set_request_heartbeat=False,
+ add_heartbeat_on_continue=False,
+ )
+ messages_to_persist = (initial_messages or []) + tool_call_messages
+
+ # Set run_id on all messages before persisting
+ for message in messages_to_persist:
+ if message.run_id is None:
+ message.run_id = run_id
+
+ persisted_messages = await self.message_manager.create_many_messages_async(
+ messages_to_persist, actor=self.actor, run_id=run_id, project_id=agent_state.project_id, template_id=agent_state.template_id
+ )
+
+ return persisted_messages, continue_stepping, stop_reason
+
+ @trace_method
+ def _decide_continuation(
+ self,
+ agent_state: AgentState,
+ tool_call_name: Optional[str],
+ tool_rule_violated: bool,
+ tool_rules_solver: ToolRulesSolver,
+ is_final_step: bool | None,
+ ) -> tuple[bool, str | None, LettaStopReason | None]:
+ """
+ In v3 loop, we apply the following rules:
+
+ 1. Did not call a tool? Loop ends
+
+ 2. Called a tool? Loop continues. This can be:
+ 2a. Called tool, tool executed successfully
+ 2b. Called tool, tool failed to execute
+ 2c. Called tool + tool rule violation (did not execute)
+
+ """
+ continue_stepping = True # Default continue
+ continuation_reason: str | None = None
+ stop_reason: LettaStopReason | None = None
+
+ if tool_call_name is None:
+ # No tool call – if there are required-before-exit tools uncalled, keep stepping
+ # and provide explicit feedback to the model; otherwise end the loop.
+ uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
+ if uncalled and not is_final_step:
+ reason = f"{NON_USER_MSG_PREFIX}ToolRuleViolated: You must call {', '.join(uncalled)} at least once to exit the loop."
+ return True, reason, None
+ # No required tools remaining → end turn
+ return False, None, LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+ else:
+ if tool_rule_violated:
+ continue_stepping = True
+ continuation_reason = f"{NON_USER_MSG_PREFIX}Continuing: tool rule violation."
+ else:
+ tool_rules_solver.register_tool_call(tool_call_name)
+
+ if tool_rules_solver.is_terminal_tool(tool_call_name):
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
+ continue_stepping = False
+
+ elif tool_rules_solver.has_children_tools(tool_call_name):
+ continue_stepping = True
+ continuation_reason = f"{NON_USER_MSG_PREFIX}Continuing: child tool rule."
+
+ elif tool_rules_solver.is_continue_tool(tool_call_name):
+ continue_stepping = True
+ continuation_reason = f"{NON_USER_MSG_PREFIX}Continuing: continue tool rule."
+
+ # – hard stop overrides –
+ if is_final_step:
+ continue_stepping = False
+ stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
+ else:
+ uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
+ if uncalled:
+ continue_stepping = True
+ continuation_reason = (
+ f"{NON_USER_MSG_PREFIX}Continuing, user expects these tools: [{', '.join(uncalled)}] to be called still."
+ )
+
+ stop_reason = None # reset – we’re still going
+
+ return continue_stepping, continuation_reason, stop_reason
+
+ @trace_method
+ async def _get_valid_tools(self):
+ tools = self.agent_state.tools
+ valid_tool_names = self.tool_rules_solver.get_allowed_tool_names(
+ available_tools=set([t.name for t in tools]),
+ last_function_response=self.last_function_response,
+ error_on_empty=False, # Return empty list instead of raising error
+ ) or list(set(t.name for t in tools))
+ allowed_tools = [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]
+ terminal_tool_names = {rule.tool_name for rule in self.tool_rules_solver.terminal_tool_rules}
+ allowed_tools = runtime_override_tool_json_schema(
+ tool_list=allowed_tools,
+ response_format=self.agent_state.response_format,
+ request_heartbeat=False, # NOTE: difference for v3 (don't add request heartbeat)
+ terminal_tools=terminal_tool_names,
+ )
+ return allowed_tools