letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +113 -0
  3. letta/adapters/letta_llm_stream_adapter.py +171 -0
  4. letta/agents/agent_loop.py +23 -0
  5. letta/agents/base_agent.py +4 -1
  6. letta/agents/base_agent_v2.py +68 -0
  7. letta/agents/helpers.py +3 -5
  8. letta/agents/letta_agent.py +23 -12
  9. letta/agents/letta_agent_v2.py +1221 -0
  10. letta/agents/voice_agent.py +2 -1
  11. letta/constants.py +1 -1
  12. letta/errors.py +12 -0
  13. letta/functions/function_sets/base.py +53 -12
  14. letta/functions/helpers.py +3 -2
  15. letta/functions/schema_generator.py +1 -1
  16. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  17. letta/groups/sleeptime_multi_agent_v3.py +233 -0
  18. letta/helpers/tool_rule_solver.py +4 -0
  19. letta/helpers/tpuf_client.py +607 -34
  20. letta/interfaces/anthropic_streaming_interface.py +74 -30
  21. letta/interfaces/openai_streaming_interface.py +80 -37
  22. letta/llm_api/google_vertex_client.py +1 -1
  23. letta/llm_api/openai_client.py +45 -4
  24. letta/orm/agent.py +4 -1
  25. letta/orm/block.py +2 -0
  26. letta/orm/blocks_agents.py +1 -0
  27. letta/orm/group.py +1 -0
  28. letta/orm/source.py +8 -1
  29. letta/orm/sources_agents.py +2 -1
  30. letta/orm/step_metrics.py +10 -0
  31. letta/orm/tools_agents.py +5 -2
  32. letta/schemas/block.py +4 -0
  33. letta/schemas/enums.py +1 -0
  34. letta/schemas/group.py +8 -0
  35. letta/schemas/letta_message.py +1 -1
  36. letta/schemas/letta_request.py +2 -2
  37. letta/schemas/mcp.py +9 -1
  38. letta/schemas/message.py +42 -2
  39. letta/schemas/providers/ollama.py +1 -1
  40. letta/schemas/providers.py +1 -2
  41. letta/schemas/source.py +6 -0
  42. letta/schemas/step_metrics.py +2 -0
  43. letta/server/rest_api/interface.py +34 -2
  44. letta/server/rest_api/json_parser.py +2 -0
  45. letta/server/rest_api/redis_stream_manager.py +2 -1
  46. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  47. letta/server/rest_api/routers/v1/__init__.py +2 -0
  48. letta/server/rest_api/routers/v1/agents.py +132 -170
  49. letta/server/rest_api/routers/v1/blocks.py +6 -0
  50. letta/server/rest_api/routers/v1/folders.py +25 -7
  51. letta/server/rest_api/routers/v1/groups.py +6 -0
  52. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  53. letta/server/rest_api/routers/v1/messages.py +14 -19
  54. letta/server/rest_api/routers/v1/runs.py +43 -28
  55. letta/server/rest_api/routers/v1/sources.py +25 -7
  56. letta/server/rest_api/routers/v1/tools.py +42 -0
  57. letta/server/rest_api/streaming_response.py +11 -2
  58. letta/server/server.py +9 -6
  59. letta/services/agent_manager.py +39 -59
  60. letta/services/agent_serialization_manager.py +26 -11
  61. letta/services/archive_manager.py +60 -9
  62. letta/services/block_manager.py +5 -0
  63. letta/services/file_processor/embedder/base_embedder.py +5 -0
  64. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  65. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  66. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  67. letta/services/file_processor/file_processor.py +9 -7
  68. letta/services/group_manager.py +74 -11
  69. letta/services/mcp_manager.py +134 -28
  70. letta/services/message_manager.py +229 -125
  71. letta/services/passage_manager.py +2 -1
  72. letta/services/source_manager.py +23 -1
  73. letta/services/summarizer/summarizer.py +4 -1
  74. letta/services/tool_executor/core_tool_executor.py +2 -120
  75. letta/services/tool_executor/files_tool_executor.py +133 -8
  76. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  77. letta/services/tool_sandbox/local_sandbox.py +2 -2
  78. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  79. letta/settings.py +6 -0
  80. letta/streaming_utils.py +29 -4
  81. letta/utils.py +106 -4
  82. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
  83. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
  84. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
  85. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
  86. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1221 @@
+import asyncio
+import json
+import uuid
+from datetime import datetime
+from typing import AsyncGenerator, Tuple
+
+from opentelemetry.trace import Span
+
+from letta.adapters.letta_llm_adapter import LettaLLMAdapter
+from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
+from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter
+from letta.agents.base_agent_v2 import BaseAgentV2
+from letta.agents.ephemeral_summary_agent import EphemeralSummaryAgent
+from letta.agents.helpers import (
+    _build_rule_violation_result,
+    _pop_heartbeat,
+    _prepare_in_context_messages_no_persist_async,
+    _safe_load_tool_call_str,
+    generate_step_id,
+)
+from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
+from letta.errors import ContextWindowExceededError
+from letta.helpers import ToolRulesSolver
+from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms
+from letta.helpers.reasoning_helper import scrub_inner_thoughts_from_messages
+from letta.helpers.tool_execution_helper import enable_strict_mode
+from letta.llm_api.llm_client import LLMClient
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+from letta.log import get_logger
+from letta.otel.tracing import log_event, trace_method, tracer
+from letta.prompts.prompt_generator import PromptGenerator
+from letta.schemas.agent import AgentState, AgentType, UpdateAgent
+from letta.schemas.enums import JobStatus, MessageRole, MessageStreamStatus, StepStatus
+from letta.schemas.letta_message import LettaMessage, MessageType
+from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
+from letta.schemas.letta_response import LettaResponse
+from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
+from letta.schemas.message import Message, MessageCreate, MessageUpdate
+from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
+from letta.schemas.step import Step, StepProgression
+from letta.schemas.step_metrics import StepMetrics
+from letta.schemas.tool_execution_result import ToolExecutionResult
+from letta.schemas.usage import LettaUsageStatistics
+from letta.schemas.user import User
+from letta.server.rest_api.utils import create_approval_request_message_from_llm_response, create_letta_messages_from_llm_response
+from letta.services.agent_manager import AgentManager
+from letta.services.archive_manager import ArchiveManager
+from letta.services.block_manager import BlockManager
+from letta.services.helpers.tool_parser_helper import runtime_override_tool_json_schema
+from letta.services.job_manager import JobManager
+from letta.services.message_manager import MessageManager
+from letta.services.passage_manager import PassageManager
+from letta.services.step_manager import StepManager
+from letta.services.summarizer.enums import SummarizationMode
+from letta.services.summarizer.summarizer import Summarizer
+from letta.services.telemetry_manager import TelemetryManager
+from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
+from letta.settings import model_settings, settings, summarizer_settings
+from letta.system import package_function_response
+from letta.types import JsonDict
+from letta.utils import log_telemetry, safe_create_task, united_diff, validate_function_response
+
+
+class LettaAgentV2(BaseAgentV2):
+    """
+    Abstract base class for the Letta agent loop, handling message management,
+    LLM API requests, tool execution, and context tracking.
+
+    This implementation uses a unified execution path through the _step method,
+    supporting both blocking and streaming LLM interactions via the adapter pattern.
+    """
+
+    def __init__(
+        self,
+        agent_state: AgentState,
+        actor: User,
+    ):
+        super().__init__(agent_state, actor)
+        self.logger = get_logger(agent_state.id)
+        self.tool_rules_solver = ToolRulesSolver(tool_rules=agent_state.tool_rules)
+        self.llm_client = LLMClient.create(
+            provider_type=agent_state.llm_config.model_endpoint_type,
+            put_inner_thoughts_first=True,
+            actor=actor,
+        )
+        self._initialize_state()
+
+        # Manager classes
+        self.agent_manager = AgentManager()
+        self.archive_manager = ArchiveManager()
+        self.block_manager = BlockManager()
+        self.job_manager = JobManager()
+        self.message_manager = MessageManager()
+        self.passage_manager = PassageManager()
+        self.step_manager = StepManager()
+        self.telemetry_manager = TelemetryManager()
+
+        # TODO: Expand to more
+        if summarizer_settings.enable_summarization and model_settings.openai_api_key:
+            self.summarization_agent = EphemeralSummaryAgent(
+                target_block_label="conversation_summary",
+                agent_id=self.agent_state.id,
+                block_manager=self.block_manager,
+                message_manager=self.message_manager,
+                agent_manager=self.agent_manager,
+                actor=self.actor,
+            )
+
+        # Initialize summarizer for context window management
+        self.summarizer = Summarizer(
+            mode=(
+                SummarizationMode.STATIC_MESSAGE_BUFFER
+                if self.agent_state.agent_type == AgentType.voice_convo_agent
+                else summarizer_settings.mode
+            ),
+            summarizer_agent=self.summarization_agent,
+            message_buffer_limit=summarizer_settings.message_buffer_limit,
+            message_buffer_min=summarizer_settings.message_buffer_min,
+            partial_evict_summarizer_percentage=summarizer_settings.partial_evict_summarizer_percentage,
+            agent_manager=self.agent_manager,
+            message_manager=self.message_manager,
+            actor=self.actor,
+            agent_id=self.agent_state.id,
+        )
+
+    @trace_method
+    async def build_request(self, input_messages: list[MessageCreate]) -> dict:
+        """
+        Build the request data for an LLM call without actually executing it.
+
+        This is useful for debugging and testing to see what would be sent to the LLM.
+
+        Args:
+            input_messages: List of new messages to process
+
+        Returns:
+            dict: The request data that would be sent to the LLM
+        """
+        request = {}
+        in_context_messages, input_messages_to_persist = await _prepare_in_context_messages_no_persist_async(
+            input_messages, self.agent_state, self.message_manager, self.actor
+        )
+        response = self._step(
+            messages=in_context_messages + input_messages_to_persist,
+            llm_adapter=LettaLLMRequestAdapter(llm_client=self.llm_client, llm_config=self.agent_state.llm_config),
+            dry_run=True,
+        )
+        async for chunk in response:
+            request = chunk  # First chunk contains request data
+            break
+
+        return request
+
+    @trace_method
+    async def step(
+        self,
+        input_messages: list[MessageCreate],
+        max_steps: int = DEFAULT_MAX_STEPS,
+        run_id: str | None = None,
+        use_assistant_message: bool = True,
+        include_return_message_types: list[MessageType] | None = None,
+        request_start_timestamp_ns: int | None = None,
+    ) -> LettaResponse:
+        """
+        Execute the agent loop in blocking mode, returning all messages at once.
+
+        Args:
+            input_messages: List of new messages to process
+            max_steps: Maximum number of agent steps to execute
+            run_id: Optional job/run ID for tracking
+            use_assistant_message: Whether to use assistant message format
+            include_return_message_types: Filter for which message types to return
+            request_start_timestamp_ns: Start time for tracking request duration
+
+        Returns:
+            LettaResponse: Complete response with all messages and metadata
+        """
+        self._initialize_state()
+        request_span = self._request_checkpoint_start(request_start_timestamp_ns=request_start_timestamp_ns)
+
+        in_context_messages, input_messages_to_persist = await _prepare_in_context_messages_no_persist_async(
+            input_messages, self.agent_state, self.message_manager, self.actor
+        )
+        in_context_messages = in_context_messages + input_messages_to_persist
+        response_letta_messages = []
+        for i in range(max_steps):
+            response = self._step(
+                messages=in_context_messages + self.response_messages,
+                input_messages_to_persist=input_messages_to_persist,
+                llm_adapter=LettaLLMRequestAdapter(llm_client=self.llm_client, llm_config=self.agent_state.llm_config),
+                run_id=run_id,
+                use_assistant_message=use_assistant_message,
+                include_return_message_types=include_return_message_types,
+                request_start_timestamp_ns=request_start_timestamp_ns,
+            )
+
+            async for chunk in response:
+                response_letta_messages.append(chunk)
+
+            if not self.should_continue:
+                break
+
+            input_messages_to_persist = []
+
+        # Rebuild context window after stepping
+        if not self.agent_state.message_buffer_autoclear:
+            await self.summarize_conversation_history(
+                in_context_messages=in_context_messages,
+                new_letta_messages=self.response_messages,
+                total_tokens=self.usage.total_tokens,
+                force=False,
+            )
+
+        if self.stop_reason is None:
+            self.stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+        self._request_checkpoint_finish(request_span=request_span, request_start_timestamp_ns=request_start_timestamp_ns)
+        return LettaResponse(messages=response_letta_messages, stop_reason=self.stop_reason, usage=self.usage)
+
+    @trace_method
+    async def stream(
+        self,
+        input_messages: list[MessageCreate],
+        max_steps: int = DEFAULT_MAX_STEPS,
+        stream_tokens: bool = False,
+        run_id: str | None = None,
+        use_assistant_message: bool = True,
+        include_return_message_types: list[MessageType] | None = None,
+        request_start_timestamp_ns: int | None = None,
+    ) -> AsyncGenerator[str, None]:
+        """
+        Execute the agent loop in streaming mode, yielding chunks as they become available.
+        If stream_tokens is True, individual tokens are streamed as they arrive from the LLM,
+        providing the lowest latency experience, otherwise each complete step (reasoning +
+        tool call + tool return) is yielded as it completes.
+
+        Args:
+            input_messages: List of new messages to process
+            max_steps: Maximum number of agent steps to execute
+            stream_tokens: Whether to stream back individual tokens. Not all llm
+                providers offer native token streaming functionality; in these cases,
+                this api streams back steps rather than individual tokens.
+            run_id: Optional job/run ID for tracking
+            use_assistant_message: Whether to use assistant message format
+            include_return_message_types: Filter for which message types to return
+            request_start_timestamp_ns: Start time for tracking request duration
+
+        Yields:
+            str: JSON-formatted SSE data chunks for each completed step
+        """
+        self._initialize_state()
+        request_span = self._request_checkpoint_start(request_start_timestamp_ns=request_start_timestamp_ns)
+        first_chunk = True
+
+        if stream_tokens:
+            llm_adapter = LettaLLMStreamAdapter(
+                llm_client=self.llm_client,
+                llm_config=self.agent_state.llm_config,
+            )
+        else:
+            llm_adapter = LettaLLMRequestAdapter(
+                llm_client=self.llm_client,
+                llm_config=self.agent_state.llm_config,
+            )
+
+        try:
+            in_context_messages, input_messages_to_persist = await _prepare_in_context_messages_no_persist_async(
+                input_messages, self.agent_state, self.message_manager, self.actor
+            )
+            in_context_messages = in_context_messages + input_messages_to_persist
+            for i in range(max_steps):
+                response = self._step(
+                    messages=in_context_messages + self.response_messages,
+                    input_messages_to_persist=input_messages_to_persist,
+                    llm_adapter=llm_adapter,
+                    run_id=run_id,
+                    use_assistant_message=use_assistant_message,
+                    include_return_message_types=include_return_message_types,
+                    request_start_timestamp_ns=request_start_timestamp_ns,
+                )
+                async for chunk in response:
+                    if first_chunk:
+                        request_span = self._request_checkpoint_ttft(request_span, request_start_timestamp_ns)
+                    yield f"data: {chunk.model_dump_json()}\n\n"
+                    first_chunk = False
+
+                if not self.should_continue:
+                    break
+
+                input_messages_to_persist = []
+
+            if not self.agent_state.message_buffer_autoclear:
+                await self.summarize_conversation_history(
+                    in_context_messages=in_context_messages,
+                    new_letta_messages=self.response_messages,
+                    total_tokens=self.usage.total_tokens,
+                    force=False,
+                )
+
+        except:
+            if self.stop_reason:
+                yield f"data: {self.stop_reason.model_dump_json()}\n\n"
+            raise
+
+        self._request_checkpoint_finish(request_span=request_span, request_start_timestamp_ns=request_start_timestamp_ns)
+        for finish_chunk in self.get_finish_chunks_for_stream(self.usage, self.stop_reason):
+            yield f"data: {finish_chunk}\n\n"
+
+    @trace_method
+    async def _step(
+        self,
+        messages: list[Message],
+        llm_adapter: LettaLLMAdapter,
+        input_messages_to_persist: list[Message] | None = None,
+        run_id: str | None = None,
+        use_assistant_message: bool = True,
+        include_return_message_types: list[MessageType] | None = None,
+        request_start_timestamp_ns: int | None = None,
+        remaining_turns: int = -1,
+        dry_run: bool = False,
+    ) -> AsyncGenerator[LettaMessage | dict, None]:
+        """
+        Execute a single agent step (one LLM call and tool execution).
+
+        This is the core execution method that all public methods (step, stream_steps,
+        stream_tokens) funnel through. It handles the complete flow of making an LLM
+        request, processing the response, executing tools, and persisting messages.
+
+        Args:
+            messages: Current in-context messages
+            llm_adapter: Adapter for LLM interaction (blocking or streaming)
+            input_messages_to_persist: New messages to persist after execution
+            run_id: Optional job/run ID for tracking
+            use_assistant_message: Whether to use assistant message format
+            include_return_message_types: Filter for which message types to yield
+            request_start_timestamp_ns: Start time for tracking request duration
+            remaining_turns: Number of turns remaining (for max_steps enforcement)
+            dry_run: If true, only build and return the request without executing
+
+        Yields:
+            LettaMessage or dict: Chunks for streaming mode, or request data for dry_run
+        """
+        step_progression = StepProgression.START
+        # TODO(@caren): clean this up
+        tool_call, reasoning_content, agent_step_span, first_chunk, step_id, logged_step, step_start_ns, step_metrics = (
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+            None,
+        )
+        try:
+            self.last_function_response = self._load_last_function_response(messages)
+            valid_tools = await self._get_valid_tools()
+            approval_request, approval_response = await self._maybe_get_approval_messages(messages)
+            if approval_request and approval_response:
+                tool_call = approval_request.tool_calls[0]
+                reasoning_content = approval_request.content
+                step_id = approval_request.step_id
+                step_metrics = await self.step_manager.get_step_metrics_async(step_id=step_id, actor=self.actor)
+            else:
+                # Check for job cancellation at the start of each step
+                if run_id and await self._check_run_cancellation(run_id):
+                    self.stop_reason = LettaStopReason(stop_reason=StopReasonType.cancelled.value)
+                    self.logger.info(f"Agent execution cancelled for run {run_id}")
+                    return
+
+                step_id = generate_step_id()
+                step_progression, logged_step, step_metrics, agent_step_span = await self._step_checkpoint_start(
+                    step_id=step_id, run_id=run_id
+                )
+
+                messages = await self._refresh_messages(messages)
+                force_tool_call = valid_tools[0]["name"] if len(valid_tools) == 1 else None
+                for llm_request_attempt in range(summarizer_settings.max_summarizer_retries + 1):
+                    try:
+                        request_data = self.llm_client.build_request_data(
+                            messages=messages,
+                            llm_config=self.agent_state.llm_config,
+                            tools=valid_tools,
+                            force_tool_call=force_tool_call,
+                        )
+                        if dry_run:
+                            yield request_data
+                            return
+
+                        step_progression, step_metrics = self._step_checkpoint_llm_request_start(step_metrics, agent_step_span)
+
+                        invocation = llm_adapter.invoke_llm(
+                            request_data=request_data,
+                            messages=messages,
+                            tools=valid_tools,
+                            use_assistant_message=use_assistant_message,
+                            requires_approval_tools=self.tool_rules_solver.get_requires_approval_tools(
+                                set([t["name"] for t in valid_tools])
+                            ),
+                            step_id=step_id,
+                            actor=self.actor,
+                        )
+                        async for chunk in invocation:
+                            if llm_adapter.supports_token_streaming():
+                                if include_return_message_types is None or chunk.message_type in include_return_message_types:
+                                    first_chunk = True
+                                    yield chunk
+                        # If you've reached this point without an error, break out of retry loop
+                        break
+                    except ValueError as e:
+                        self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
+                        raise e
+                    except Exception as e:
+                        if isinstance(e, ContextWindowExceededError) and llm_request_attempt < summarizer_settings.max_summarizer_retries:
+                            # Retry case
+                            messages = await self.summarize_conversation_history(
+                                in_context_messages=messages,
+                                new_letta_messages=self.response_messages,
+                                llm_config=self.agent_state.llm_config,
+                                force=True,
+                            )
+                        else:
+                            raise e
+
+                step_progression, step_metrics = self._step_checkpoint_llm_request_finish(
+                    step_metrics, agent_step_span, llm_adapter.llm_request_finish_timestamp_ns
+                )
+
+                self._update_global_usage_stats(llm_adapter.usage)
+
+            # Handle the AI response with the extracted data
+            if tool_call is None and llm_adapter.tool_call is None:
+                self.stop_reason = LettaStopReason(stop_reason=StopReasonType.no_tool_call.value)
+                raise ValueError("No tool calls found in response, model must make a tool call")
+
+            persisted_messages, self.should_continue, self.stop_reason = await self._handle_ai_response(
+                tool_call or llm_adapter.tool_call,
+                [tool["name"] for tool in valid_tools],
+                self.agent_state,
+                self.tool_rules_solver,
+                UsageStatistics(
+                    completion_tokens=self.usage.completion_tokens,
+                    prompt_tokens=self.usage.prompt_tokens,
+                    total_tokens=self.usage.total_tokens,
+                ),
+                reasoning_content=reasoning_content or llm_adapter.reasoning_content,
+                pre_computed_assistant_message_id=llm_adapter.message_id,
+                step_id=step_id,
+                initial_messages=input_messages_to_persist,
+                agent_step_span=agent_step_span,
+                is_final_step=(remaining_turns == 0),
+                run_id=run_id,
+                step_metrics=step_metrics,
+                is_approval=approval_response.approve if approval_response is not None else False,
+                is_denial=(approval_response.approve == False) if approval_response is not None else False,
+                denial_reason=approval_response.denial_reason if approval_response is not None else None,
+            )
+
+            new_message_idx = len(input_messages_to_persist) if input_messages_to_persist else 0
+            self.response_messages.extend(persisted_messages[new_message_idx:])
+
+            if llm_adapter.supports_token_streaming():
+                if persisted_messages[-1].role != "approval":
+                    tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
+                    if not (use_assistant_message and tool_return.name == "send_message"):
+                        if include_return_message_types is None or tool_return.message_type in include_return_message_types:
+                            yield tool_return
+            else:
+                filter_user_messages = [m for m in persisted_messages[new_message_idx:] if m.role != "user"]
+                letta_messages = Message.to_letta_messages_from_list(
+                    filter_user_messages,
+                    use_assistant_message=use_assistant_message,
+                    reverse=False,
+                )
+                for message in letta_messages:
+                    if include_return_message_types is None or message.message_type in include_return_message_types:
+                        yield message
+
+            step_progression, step_metrics = await self._step_checkpoint_finish(step_metrics, agent_step_span, logged_step)
+        except Exception as e:
+            self.logger.error(f"Error during step processing: {e}")
+            self.job_update_metadata = {"error": str(e)}
+
+            # This indicates we failed after we decided to stop stepping, which indicates a bug with our flow.
+            if not self.stop_reason:
+                self.stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+            elif self.stop_reason.stop_reason in (StopReasonType.end_turn, StopReasonType.max_steps, StopReasonType.tool_rule):
+                self.logger.error("Error occurred during step processing, with valid stop reason: %s", self.stop_reason.stop_reason)
+            elif self.stop_reason.stop_reason not in (
+                StopReasonType.no_tool_call,
+                StopReasonType.invalid_tool_call,
+                StopReasonType.invalid_llm_response,
+            ):
+                self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", self.stop_reason.stop_reason)
+            raise e
+        finally:
+            self.logger.debug("Running cleanup for agent loop run: %s", run_id)
+            self.logger.info("Running final update. Step Progression: %s", step_progression)
+            try:
+                if step_progression == StepProgression.FINISHED:
+                    if not self.should_continue:
+                        if self.stop_reason is None:
+                            self.stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+                        if logged_step and step_id:
+                            await self.step_manager.update_step_stop_reason(self.actor, step_id, self.stop_reason.stop_reason)
+                    return
+                if step_progression < StepProgression.STEP_LOGGED:
+                    # Error occurred before step was fully logged
+                    import traceback
+
+                    if logged_step:
+                        await self.step_manager.update_step_error_async(
+                            actor=self.actor,
+                            step_id=step_id,  # Use original step_id for telemetry
+                            error_type=type(e).__name__ if "e" in locals() else "Unknown",
+                            error_message=str(e) if "e" in locals() else "Unknown error",
+                            error_traceback=traceback.format_exc(),
+                            stop_reason=self.stop_reason,
+                        )
+                if step_progression <= StepProgression.STREAM_RECEIVED:
+                    if first_chunk and settings.track_errored_messages and input_messages_to_persist:
+                        for message in input_messages_to_persist:
+                            message.is_err = True
+                            message.step_id = step_id
+                        await self.message_manager.create_many_messages_async(
+                            input_messages_to_persist,
+                            actor=self.actor,
+                            project_id=self.agent_state.project_id,
+                            template_id=self.agent_state.template_id,
+                        )
+                elif step_progression <= StepProgression.LOGGED_TRACE:
+                    if self.stop_reason is None:
+                        self.logger.error("Error in step after logging step")
+                        self.stop_reason = LettaStopReason(stop_reason=StopReasonType.error.value)
+                    if logged_step:
+                        await self.step_manager.update_step_stop_reason(self.actor, step_id, self.stop_reason.stop_reason)
+                else:
+                    self.logger.error("Invalid StepProgression value")
+
+                # Do tracking for failure cases. Can consolidate with success conditions later.
+                if settings.track_stop_reason:
+                    await self._log_request(request_start_timestamp_ns, None, self.job_update_metadata, is_error=True, run_id=run_id)
+
+                # Record partial step metrics on failure (capture whatever timing data we have)
+                if logged_step and step_metrics and step_progression < StepProgression.FINISHED:
+                    # Calculate total step time up to the failure point
+                    step_metrics.step_ns = get_utc_timestamp_ns() - step_metrics.step_start_ns
+
+                    await self._record_step_metrics(
+                        step_id=step_id,
+                        step_metrics=step_metrics,
+                        run_id=run_id,
+                    )
+            except Exception as e:
+                self.logger.error(f"Error during post-completion step tracking: {e}")
+
+    def _initialize_state(self):
+        self.should_continue = True
+        self.stop_reason = None
+        self.usage = LettaUsageStatistics()
+        self.job_update_metadata = None
+        self.last_function_response = None
+        self.response_messages = []
+
+    async def _maybe_get_approval_messages(self, messages: list[Message]) -> Tuple[Message | None, Message | None]:
+        if len(messages) >= 2:
+            maybe_approval_request, maybe_approval_response = messages[-2], messages[-1]
+            if maybe_approval_request.role == "approval" and maybe_approval_response.role == "approval":
+                return maybe_approval_request, maybe_approval_response
+        return None, None
+
+    @trace_method
+    async def _check_run_cancellation(self, run_id) -> bool:
+        try:
+            job = await self.job_manager.get_job_by_id_async(job_id=run_id, actor=self.actor)
+            return job.status == JobStatus.cancelled
+        except Exception as e:
+            # Log the error but don't fail the execution
+            self.logger.warning(f"Failed to check job cancellation status for job {run_id}: {e}")
+            return False
+
+    @trace_method
+    async def _refresh_messages(self, in_context_messages: list[Message]):
+        num_messages = await self.message_manager.size_async(
+            agent_id=self.agent_state.id,
+            actor=self.actor,
+        )
+        num_archival_memories = await self.passage_manager.agent_passage_size_async(
+            agent_id=self.agent_state.id,
+            actor=self.actor,
+        )
+        in_context_messages = await self._rebuild_memory(
+            in_context_messages,
+            num_messages=num_messages,
+            num_archival_memories=num_archival_memories,
+        )
+        in_context_messages = scrub_inner_thoughts_from_messages(in_context_messages, self.agent_state.llm_config)
+        return in_context_messages
+
+    @trace_method
+    async def _rebuild_memory(
+        self,
+        in_context_messages: list[Message],
+        num_messages: int,
+        num_archival_memories: int,
+    ):
+        agent_state = await self.agent_manager.refresh_memory_async(agent_state=self.agent_state, actor=self.actor)
+
+        tool_constraint_block = None
+        if self.tool_rules_solver is not None:
+            tool_constraint_block = self.tool_rules_solver.compile_tool_rule_prompts()
+
+        archive = await self.archive_manager.get_default_archive_for_agent_async(
+            agent_id=self.agent_state.id,
+            actor=self.actor,
+        )
+
+        if archive:
+            archive_tags = await self.passage_manager.get_unique_tags_for_archive_async(
+                archive_id=archive.id,
+                actor=self.actor,
+            )
+        else:
+            archive_tags = None
+
+        # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
+        curr_system_message = in_context_messages[0]
+        curr_system_message_text = curr_system_message.content[0].text
+
+        # extract the dynamic section that includes memory blocks, tool rules, and directories
+        # this avoids timestamp comparison issues
+        def extract_dynamic_section(text):
+            start_marker = "</base_instructions>"
+            end_marker = "<memory_metadata>"
+
+            start_idx = text.find(start_marker)
+            end_idx = text.find(end_marker)
+
+            if start_idx != -1 and end_idx != -1:
+                return text[start_idx:end_idx]
+            return text  # fallback to full text if markers not found
+
+        curr_dynamic_section = extract_dynamic_section(curr_system_message_text)
+
+        # generate just the memory string with current state for comparison
+        curr_memory_str = await agent_state.memory.compile_in_thread_async(
+            tool_usage_rules=tool_constraint_block, sources=agent_state.sources, max_files_open=agent_state.max_files_open
+        )
+        new_dynamic_section = extract_dynamic_section(curr_memory_str)
+
+        # compare just the dynamic sections (memory blocks, tool rules, directories)
+        if curr_dynamic_section == new_dynamic_section:
+            self.logger.debug(
+                f"Memory and sources haven't changed for agent id={agent_state.id} and actor=({self.actor.id}, {self.actor.name}), skipping system prompt rebuild"
+            )
+            return in_context_messages
+
+        memory_edit_timestamp = get_utc_time()
+
+        # size of messages and archival memories
+        if num_messages is None:
+            num_messages = await self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+        if num_archival_memories is None:
+            num_archival_memories = await self.passage_manager.agent_passage_size_async(actor=self.actor, agent_id=agent_state.id)
+
+        new_system_message_str = PromptGenerator.get_system_message_from_compiled_memory(
+            system_prompt=agent_state.system,
+            memory_with_sources=curr_memory_str,
+            in_context_memory_last_edit=memory_edit_timestamp,
+            timezone=agent_state.timezone,
+            previous_message_count=num_messages - len(in_context_messages),
+            archival_memory_size=num_archival_memories,
+            archive_tags=archive_tags,
+        )
+
+        diff = united_diff(curr_system_message_text, new_system_message_str)
+        if len(diff) > 0:
+            self.logger.debug(f"Rebuilding system with new memory...\nDiff:\n{diff}")
+
+            # [DB Call] Update Messages
+            new_system_message = await self.message_manager.update_message_by_id_async(
+                curr_system_message.id, message_update=MessageUpdate(content=new_system_message_str), actor=self.actor
+            )
+            return [new_system_message] + in_context_messages[1:]
+
+        else:
+            return in_context_messages
+
+    @trace_method
+    async def _get_valid_tools(self):
+        tools = self.agent_state.tools
+        valid_tool_names = self.tool_rules_solver.get_allowed_tool_names(
+            available_tools=set([t.name for t in tools]),
+            last_function_response=self.last_function_response,
+            error_on_empty=False,  # Return empty list instead of raising error
+        ) or list(set(t.name for t in tools))
+        allowed_tools = [enable_strict_mode(t.json_schema) for t in tools if t.name in set(valid_tool_names)]
+        terminal_tool_names = {rule.tool_name for rule in self.tool_rules_solver.terminal_tool_rules}
+        allowed_tools = runtime_override_tool_json_schema(
+            tool_list=allowed_tools,
+            response_format=self.agent_state.response_format,
+            request_heartbeat=True,
+            terminal_tools=terminal_tool_names,
+        )
+        return allowed_tools
+
+    @trace_method
+    def _load_last_function_response(self, in_context_messages: list[Message]):
+        """Load the last function response from message history"""
+        for msg in reversed(in_context_messages):
+            if msg.role == MessageRole.tool and msg.content and len(msg.content) == 1 and isinstance(msg.content[0], TextContent):
+                text_content = msg.content[0].text
+                try:
+                    response_json = json.loads(text_content)
+                    if response_json.get("message"):
+                        return response_json["message"]
+                except (json.JSONDecodeError, KeyError):
+                    raise ValueError(f"Invalid JSON format in message: {text_content}")
+        return None
+
+    @trace_method
+    def _request_checkpoint_start(self, request_start_timestamp_ns: int | None) -> Span | None:
+        if request_start_timestamp_ns is not None:
+            request_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
+            request_span.set_attributes(
+                {f"llm_config.{k}": v for k, v in self.agent_state.llm_config.model_dump().items() if v is not None}
+            )
+            return request_span
+        return None
+
+    @trace_method
+    def _request_checkpoint_ttft(self, request_span: Span | None, request_start_timestamp_ns: int | None) -> Span | None:
+        if request_span:
+            ttft_ns = get_utc_timestamp_ns() - request_start_timestamp_ns
+            request_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ns_to_ms(ttft_ns)})
+            return request_span
+        return None
+
+    @trace_method
+    def _request_checkpoint_finish(self, request_span: Span | None, request_start_timestamp_ns: int | None) -> None:
+        if request_span is not None:
+            duration_ns = get_utc_timestamp_ns() - request_start_timestamp_ns
+            request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(duration_ns)})
+            request_span.end()
+        return None
+
+    @trace_method
+    async def _step_checkpoint_start(self, step_id: str, run_id: str | None) -> Tuple[StepProgression, Step, StepMetrics, Span]:
+        step_start_ns = get_utc_timestamp_ns()
+        step_metrics = StepMetrics(id=step_id, step_start_ns=step_start_ns)
+        agent_step_span = tracer.start_span("agent_step", start_time=step_start_ns)
+        agent_step_span.set_attributes({"step_id": step_id})
+        # Create step early with PENDING status
+        logged_step = await self.step_manager.log_step_async(
+            actor=self.actor,
+            agent_id=self.agent_state.id,
+            provider_name=self.agent_state.llm_config.model_endpoint_type,
+            provider_category=self.agent_state.llm_config.provider_category or "base",
+            model=self.agent_state.llm_config.model,
+            model_endpoint=self.agent_state.llm_config.model_endpoint,
+            context_window_limit=self.agent_state.llm_config.context_window,
+            usage=UsageStatistics(completion_tokens=0, prompt_tokens=0, total_tokens=0),
+            provider_id=None,
+            job_id=run_id,
+            step_id=step_id,
+            project_id=self.agent_state.project_id,
+            status=StepStatus.PENDING,
+        )
+        return StepProgression.START, logged_step, step_metrics, agent_step_span
+
+    @trace_method
+    def _step_checkpoint_llm_request_start(self, step_metrics: StepMetrics, agent_step_span: Span) -> Tuple[StepProgression, StepMetrics]:
+        llm_request_start_ns = get_utc_timestamp_ns()
+        step_metrics.llm_request_start_ns = llm_request_start_ns
+        agent_step_span.add_event(
+            name="request_start_to_provider_request_start_ns",
+            attributes={"request_start_to_provider_request_start_ns": ns_to_ms(llm_request_start_ns)},
+        )
+        return StepProgression.START, step_metrics
+
+    @trace_method
+    def _step_checkpoint_llm_request_finish(
+        self, step_metrics: StepMetrics, agent_step_span: Span, llm_request_finish_timestamp_ns: int
+    ) -> Tuple[StepProgression, StepMetrics]:
+        llm_request_ns = llm_request_finish_timestamp_ns - step_metrics.llm_request_start_ns
+        step_metrics.llm_request_ns = llm_request_ns
+        agent_step_span.add_event(name="llm_request_ms", attributes={"duration_ms": ns_to_ms(llm_request_ns)})
+        return StepProgression.RESPONSE_RECEIVED, step_metrics
+
+    @trace_method
+    async def _step_checkpoint_finish(
+        self, step_metrics: StepMetrics, agent_step_span: Span | None, logged_step: Step | None
+    ) -> Tuple[StepProgression, StepMetrics]:
+        if step_metrics.step_start_ns:
+            step_ns = get_utc_timestamp_ns() - step_metrics.step_start_ns
+            step_metrics.step_ns = step_ns
+            if agent_step_span is not None:
+                agent_step_span.add_event(name="step_ms", attributes={"duration_ms": ns_to_ms(step_ns)})
+                agent_step_span.end()
+        self._record_step_metrics(step_id=step_metrics.id, step_metrics=step_metrics)
+
+        # Update step with actual usage now that we have it (if step was created)
+        if logged_step:
+            await self.step_manager.update_step_success_async(
+                self.actor,
+                step_metrics.id,
+                UsageStatistics(
+                    completion_tokens=self.usage.completion_tokens,
+                    prompt_tokens=self.usage.prompt_tokens,
+                    total_tokens=self.usage.total_tokens,
+                ),
+                self.stop_reason,
+            )
+        return StepProgression.FINISHED, step_metrics
+
+    def _update_global_usage_stats(self, step_usage_stats: LettaUsageStatistics):
+        self.usage.step_count += step_usage_stats.step_count
+        self.usage.completion_tokens += step_usage_stats.completion_tokens
+        self.usage.prompt_tokens += step_usage_stats.prompt_tokens
+        self.usage.total_tokens += step_usage_stats.total_tokens
+
+    @trace_method
+    async def _handle_ai_response(
+        self,
+        tool_call: ToolCall,
+        valid_tool_names: list[str],
+        agent_state: AgentState,
+        tool_rules_solver: ToolRulesSolver,
+        usage: UsageStatistics,
+        reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent | OmittedReasoningContent] | None = None,
+        pre_computed_assistant_message_id: str | None = None,
+        step_id: str | None = None,
+        initial_messages: list[Message] | None = None,
+        agent_step_span: Span | None = None,
+        is_final_step: bool | None = None,
+        run_id: str | None = None,
+        step_metrics: StepMetrics = None,
+        is_approval: bool | None = None,
+        is_denial: bool | None = None,
+        denial_reason: str | None = None,
+    ) -> tuple[list[Message], bool, LettaStopReason | None]:
+        """
+        Handle the final AI response once streaming completes, execute / validate the
+        tool call, decide whether we should keep stepping, and persist state.
+        """
+        tool_call_id: str = tool_call.id or f"call_{uuid.uuid4().hex[:8]}"
+
+        if is_denial:
+            continue_stepping = True
+            stop_reason = None
+            tool_call_messages = create_letta_messages_from_llm_response(
+                agent_id=agent_state.id,
+                model=agent_state.llm_config.model,
+                function_name=tool_call.function.name,
+                function_arguments={},
+                tool_execution_result=ToolExecutionResult(status="error"),
+                tool_call_id=tool_call_id,
+                function_call_success=False,
+                function_response=f"Error: request to call tool denied. User reason: {denial_reason}",
+                timezone=agent_state.timezone,
+                actor=self.actor,
+                continue_stepping=continue_stepping,
+                heartbeat_reason=f"{NON_USER_MSG_PREFIX}Continuing: user denied request to call tool.",
+                reasoning_content=None,
+                pre_computed_assistant_message_id=None,
+                step_id=step_id,
+                is_approval_response=True,
+            )
+            messages_to_persist = (initial_messages or []) + tool_call_messages
+            persisted_messages = await self.message_manager.create_many_messages_async(
+                messages_to_persist,
+                actor=self.actor,
+                project_id=agent_state.project_id,
+                template_id=agent_state.template_id,
+            )
+            return persisted_messages, continue_stepping, stop_reason
+
+        # 1. Parse and validate the tool-call envelope
+        tool_call_name: str = tool_call.function.name
+
+        tool_args = _safe_load_tool_call_str(tool_call.function.arguments)
+        request_heartbeat: bool = _pop_heartbeat(tool_args)
+        tool_args.pop(INNER_THOUGHTS_KWARG, None)
+
+        log_telemetry(
+            self.logger,
+            "_handle_ai_response execute tool start",
+            tool_name=tool_call_name,
+            tool_args=tool_args,
+            tool_call_id=tool_call_id,
+            request_heartbeat=request_heartbeat,
+        )
+
+        if not is_approval and tool_rules_solver.is_requires_approval_tool(tool_call_name):
+            approval_message = create_approval_request_message_from_llm_response(
+                agent_id=agent_state.id,
+                model=agent_state.llm_config.model,
+                function_name=tool_call_name,
+                function_arguments=tool_args,
+                tool_call_id=tool_call_id,
+                actor=self.actor,
+                continue_stepping=request_heartbeat,
+                reasoning_content=reasoning_content,
+                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
+                step_id=step_id,
+            )
+            messages_to_persist = (initial_messages or []) + [approval_message]
+            continue_stepping = False
+            stop_reason = LettaStopReason(stop_reason=StopReasonType.requires_approval.value)
+        else:
+            # 2. Execute the tool (or synthesize an error result if disallowed)
+            tool_rule_violated = tool_call_name not in valid_tool_names and not is_approval
+            if tool_rule_violated:
+                tool_execution_result = _build_rule_violation_result(tool_call_name, valid_tool_names, tool_rules_solver)
+            else:
+                # Track tool execution time
+                tool_start_time = get_utc_timestamp_ns()
+                tool_execution_result = await self._execute_tool(
+                    tool_name=tool_call_name,
+                    tool_args=tool_args,
+                    agent_state=agent_state,
+                    agent_step_span=agent_step_span,
+                    step_id=step_id,
+                )
+                tool_end_time = get_utc_timestamp_ns()
+
+                # Store tool execution time in metrics
+                step_metrics.tool_execution_ns = tool_end_time - tool_start_time
+
+            log_telemetry(
+                self.logger,
+                "_handle_ai_response execute tool finish",
+                tool_execution_result=tool_execution_result,
+                tool_call_id=tool_call_id,
+            )
+
+            # 3. Prepare the function-response payload
+            truncate = tool_call_name not in {"conversation_search", "conversation_search_date", "archival_memory_search"}
+            return_char_limit = next(
+                (t.return_char_limit for t in agent_state.tools if t.name == tool_call_name),
+                None,
+            )
+            function_response_string = validate_function_response(
+                tool_execution_result.func_return,
+                return_char_limit=return_char_limit,
+                truncate=truncate,
+            )
+            self.last_function_response = package_function_response(
+                was_success=tool_execution_result.success_flag,
+                response_string=function_response_string,
+                timezone=agent_state.timezone,
+            )
+
+            # 4. Decide whether to keep stepping (focal section simplified)
+            continue_stepping, heartbeat_reason, stop_reason = self._decide_continuation(
+                agent_state=agent_state,
+                request_heartbeat=request_heartbeat,
+                tool_call_name=tool_call_name,
+                tool_rule_violated=tool_rule_violated,
+                tool_rules_solver=tool_rules_solver,
+                is_final_step=is_final_step,
+            )
+
+            # 5. Create messages (step was already created at the beginning)
+            tool_call_messages = create_letta_messages_from_llm_response(
+                agent_id=agent_state.id,
+                model=agent_state.llm_config.model,
+                function_name=tool_call_name,
+                function_arguments=tool_args,
+                tool_execution_result=tool_execution_result,
+                tool_call_id=tool_call_id,
+                function_call_success=tool_execution_result.success_flag,
+                function_response=function_response_string,
+                timezone=agent_state.timezone,
+                actor=self.actor,
+                continue_stepping=continue_stepping,
+                heartbeat_reason=heartbeat_reason,
+                reasoning_content=reasoning_content,
+                pre_computed_assistant_message_id=pre_computed_assistant_message_id,
+                step_id=step_id,
+                is_approval_response=is_approval or is_denial,
+            )
+            messages_to_persist = (initial_messages or []) + tool_call_messages
+
+        persisted_messages = await self.message_manager.create_many_messages_async(
+            messages_to_persist, actor=self.actor, project_id=agent_state.project_id, template_id=agent_state.template_id
+        )
+
+        if run_id:
+            await self.job_manager.add_messages_to_job_async(
+                job_id=run_id,
+                message_ids=[m.id for m in persisted_messages if m.role != "user"],
+                actor=self.actor,
+            )
+
+        return persisted_messages, continue_stepping, stop_reason
+
+    @trace_method
+    def _decide_continuation(
+        self,
+        agent_state: AgentState,
+        request_heartbeat: bool,
+        tool_call_name: str,
+        tool_rule_violated: bool,
+        tool_rules_solver: ToolRulesSolver,
+        is_final_step: bool | None,
+    ) -> tuple[bool, str | None, LettaStopReason | None]:
+        continue_stepping = request_heartbeat
+        heartbeat_reason: str | None = None
+        stop_reason: LettaStopReason | None = None
+
+        if tool_rule_violated:
+            continue_stepping = True
+            heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing: tool rule violation."
+        else:
+            tool_rules_solver.register_tool_call(tool_call_name)
+
+            if tool_rules_solver.is_terminal_tool(tool_call_name):
+                if continue_stepping:
+                    stop_reason = LettaStopReason(stop_reason=StopReasonType.tool_rule.value)
+                continue_stepping = False
+
+            elif tool_rules_solver.has_children_tools(tool_call_name):
+                continue_stepping = True
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing: child tool rule."
+
+            elif tool_rules_solver.is_continue_tool(tool_call_name):
+                continue_stepping = True
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing: continue tool rule."
+
+        # – hard stop overrides –
+        if is_final_step:
+            continue_stepping = False
+            stop_reason = LettaStopReason(stop_reason=StopReasonType.max_steps.value)
+        else:
+            uncalled = tool_rules_solver.get_uncalled_required_tools(available_tools=set([t.name for t in agent_state.tools]))
+            if not continue_stepping and uncalled:
+                continue_stepping = True
+                heartbeat_reason = f"{NON_USER_MSG_PREFIX}Continuing, user expects these tools: [{', '.join(uncalled)}] to be called still."
+
+                stop_reason = None  # reset – we’re still going
+
+        return continue_stepping, heartbeat_reason, stop_reason
+
+    @trace_method
+    async def _execute_tool(
+        self,
+        tool_name: str,
+        tool_args: JsonDict,
+        agent_state: AgentState,
+        agent_step_span: Span | None = None,
+        step_id: str | None = None,
+    ) -> "ToolExecutionResult":
+        """
+        Executes a tool and returns the ToolExecutionResult.
+        """
+        from letta.schemas.tool_execution_result import ToolExecutionResult
+
+        # Special memory case
+        target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
+        if not target_tool:
+            # TODO: fix this error message
+            return ToolExecutionResult(
+                func_return=f"Tool {tool_name} not found",
+                status="error",
+            )
+
+        # TODO: This temp. Move this logic and code to executors
+
+        if agent_step_span:
+            start_time = get_utc_timestamp_ns()
+            agent_step_span.add_event(name="tool_execution_started")
+
+        sandbox_env_vars = {var.key: var.value for var in agent_state.tool_exec_environment_variables}
+        tool_execution_manager = ToolExecutionManager(
+            agent_state=agent_state,
+            message_manager=self.message_manager,
+            agent_manager=self.agent_manager,
+            block_manager=self.block_manager,
+            job_manager=self.job_manager,
+            passage_manager=self.passage_manager,
+            sandbox_env_vars=sandbox_env_vars,
+            actor=self.actor,
+        )
+        # TODO: Integrate sandbox result
+        log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
+        tool_execution_result = await tool_execution_manager.execute_tool_async(
+            function_name=tool_name,
+            function_args=tool_args,
+            tool=target_tool,
+            step_id=step_id,
+        )
+        if agent_step_span:
+            end_time = get_utc_timestamp_ns()
+            agent_step_span.add_event(
+                name="tool_execution_completed",
+                attributes={
+                    "tool_name": target_tool.name,
+                    "duration_ms": ns_to_ms(end_time - start_time),
+                    "success": tool_execution_result.success_flag,
+                    "tool_type": target_tool.tool_type,
+                    "tool_id": target_tool.id,
+                },
+            )
+        log_event(name=f"finish_{tool_name}_execution", attributes=tool_execution_result.model_dump())
+        return tool_execution_result
+
+    @trace_method
+    async def summarize_conversation_history(
+        self,
+        in_context_messages: list[Message],
+        new_letta_messages: list[Message],
+        total_tokens: int | None = None,
+        force: bool = False,
+    ) -> list[Message]:
+        # If total tokens is reached, we truncate down
+        # TODO: This can be broken by bad configs, e.g. lower bound too high, initial messages too fat, etc.
+        # TODO: `force` and `clear` seem to no longer be used, we should remove
+        if force or (total_tokens and total_tokens > self.agent_state.llm_config.context_window):
+            self.logger.warning(
+                f"Total tokens {total_tokens} exceeds configured max tokens {self.agent_state.llm_config.context_window}, forcefully clearing message history."
+            )
+            new_in_context_messages, updated = await self.summarizer.summarize(
+                in_context_messages=in_context_messages,
+                new_letta_messages=new_letta_messages,
+                force=True,
+                clear=True,
+            )
+        else:
+            # NOTE (Sarah): Seems like this is doing nothing?
+            self.logger.info(
+                f"Total tokens {total_tokens} does not exceed configured max tokens {self.agent_state.llm_config.context_window}, passing summarizing w/o force."
+            )
+            new_in_context_messages, updated = await self.summarizer.summarize(
+                in_context_messages=in_context_messages,
+                new_letta_messages=new_letta_messages,
+            )
+        message_ids = [m.id for m in new_in_context_messages]
+        await self.agent_manager.update_message_ids_async(
+            agent_id=self.agent_state.id,
+            message_ids=message_ids,
+            actor=self.actor,
+        )
+        self.agent_state.message_ids = message_ids
+
+        return new_in_context_messages
+
+    def _record_step_metrics(
+        self,
+        *,
+        step_id: str,
+        step_metrics: StepMetrics,
+        run_id: str | None = None,
+    ):
+        task = safe_create_task(
+            self.step_manager.record_step_metrics_async(
+                actor=self.actor,
+                step_id=step_id,
+                llm_request_ns=step_metrics.llm_request_ns,
+                tool_execution_ns=step_metrics.tool_execution_ns,
+                step_ns=step_metrics.step_ns,
+                agent_id=self.agent_state.id,
+                job_id=run_id,
+                project_id=self.agent_state.project_id,
+                template_id=self.agent_state.template_id,
+                base_template_id=self.agent_state.base_template_id,
+            ),
+            label="record_step_metrics",
+        )
+        return task
+
+    @trace_method
+    async def _log_request(
+        self,
+        request_start_timestamp_ns: int,
+        request_span: "Span | None",
+        job_update_metadata: dict | None,
+        is_error: bool,
+        run_id: str | None = None,
+    ):
+        if request_start_timestamp_ns:
+            now_ns, now = get_utc_timestamp_ns(), get_utc_time()
+            duration_ns = now_ns - request_start_timestamp_ns
+            if request_span:
+                request_span.add_event(name="letta_request_ms", attributes={"duration_ms": ns_to_ms(duration_ns)})
+            await self._update_agent_last_run_metrics(now, ns_to_ms(duration_ns))
+            if settings.track_agent_run and run_id:
+                await self.job_manager.record_response_duration(run_id, duration_ns, self.actor)
+                await self.job_manager.safe_update_job_status_async(
+                    job_id=run_id,
+                    new_status=JobStatus.failed if is_error else JobStatus.completed,
+                    actor=self.actor,
+                    metadata=job_update_metadata,
+                )
+        if request_span:
+            request_span.end()
+
+    @trace_method
+    async def _update_agent_last_run_metrics(self, completion_time: datetime, duration_ms: float) -> None:
+        if not settings.track_last_agent_run:
+            return
+        try:
+            await self.agent_manager.update_agent_async(
+                agent_id=self.agent_state.id,
+                agent_update=UpdateAgent(last_run_completion=completion_time, last_run_duration_ms=duration_ms),
+                actor=self.actor,
+            )
+        except Exception as e:
+            self.logger.error(f"Failed to update agent's last run metrics: {e}")
+
+    def get_finish_chunks_for_stream(
+        self,
+        usage: LettaUsageStatistics,
+        stop_reason: LettaStopReason | None = None,
+    ):
+        if stop_reason is None:
+            stop_reason = LettaStopReason(stop_reason=StopReasonType.end_turn.value)
+        return [
+            stop_reason.model_dump_json(),
+            usage.model_dump_json(),
+            MessageStreamStatus.done.value,
+        ]
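
For orientation, a minimal usage sketch of the new LettaAgentV2 entry points added in this release (illustrative only, not part of the packaged code; it assumes an AgentState and User have already been loaded by the server, and that MessageCreate accepts role/content as used elsewhere in the package):

    # Hypothetical driver code, run inside an async function on the server side.
    from letta.agents.letta_agent_v2 import LettaAgentV2
    from letta.schemas.message import MessageCreate

    async def run_once(agent_state, actor):
        agent = LettaAgentV2(agent_state=agent_state, actor=actor)
        user_input = [MessageCreate(role="user", content="hello")]

        # Dry run: inspect the request payload that would be sent to the LLM.
        request_payload = await agent.build_request(user_input)

        # Blocking mode: one LettaResponse containing all messages and usage.
        response = await agent.step(user_input)

        # Streaming mode: SSE-formatted chunks; stream_tokens=True streams tokens
        # where the provider supports it, otherwise whole steps are yielded.
        async for sse_chunk in agent.stream(user_input, stream_tokens=True):
            print(sse_chunk, end="")

        return request_payload, response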