letta-nightly 0.11.7.dev20250908104137__py3-none-any.whl → 0.11.7.dev20250910104051__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +111 -0
  3. letta/adapters/letta_llm_stream_adapter.py +169 -0
  4. letta/agents/base_agent.py +4 -1
  5. letta/agents/base_agent_v2.py +68 -0
  6. letta/agents/helpers.py +3 -5
  7. letta/agents/letta_agent.py +23 -12
  8. letta/agents/letta_agent_v2.py +1220 -0
  9. letta/agents/voice_agent.py +2 -1
  10. letta/constants.py +1 -1
  11. letta/errors.py +12 -0
  12. letta/functions/function_sets/base.py +53 -12
  13. letta/functions/schema_generator.py +1 -1
  14. letta/groups/sleeptime_multi_agent_v3.py +231 -0
  15. letta/helpers/tool_rule_solver.py +4 -0
  16. letta/helpers/tpuf_client.py +607 -34
  17. letta/interfaces/anthropic_streaming_interface.py +64 -24
  18. letta/interfaces/openai_streaming_interface.py +80 -37
  19. letta/llm_api/openai_client.py +45 -4
  20. letta/orm/block.py +1 -0
  21. letta/orm/group.py +1 -0
  22. letta/orm/source.py +8 -1
  23. letta/orm/step_metrics.py +10 -0
  24. letta/schemas/block.py +4 -0
  25. letta/schemas/enums.py +1 -0
  26. letta/schemas/group.py +8 -0
  27. letta/schemas/letta_message.py +1 -1
  28. letta/schemas/letta_request.py +2 -2
  29. letta/schemas/mcp.py +9 -1
  30. letta/schemas/message.py +23 -0
  31. letta/schemas/providers/ollama.py +1 -1
  32. letta/schemas/providers.py +1 -2
  33. letta/schemas/source.py +6 -0
  34. letta/schemas/step_metrics.py +2 -0
  35. letta/server/rest_api/routers/v1/__init__.py +2 -0
  36. letta/server/rest_api/routers/v1/agents.py +100 -5
  37. letta/server/rest_api/routers/v1/blocks.py +6 -0
  38. letta/server/rest_api/routers/v1/folders.py +23 -5
  39. letta/server/rest_api/routers/v1/groups.py +6 -0
  40. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  41. letta/server/rest_api/routers/v1/messages.py +14 -19
  42. letta/server/rest_api/routers/v1/runs.py +43 -28
  43. letta/server/rest_api/routers/v1/sources.py +23 -5
  44. letta/server/rest_api/routers/v1/tools.py +42 -0
  45. letta/server/rest_api/streaming_response.py +9 -1
  46. letta/server/server.py +2 -1
  47. letta/services/agent_manager.py +39 -59
  48. letta/services/agent_serialization_manager.py +22 -8
  49. letta/services/archive_manager.py +60 -9
  50. letta/services/block_manager.py +5 -0
  51. letta/services/file_processor/embedder/base_embedder.py +5 -0
  52. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  53. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  54. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  55. letta/services/file_processor/file_processor.py +9 -7
  56. letta/services/group_manager.py +74 -11
  57. letta/services/mcp_manager.py +132 -26
  58. letta/services/message_manager.py +229 -125
  59. letta/services/passage_manager.py +2 -1
  60. letta/services/source_manager.py +23 -1
  61. letta/services/summarizer/summarizer.py +2 -0
  62. letta/services/tool_executor/core_tool_executor.py +2 -120
  63. letta/services/tool_executor/files_tool_executor.py +133 -8
  64. letta/settings.py +6 -0
  65. letta/utils.py +34 -1
  66. {letta_nightly-0.11.7.dev20250908104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/METADATA +2 -2
  67. {letta_nightly-0.11.7.dev20250908104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/RECORD +70 -63
  68. {letta_nightly-0.11.7.dev20250908104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/WHEEL +0 -0
  69. {letta_nightly-0.11.7.dev20250908104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/entry_points.txt +0 -0
  70. {letta_nightly-0.11.7.dev20250908104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/licenses/LICENSE +0 -0
letta/adapters/letta_llm_adapter.py ADDED
@@ -0,0 +1,81 @@
+from abc import ABC, abstractmethod
+from typing import AsyncGenerator
+
+from letta.llm_api.llm_client_base import LLMClientBase
+from letta.schemas.letta_message import LettaMessage
+from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, ToolCall
+from letta.schemas.usage import LettaUsageStatistics
+from letta.schemas.user import User
+from letta.services.telemetry_manager import TelemetryManager
+
+
+class LettaLLMAdapter(ABC):
+    """
+    Base adapter for handling LLM calls in a unified way.
+
+    This abstract class defines the interface for both blocking and streaming
+    LLM interactions, allowing the agent to use different execution modes
+    through a consistent API.
+    """
+
+    def __init__(self, llm_client: LLMClientBase, llm_config: LLMConfig) -> None:
+        self.llm_client: LLMClientBase = llm_client
+        self.llm_config: LLMConfig = llm_config
+        self.message_id: str | None = None
+        self.request_data: dict | None = None
+        self.response_data: dict | None = None
+        self.chat_completions_response: ChatCompletionResponse | None = None
+        self.reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent] | None = None
+        self.tool_call: ToolCall | None = None
+        self.usage: LettaUsageStatistics = LettaUsageStatistics()
+        self.telemetry_manager: TelemetryManager = TelemetryManager()
+        self.llm_request_finish_timestamp_ns: int | None = None
+
+    @abstractmethod
+    async def invoke_llm(
+        self,
+        request_data: dict,
+        messages: list,
+        tools: list,
+        use_assistant_message: bool,
+        requires_approval_tools: list[str] = [],
+        step_id: str | None = None,
+        actor: User | None = None,
+    ) -> AsyncGenerator[LettaMessage | None, None]:
+        """
+        Execute the LLM call and yield results as they become available.
+
+        Args:
+            request_data: The prepared request data for the LLM API
+            messages: The messages in context for the request
+            tools: The tools available for the LLM to use
+            use_assistant_message: If true, use assistant messages when streaming response
+            requires_approval_tools: The subset of tools that require approval before use
+            step_id: The step ID associated with this request. If provided, logs request and response data.
+            actor: The optional actor associated with this request for logging purposes.
+
+        Yields:
+            LettaMessage: Chunks of data for streaming adapters, or None for blocking adapters
+        """
+        raise NotImplementedError
+
+    def supports_token_streaming(self) -> bool:
+        """
+        Check if the adapter supports token-level streaming.
+
+        Returns:
+            bool: True if the adapter can stream back tokens as they are generated, False otherwise
+        """
+        return False
+
+    def log_provider_trace(self, step_id: str | None, actor: User | None) -> None:
+        """
+        Log provider trace data for telemetry purposes.
+
+        Args:
+            step_id: The step ID associated with this request for logging purposes
+            actor: The user associated with this request for logging purposes
+        """
+        raise NotImplementedError
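
For orientation, here is a minimal sketch, not part of the diff, of how a caller could drive any LettaLLMAdapter subclass through this interface; the run_step helper and its arguments are hypothetical, while the invoke_llm signature and the adapter attributes come from the file above:

    # Hypothetical driver (not in the package): works for blocking and streaming
    # adapters alike, since both expose invoke_llm() as an async generator and
    # publish their results on instance attributes once it is exhausted.
    async def run_step(adapter, request_data: dict, messages: list, tools: list):
        async for chunk in adapter.invoke_llm(
            request_data=request_data,
            messages=messages,
            tools=tools,
            use_assistant_message=True,
        ):
            if chunk is not None:  # streaming adapters yield LettaMessage chunks; blocking adapters yield None
                print(chunk)
        # Accumulated state is readable after the generator completes:
        return adapter.tool_call, adapter.reasoning_content, adapter.usage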
letta/adapters/letta_llm_request_adapter.py ADDED
@@ -0,0 +1,111 @@
+import asyncio
+from typing import AsyncGenerator
+
+from letta.adapters.letta_llm_adapter import LettaLLMAdapter
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.schemas.letta_message import LettaMessage
+from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, TextContent
+from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.schemas.user import User
+from letta.settings import settings
+
+
+class LettaLLMRequestAdapter(LettaLLMAdapter):
+    """
+    Adapter for handling blocking (non-streaming) LLM requests.
+
+    This adapter makes synchronous requests to the LLM and returns complete
+    responses. It extracts reasoning content, tool calls, and usage statistics
+    from the response and updates instance variables for access by the agent.
+    """
+
+    async def invoke_llm(
+        self,
+        request_data: dict,
+        messages: list,
+        tools: list,
+        use_assistant_message: bool,
+        requires_approval_tools: list[str] = [],
+        step_id: str | None = None,
+        actor: str | None = None,
+    ) -> AsyncGenerator[LettaMessage | None, None]:
+        """
+        Execute a blocking LLM request and yield the response.
+
+        This adapter:
+        1. Makes a blocking request to the LLM
+        2. Converts the response to chat completion format
+        3. Extracts reasoning and tool call information
+        4. Updates all instance variables
+        5. Yields nothing (blocking mode doesn't stream)
+        """
+        # Store request data
+        self.request_data = request_data
+
+        # Make the blocking LLM request
+        self.response_data = await self.llm_client.request_async(request_data, self.llm_config)
+        self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
+
+        # Convert response to chat completion format
+        self.chat_completions_response = self.llm_client.convert_response_to_chat_completion(self.response_data, messages, self.llm_config)
+
+        # Extract reasoning content from the response
+        if self.chat_completions_response.choices[0].message.reasoning_content:
+            self.reasoning_content = [
+                ReasoningContent(
+                    reasoning=self.chat_completions_response.choices[0].message.reasoning_content,
+                    is_native=True,
+                    signature=self.chat_completions_response.choices[0].message.reasoning_content_signature,
+                )
+            ]
+        elif self.chat_completions_response.choices[0].message.omitted_reasoning_content:
+            self.reasoning_content = [OmittedReasoningContent()]
+        elif self.chat_completions_response.choices[0].message.content:
+            # Reasoning placed into content for legacy reasons
+            self.reasoning_content = [TextContent(text=self.chat_completions_response.choices[0].message.content)]
+        else:
+            # logger.info("No reasoning content found.")
+            self.reasoning_content = None
+
+        # Extract tool call
+        if self.chat_completions_response.choices[0].message.tool_calls:
+            self.tool_call = self.chat_completions_response.choices[0].message.tool_calls[0]
+        else:
+            self.tool_call = None
+
+        # Extract usage statistics
+        self.usage.step_count = 1
+        self.usage.completion_tokens = self.chat_completions_response.usage.completion_tokens
+        self.usage.prompt_tokens = self.chat_completions_response.usage.prompt_tokens
+        self.usage.total_tokens = self.chat_completions_response.usage.total_tokens
+
+        self.log_provider_trace(step_id=step_id, actor=actor)
+
+        yield None
+        return
+
+    def log_provider_trace(self, step_id: str | None, actor: User | None) -> None:
+        """
+        Log provider trace data for telemetry purposes in a fire-and-forget manner.
+
+        Creates an async task to log the request/response data without blocking
+        the main execution flow. The task runs in the background.

+        Args:
+            step_id: The step ID associated with this request for logging purposes
+            actor: The user associated with this request for logging purposes
+        """
+        if step_id is None or actor is None or not settings.track_provider_trace:
+            return
+
+        asyncio.create_task(
+            self.telemetry_manager.create_provider_trace_async(
+                actor=actor,
+                provider_trace_create=ProviderTraceCreate(
+                    request_json=self.request_data,
+                    response_json=self.response_data,
+                    step_id=step_id,  # Use original step_id for telemetry
+                    organization_id=actor.organization_id,
+                ),
+            )
+        )
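
As a usage sketch (the client, llm_config, payload, msgs, and tools objects are assumed to already exist and are not defined in this diff), the blocking adapter is exhausted for its single None and then read through its attributes, inside an async function:

    # Hypothetical wiring; only LettaLLMRequestAdapter itself comes from this diff.
    adapter = LettaLLMRequestAdapter(llm_client=client, llm_config=llm_config)
    async for _ in adapter.invoke_llm(request_data=payload, messages=msgs, tools=tools, use_assistant_message=True):
        pass  # blocking mode yields exactly one None
    print(adapter.tool_call, adapter.usage.total_tokens)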
letta/adapters/letta_llm_stream_adapter.py ADDED
@@ -0,0 +1,169 @@
+import asyncio
+from typing import AsyncGenerator
+
+from letta.adapters.letta_llm_adapter import LettaLLMAdapter
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
+from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
+from letta.llm_api.llm_client_base import LLMClientBase
+from letta.schemas.enums import ProviderType
+from letta.schemas.letta_message import LettaMessage
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.schemas.usage import LettaUsageStatistics
+from letta.schemas.user import User
+from letta.settings import settings
+
+
+class LettaLLMStreamAdapter(LettaLLMAdapter):
+    """
+    Adapter for handling streaming LLM requests with immediate token yielding.
+
+    This adapter supports real-time streaming of tokens from the LLM, providing
+    minimal time-to-first-token (TTFT) latency. It uses specialized streaming
+    interfaces for different providers (OpenAI, Anthropic) to handle their
+    specific streaming formats.
+    """
+
+    def __init__(self, llm_client: LLMClientBase, llm_config: LLMConfig) -> None:
+        super().__init__(llm_client, llm_config)
+        self.interface: OpenAIStreamingInterface | AnthropicStreamingInterface | None = None
+
+    async def invoke_llm(
+        self,
+        request_data: dict,
+        messages: list,
+        tools: list,
+        use_assistant_message: bool,
+        requires_approval_tools: list[str] = [],
+        step_id: str | None = None,
+        actor: User | None = None,
+    ) -> AsyncGenerator[LettaMessage, None]:
+        """
+        Execute a streaming LLM request and yield tokens/chunks as they arrive.
+
+        This adapter:
+        1. Makes a streaming request to the LLM
+        2. Yields chunks immediately for minimal TTFT
+        3. Accumulates response data through the streaming interface
+        4. Updates all instance variables after streaming completes
+        """
+        # Store request data
+        self.request_data = request_data
+
+        # Instantiate streaming interface
+        if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
+            self.interface = AnthropicStreamingInterface(
+                use_assistant_message=use_assistant_message,
+                put_inner_thoughts_in_kwarg=self.llm_config.put_inner_thoughts_in_kwargs,
+                requires_approval_tools=requires_approval_tools,
+            )
+        elif self.llm_config.model_endpoint_type == ProviderType.openai:
+            self.interface = OpenAIStreamingInterface(
+                use_assistant_message=use_assistant_message,
+                is_openai_proxy=self.llm_config.provider_name == "lmstudio_openai",
+                put_inner_thoughts_in_kwarg=self.llm_config.put_inner_thoughts_in_kwargs,
+                messages=messages,
+                tools=tools,
+                requires_approval_tools=requires_approval_tools,
+            )
+        else:
+            raise ValueError(f"Streaming not supported for provider {self.llm_config.model_endpoint_type}")
+
+        # Extract optional parameters
+        # ttft_span = kwargs.get('ttft_span', None)
+
+        # Start the streaming request
+        stream = await self.llm_client.stream_async(request_data, self.llm_config)
+
+        # Process the stream and yield chunks immediately for TTFT
+        async for chunk in self.interface.process(stream):  # TODO: add ttft span
+            # Yield each chunk immediately as it arrives
+            yield chunk
+
+        # After streaming completes, extract the accumulated data
+        self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
+
+        # Extract tool call from the interface
+        try:
+            self.tool_call = self.interface.get_tool_call_object()
+        except ValueError as e:
+            # No tool call, handle upstream
+            self.tool_call = None
+
+        # Extract reasoning content from the interface
+        self.reasoning_content = self.interface.get_reasoning_content()
+
+        # Extract usage statistics
+        # Some providers don't provide usage in streaming, use fallback if needed
+        if hasattr(self.interface, "input_tokens") and hasattr(self.interface, "output_tokens"):
+            # Handle cases where tokens might not be set (e.g., LMStudio)
+            input_tokens = self.interface.input_tokens
+            output_tokens = self.interface.output_tokens
+
+            # Fallback to estimated values if not provided
+            if not input_tokens and hasattr(self.interface, "fallback_input_tokens"):
+                input_tokens = self.interface.fallback_input_tokens
+            if not output_tokens and hasattr(self.interface, "fallback_output_tokens"):
+                output_tokens = self.interface.fallback_output_tokens
+
+            self.usage = LettaUsageStatistics(
+                step_count=1,
+                completion_tokens=output_tokens or 0,
+                prompt_tokens=input_tokens or 0,
+                total_tokens=(input_tokens or 0) + (output_tokens or 0),
+            )
+        else:
+            # Default usage statistics if not available
+            self.usage = LettaUsageStatistics(step_count=1, completion_tokens=0, prompt_tokens=0, total_tokens=0)
+
+        # Store any additional data from the interface
+        self.message_id = self.interface.letta_message_id
+
+        # Log request and response data
+        self.log_provider_trace(step_id=step_id, actor=actor)
+
+    def supports_token_streaming(self) -> bool:
+        return True
+
+    def log_provider_trace(self, step_id: str | None, actor: User | None) -> None:
+        """
+        Log provider trace data for telemetry purposes in a fire-and-forget manner.
+
+        Creates an async task to log the request/response data without blocking
+        the main execution flow. For streaming adapters, this includes the final
+        tool call and reasoning content collected during streaming.
+
+        Args:
+            step_id: The step ID associated with this request for logging purposes
+            actor: The user associated with this request for logging purposes
+        """
+        if step_id is None or actor is None or not settings.track_provider_trace:
+            return
+
+        asyncio.create_task(
+            self.telemetry_manager.create_provider_trace_async(
+                actor=actor,
+                provider_trace_create=ProviderTraceCreate(
+                    request_json=self.request_data,
+                    response_json={
+                        "content": {
+                            "tool_call": self.tool_call.model_dump_json(),
+                            "reasoning": [content.model_dump_json() for content in self.reasoning_content],
+                        },
+                        "id": self.interface.message_id,
+                        "model": self.interface.model,
+                        "role": "assistant",
+                        # "stop_reason": "",
+                        # "stop_sequence": None,
+                        "type": "message",
+                        "usage": {
+                            "input_tokens": self.usage.prompt_tokens,
+                            "output_tokens": self.usage.completion_tokens,
+                        },
+                    },
+                    step_id=step_id,  # Use original step_id for telemetry
+                    organization_id=actor.organization_id,
+                ),
+            )
+        )
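
A corresponding sketch for the streaming path (again hypothetical glue code; only the adapter and its attributes appear in the diff) forwards each chunk as it arrives and reads the accumulated state afterwards, inside an async function:

    # Placeholder objects: client, llm_config, payload, msgs, tools are assumed to exist.
    adapter = LettaLLMStreamAdapter(llm_client=client, llm_config=llm_config)
    async for chunk in adapter.invoke_llm(
        request_data=payload, messages=msgs, tools=tools, use_assistant_message=True
    ):
        print(f"data: {chunk.model_dump_json()}\n\n", end="")  # e.g. relay chunks as SSE
    # Populated only after the stream is exhausted:
    print(adapter.tool_call, adapter.usage.total_tokens, adapter.message_id)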
letta/agents/base_agent.py CHANGED
@@ -175,7 +175,10 @@ class BaseAgent(ABC):
 
  # [DB Call] Update Messages
  new_system_message = await self.message_manager.update_message_by_id_async(
- curr_system_message.id, message_update=MessageUpdate(content=new_system_message_str), actor=self.actor
+ curr_system_message.id,
+ message_update=MessageUpdate(content=new_system_message_str),
+ actor=self.actor,
+ project_id=agent_state.project_id,
  )
  return [new_system_message] + in_context_messages[1:]
 
letta/agents/base_agent_v2.py ADDED
@@ -0,0 +1,68 @@
+from abc import ABC, abstractmethod
+from typing import AsyncGenerator
+
+from letta.constants import DEFAULT_MAX_STEPS
+from letta.log import get_logger
+from letta.schemas.agent import AgentState
+from letta.schemas.enums import MessageStreamStatus
+from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, MessageType
+from letta.schemas.letta_response import LettaResponse
+from letta.schemas.message import MessageCreate
+from letta.schemas.user import User
+
+
+class BaseAgentV2(ABC):
+    """
+    Abstract base class for the main agent execution loop for letta agents, handling
+    message management, llm api request, tool execution, and context tracking.
+    """
+
+    def __init__(self, agent_state: AgentState, actor: User):
+        self.agent_state = agent_state
+        self.actor = actor
+        self.logger = get_logger(agent_state.id)
+
+    @abstractmethod
+    async def build_request(
+        self,
+        input_messages: list[MessageCreate],
+    ) -> dict:
+        """
+        Execute the agent loop in dry_run mode, returning just the generated request
+        payload sent to the underlying llm provider.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    async def step(
+        self,
+        input_messages: list[MessageCreate],
+        max_steps: int = DEFAULT_MAX_STEPS,
+        run_id: str | None = None,
+        use_assistant_message: bool = True,
+        include_return_message_types: list[MessageType] | None = None,
+        request_start_timestamp_ns: int | None = None,
+    ) -> LettaResponse:
+        """
+        Execute the agent loop in blocking mode, returning all messages at once.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    async def stream(
+        self,
+        input_messages: list[MessageCreate],
+        max_steps: int = DEFAULT_MAX_STEPS,
+        stream_tokens: bool = False,
+        run_id: str | None = None,
+        use_assistant_message: bool = True,
+        include_return_message_types: list[MessageType] | None = None,
+        request_start_timestamp_ns: int | None = None,
+    ) -> AsyncGenerator[LettaMessage | LegacyLettaMessage | MessageStreamStatus, None]:
+        """
+        Execute the agent loop in streaming mode, yielding chunks as they become available.
+        If stream_tokens is True, individual tokens are streamed as they arrive from the LLM,
+        providing the lowest latency experience, otherwise each complete step (reasoning +
+        tool call + tool return) is yielded as it completes.
+        """
+        raise NotImplementedError
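
To make the contract concrete, a consumer of a BaseAgentV2 implementation (for example the LettaAgentV2 added in this release) might look like the following sketch; agent_impl and the message text are placeholders, and the calls assume an async context:

    from letta.schemas.message import MessageCreate

    # Blocking: one LettaResponse containing all messages for the turn.
    response = await agent_impl.step(input_messages=[MessageCreate(role="user", content="hi")])

    # Streaming: yields chunks as they become available; with stream_tokens=True,
    # individual tokens are streamed rather than whole steps.
    async for item in agent_impl.stream(
        input_messages=[MessageCreate(role="user", content="hi")],
        stream_tokens=True,
    ):
        print(item)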
letta/agents/helpers.py CHANGED
@@ -3,6 +3,7 @@ import uuid
  import xml.etree.ElementTree as ET
  from typing import List, Optional, Tuple
 
+ from letta.errors import PendingApprovalError
  from letta.helpers import ToolRulesSolver
  from letta.log import get_logger
  from letta.schemas.agent import AgentState
@@ -117,7 +118,7 @@ async def _prepare_in_context_messages_async(
  new_in_context_messages = await message_manager.create_many_messages_async(
  create_input_messages(input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=actor),
  actor=actor,
- embedding_config=agent_state.embedding_config,
+ project_id=agent_state.project_id,
  )
 
  return current_in_context_messages, new_in_context_messages
@@ -168,10 +169,7 @@ async def _prepare_in_context_messages_no_persist_async(
  else:
  # User is trying to send a regular message
  if current_in_context_messages[-1].role == "approval":
- raise ValueError(
- "Cannot send a new message: The agent is waiting for approval on a tool call. "
- "Please approve or deny the pending request before continuing."
- )
+ raise PendingApprovalError(pending_request_id=current_in_context_messages[-1].id)
 
  # Create a new user message from the input but dont store it yet
  new_in_context_messages = create_input_messages(
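
The switch from a generic ValueError to the new PendingApprovalError (added to letta/errors.py in this release) lets callers branch on a pending tool-call approval instead of matching error strings; a hedged sketch of such handling, with the surrounding agent.step call purely illustrative:

    from letta.errors import PendingApprovalError

    try:
        await agent.step(input_messages=new_messages)  # placeholder call site
    except PendingApprovalError as e:
        # The agent is still waiting on a tool-call approval; surface the pending
        # request to the client instead of failing the whole request.
        print(f"Approve or deny the pending request first: {e}")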
letta/agents/letta_agent.py CHANGED
@@ -495,7 +495,10 @@ class LettaAgent(BaseAgent):
  message.is_err = True
  message.step_id = effective_step_id
  await self.message_manager.create_many_messages_async(
- initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
+ initial_messages,
+ actor=self.actor,
+ project_id=agent_state.project_id,
+ template_id=agent_state.template_id,
  )
  elif step_progression <= StepProgression.LOGGED_TRACE:
  if stop_reason is None:
@@ -823,7 +826,10 @@
  message.is_err = True
  message.step_id = effective_step_id
  await self.message_manager.create_many_messages_async(
- initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
+ initial_messages,
+ actor=self.actor,
+ project_id=agent_state.project_id,
+ template_id=agent_state.template_id,
  )
  elif step_progression <= StepProgression.LOGGED_TRACE:
  if stop_reason is None:
@@ -1018,6 +1024,7 @@
  interface = AnthropicStreamingInterface(
  use_assistant_message=use_assistant_message,
  put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+ requires_approval_tools=tool_rules_solver.get_requires_approval_tools(valid_tool_names),
  )
  elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
  interface = OpenAIStreamingInterface(
@@ -1026,6 +1033,7 @@
  messages=current_in_context_messages + new_in_context_messages,
  tools=request_data.get("tools", []),
  put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+ requires_approval_tools=tool_rules_solver.get_requires_approval_tools(valid_tool_names),
  )
  else:
  raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
@@ -1170,12 +1178,13 @@
  )
  step_progression = StepProgression.LOGGED_TRACE
 
- # yields tool response as this is handled from Letta and not the response from the LLM provider
- tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
- if not (use_assistant_message and tool_return.name == "send_message"):
- # Apply message type filtering if specified
- if include_return_message_types is None or tool_return.message_type in include_return_message_types:
- yield f"data: {tool_return.model_dump_json()}\n\n"
+ if persisted_messages[-1].role != "approval":
+ # yields tool response as this is handled from Letta and not the response from the LLM provider
+ tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
+ if not (use_assistant_message and tool_return.name == "send_message"):
+ # Apply message type filtering if specified
+ if include_return_message_types is None or tool_return.message_type in include_return_message_types:
+ yield f"data: {tool_return.model_dump_json()}\n\n"
 
  # TODO (cliandy): consolidate and expand with trace
  MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
@@ -1259,7 +1268,10 @@
  message.is_err = True
  message.step_id = effective_step_id
  await self.message_manager.create_many_messages_async(
- initial_messages, actor=self.actor, embedding_config=agent_state.embedding_config
+ initial_messages,
+ actor=self.actor,
+ project_id=agent_state.project_id,
+ template_id=agent_state.template_id,
  )
  elif step_progression <= StepProgression.LOGGED_TRACE:
  if stop_reason is None:
@@ -1667,7 +1679,7 @@
  )
  messages_to_persist = (initial_messages or []) + tool_call_messages
  persisted_messages = await self.message_manager.create_many_messages_async(
- messages_to_persist, actor=self.actor, embedding_config=agent_state.embedding_config
+ messages_to_persist, actor=self.actor, project_id=agent_state.project_id, template_id=agent_state.template_id
  )
  return persisted_messages, continue_stepping, stop_reason
 
@@ -1686,7 +1698,6 @@
  tool_call_id=tool_call_id,
  request_heartbeat=request_heartbeat,
  )
-
  if not is_approval and tool_rules_solver.is_requires_approval_tool(tool_call_name):
  approval_message = create_approval_request_message_from_llm_response(
  agent_id=agent_state.id,
@@ -1779,7 +1790,7 @@
  messages_to_persist = (initial_messages or []) + tool_call_messages
 
  persisted_messages = await self.message_manager.create_many_messages_async(
- messages_to_persist, actor=self.actor, embedding_config=agent_state.embedding_config
+ messages_to_persist, actor=self.actor, project_id=agent_state.project_id, template_id=agent_state.template_id
  )
 
  if run_id: