letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250910104051__py3-none-any.whl
This diff compares the contents of two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the versions as they appear in their public registries.
- letta/adapters/letta_llm_adapter.py +81 -0
- letta/adapters/letta_llm_request_adapter.py +111 -0
- letta/adapters/letta_llm_stream_adapter.py +169 -0
- letta/agents/base_agent.py +4 -1
- letta/agents/base_agent_v2.py +68 -0
- letta/agents/helpers.py +3 -5
- letta/agents/letta_agent.py +23 -12
- letta/agents/letta_agent_v2.py +1220 -0
- letta/agents/voice_agent.py +2 -1
- letta/constants.py +1 -1
- letta/errors.py +12 -0
- letta/functions/function_sets/base.py +53 -12
- letta/functions/schema_generator.py +1 -1
- letta/groups/sleeptime_multi_agent_v3.py +231 -0
- letta/helpers/tool_rule_solver.py +4 -0
- letta/helpers/tpuf_client.py +607 -34
- letta/interfaces/anthropic_streaming_interface.py +64 -24
- letta/interfaces/openai_streaming_interface.py +80 -37
- letta/llm_api/openai_client.py +45 -4
- letta/orm/block.py +1 -0
- letta/orm/group.py +1 -0
- letta/orm/source.py +8 -1
- letta/orm/step_metrics.py +10 -0
- letta/schemas/block.py +4 -0
- letta/schemas/enums.py +1 -0
- letta/schemas/group.py +8 -0
- letta/schemas/letta_message.py +1 -1
- letta/schemas/letta_request.py +2 -2
- letta/schemas/mcp.py +9 -1
- letta/schemas/message.py +23 -0
- letta/schemas/providers/ollama.py +1 -1
- letta/schemas/providers.py +1 -2
- letta/schemas/source.py +6 -0
- letta/schemas/step_metrics.py +2 -0
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +100 -5
- letta/server/rest_api/routers/v1/blocks.py +6 -0
- letta/server/rest_api/routers/v1/folders.py +23 -5
- letta/server/rest_api/routers/v1/groups.py +6 -0
- letta/server/rest_api/routers/v1/internal_templates.py +218 -12
- letta/server/rest_api/routers/v1/messages.py +14 -19
- letta/server/rest_api/routers/v1/runs.py +43 -28
- letta/server/rest_api/routers/v1/sources.py +23 -5
- letta/server/rest_api/routers/v1/tools.py +42 -0
- letta/server/rest_api/streaming_response.py +9 -1
- letta/server/server.py +2 -1
- letta/services/agent_manager.py +39 -59
- letta/services/agent_serialization_manager.py +22 -8
- letta/services/archive_manager.py +60 -9
- letta/services/block_manager.py +5 -0
- letta/services/file_processor/embedder/base_embedder.py +5 -0
- letta/services/file_processor/embedder/openai_embedder.py +4 -0
- letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
- letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
- letta/services/file_processor/file_processor.py +9 -7
- letta/services/group_manager.py +74 -11
- letta/services/mcp_manager.py +132 -26
- letta/services/message_manager.py +229 -125
- letta/services/passage_manager.py +2 -1
- letta/services/source_manager.py +23 -1
- letta/services/summarizer/summarizer.py +2 -0
- letta/services/tool_executor/core_tool_executor.py +2 -120
- letta/services/tool_executor/files_tool_executor.py +133 -8
- letta/settings.py +6 -0
- letta/utils.py +34 -1
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/RECORD +70 -63
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250910104051.dist-info}/licenses/LICENSE +0 -0
letta/adapters/letta_llm_adapter.py
ADDED
@@ -0,0 +1,81 @@
+from abc import ABC, abstractmethod
+from typing import AsyncGenerator
+
+from letta.llm_api.llm_client_base import LLMClientBase
+from letta.schemas.letta_message import LettaMessage
+from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, ToolCall
+from letta.schemas.usage import LettaUsageStatistics
+from letta.schemas.user import User
+from letta.services.telemetry_manager import TelemetryManager
+
+
+class LettaLLMAdapter(ABC):
+    """
+    Base adapter for handling LLM calls in a unified way.
+
+    This abstract class defines the interface for both blocking and streaming
+    LLM interactions, allowing the agent to use different execution modes
+    through a consistent API.
+    """
+
+    def __init__(self, llm_client: LLMClientBase, llm_config: LLMConfig) -> None:
+        self.llm_client: LLMClientBase = llm_client
+        self.llm_config: LLMConfig = llm_config
+        self.message_id: str | None = None
+        self.request_data: dict | None = None
+        self.response_data: dict | None = None
+        self.chat_completions_response: ChatCompletionResponse | None = None
+        self.reasoning_content: list[TextContent | ReasoningContent | RedactedReasoningContent] | None = None
+        self.tool_call: ToolCall | None = None
+        self.usage: LettaUsageStatistics = LettaUsageStatistics()
+        self.telemetry_manager: TelemetryManager = TelemetryManager()
+        self.llm_request_finish_timestamp_ns: int | None = None
+
+    @abstractmethod
+    async def invoke_llm(
+        self,
+        request_data: dict,
+        messages: list,
+        tools: list,
+        use_assistant_message: bool,
+        requires_approval_tools: list[str] = [],
+        step_id: str | None = None,
+        actor: User | None = None,
+    ) -> AsyncGenerator[LettaMessage | None, None]:
+        """
+        Execute the LLM call and yield results as they become available.
+
+        Args:
+            request_data: The prepared request data for the LLM API
+            messages: The messages in context for the request
+            tools: The tools available for the LLM to use
+            use_assistant_message: If true, use assistant messages when streaming response
+            requires_approval_tools: The subset of tools that require approval before use
+            step_id: The step ID associated with this request. If provided, logs request and response data.
+            actor: The optional actor associated with this request for logging purposes.
+
+        Yields:
+            LettaMessage: Chunks of data for streaming adapters, or None for blocking adapters
+        """
+        raise NotImplementedError
+
+    def supports_token_streaming(self) -> bool:
+        """
+        Check if the adapter supports token-level streaming.
+
+        Returns:
+            bool: True if the adapter can stream back tokens as they are generated, False otherwise
+        """
+        return False
+
+    def log_provider_trace(self, step_id: str | None, actor: User | None) -> None:
+        """
+        Log provider trace data for telemetry purposes.
+
+        Args:
+            step_id: The step ID associated with this request for logging purposes
+            actor: The user associated with this request for logging purposes
+        """
+        raise NotImplementedError
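The two adapter implementations that follow plug into this interface. As a rough sketch of how a caller might drive any LettaLLMAdapter through the shared invoke_llm generator and then read the accumulated results off the instance (the helper function and its arguments are illustrative, not part of this diff):

# Hypothetical consumer, not from the package: drain the adapter's generator,
# then read the results it accumulated on itself.
async def run_adapter_step(adapter, request_data: dict, messages: list, tools: list):
    async for chunk in adapter.invoke_llm(
        request_data=request_data,
        messages=messages,
        tools=tools,
        use_assistant_message=True,
    ):
        # Streaming adapters yield LettaMessage chunks; the blocking adapter yields a single None.
        if chunk is not None:
            print(chunk)
    # Once the generator is exhausted, results live on the adapter instance.
    return adapter.tool_call, adapter.reasoning_content, adapter.usage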
letta/adapters/letta_llm_request_adapter.py
ADDED
@@ -0,0 +1,111 @@
+import asyncio
+from typing import AsyncGenerator
+
+from letta.adapters.letta_llm_adapter import LettaLLMAdapter
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.schemas.letta_message import LettaMessage
+from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, TextContent
+from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.schemas.user import User
+from letta.settings import settings
+
+
+class LettaLLMRequestAdapter(LettaLLMAdapter):
+    """
+    Adapter for handling blocking (non-streaming) LLM requests.
+
+    This adapter makes synchronous requests to the LLM and returns complete
+    responses. It extracts reasoning content, tool calls, and usage statistics
+    from the response and updates instance variables for access by the agent.
+    """
+
+    async def invoke_llm(
+        self,
+        request_data: dict,
+        messages: list,
+        tools: list,
+        use_assistant_message: bool,
+        requires_approval_tools: list[str] = [],
+        step_id: str | None = None,
+        actor: str | None = None,
+    ) -> AsyncGenerator[LettaMessage | None, None]:
+        """
+        Execute a blocking LLM request and yield the response.
+
+        This adapter:
+        1. Makes a blocking request to the LLM
+        2. Converts the response to chat completion format
+        3. Extracts reasoning and tool call information
+        4. Updates all instance variables
+        5. Yields nothing (blocking mode doesn't stream)
+        """
+        # Store request data
+        self.request_data = request_data
+
+        # Make the blocking LLM request
+        self.response_data = await self.llm_client.request_async(request_data, self.llm_config)
+        self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
+
+        # Convert response to chat completion format
+        self.chat_completions_response = self.llm_client.convert_response_to_chat_completion(self.response_data, messages, self.llm_config)
+
+        # Extract reasoning content from the response
+        if self.chat_completions_response.choices[0].message.reasoning_content:
+            self.reasoning_content = [
+                ReasoningContent(
+                    reasoning=self.chat_completions_response.choices[0].message.reasoning_content,
+                    is_native=True,
+                    signature=self.chat_completions_response.choices[0].message.reasoning_content_signature,
+                )
+            ]
+        elif self.chat_completions_response.choices[0].message.omitted_reasoning_content:
+            self.reasoning_content = [OmittedReasoningContent()]
+        elif self.chat_completions_response.choices[0].message.content:
+            # Reasoning placed into content for legacy reasons
+            self.reasoning_content = [TextContent(text=self.chat_completions_response.choices[0].message.content)]
+        else:
+            # logger.info("No reasoning content found.")
+            self.reasoning_content = None
+
+        # Extract tool call
+        if self.chat_completions_response.choices[0].message.tool_calls:
+            self.tool_call = self.chat_completions_response.choices[0].message.tool_calls[0]
+        else:
+            self.tool_call = None
+
+        # Extract usage statistics
+        self.usage.step_count = 1
+        self.usage.completion_tokens = self.chat_completions_response.usage.completion_tokens
+        self.usage.prompt_tokens = self.chat_completions_response.usage.prompt_tokens
+        self.usage.total_tokens = self.chat_completions_response.usage.total_tokens
+
+        self.log_provider_trace(step_id=step_id, actor=actor)
+
+        yield None
+        return
+
+    def log_provider_trace(self, step_id: str | None, actor: User | None) -> None:
+        """
+        Log provider trace data for telemetry purposes in a fire-and-forget manner.
+
+        Creates an async task to log the request/response data without blocking
+        the main execution flow. The task runs in the background.
+
+        Args:
+            step_id: The step ID associated with this request for logging purposes
+            actor: The user associated with this request for logging purposes
+        """
+        if step_id is None or actor is None or not settings.track_provider_trace:
+            return
+
+        asyncio.create_task(
+            self.telemetry_manager.create_provider_trace_async(
+                actor=actor,
+                provider_trace_create=ProviderTraceCreate(
+                    request_json=self.request_data,
+                    response_json=self.response_data,
+                    step_id=step_id,  # Use original step_id for telemetry
+                    organization_id=actor.organization_id,
+                ),
+            )
+        )
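Both adapters log telemetry through asyncio.create_task so the trace write never blocks the response path. A standalone sketch of that fire-and-forget pattern (the function names and payload are illustrative, not from the package):

import asyncio


async def write_trace(payload: dict) -> None:
    # Stand-in for TelemetryManager.create_provider_trace_async
    await asyncio.sleep(0.05)
    print("trace persisted for", payload["step_id"])


async def handle_request() -> str:
    # Schedule the telemetry write without awaiting it, so the caller returns immediately.
    asyncio.create_task(write_trace({"step_id": "step-123"}))
    return "response returned immediately"


async def main() -> None:
    print(await handle_request())
    # In a long-lived server loop this wait is unnecessary; here it just lets the
    # background task finish before the demo event loop shuts down.
    await asyncio.sleep(0.1)


asyncio.run(main())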
letta/adapters/letta_llm_stream_adapter.py
ADDED
@@ -0,0 +1,169 @@
+import asyncio
+from typing import AsyncGenerator
+
+from letta.adapters.letta_llm_adapter import LettaLLMAdapter
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
+from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
+from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
+from letta.llm_api.llm_client_base import LLMClientBase
+from letta.schemas.enums import ProviderType
+from letta.schemas.letta_message import LettaMessage
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.schemas.usage import LettaUsageStatistics
+from letta.schemas.user import User
+from letta.settings import settings
+
+
+class LettaLLMStreamAdapter(LettaLLMAdapter):
+    """
+    Adapter for handling streaming LLM requests with immediate token yielding.
+
+    This adapter supports real-time streaming of tokens from the LLM, providing
+    minimal time-to-first-token (TTFT) latency. It uses specialized streaming
+    interfaces for different providers (OpenAI, Anthropic) to handle their
+    specific streaming formats.
+    """
+
+    def __init__(self, llm_client: LLMClientBase, llm_config: LLMConfig) -> None:
+        super().__init__(llm_client, llm_config)
+        self.interface: OpenAIStreamingInterface | AnthropicStreamingInterface | None = None
+
+    async def invoke_llm(
+        self,
+        request_data: dict,
+        messages: list,
+        tools: list,
+        use_assistant_message: bool,
+        requires_approval_tools: list[str] = [],
+        step_id: str | None = None,
+        actor: User | None = None,
+    ) -> AsyncGenerator[LettaMessage, None]:
+        """
+        Execute a streaming LLM request and yield tokens/chunks as they arrive.
+
+        This adapter:
+        1. Makes a streaming request to the LLM
+        2. Yields chunks immediately for minimal TTFT
+        3. Accumulates response data through the streaming interface
+        4. Updates all instance variables after streaming completes
+        """
+        # Store request data
+        self.request_data = request_data
+
+        # Instantiate streaming interface
+        if self.llm_config.model_endpoint_type in [ProviderType.anthropic, ProviderType.bedrock]:
+            self.interface = AnthropicStreamingInterface(
+                use_assistant_message=use_assistant_message,
+                put_inner_thoughts_in_kwarg=self.llm_config.put_inner_thoughts_in_kwargs,
+                requires_approval_tools=requires_approval_tools,
+            )
+        elif self.llm_config.model_endpoint_type == ProviderType.openai:
+            self.interface = OpenAIStreamingInterface(
+                use_assistant_message=use_assistant_message,
+                is_openai_proxy=self.llm_config.provider_name == "lmstudio_openai",
+                put_inner_thoughts_in_kwarg=self.llm_config.put_inner_thoughts_in_kwargs,
+                messages=messages,
+                tools=tools,
+                requires_approval_tools=requires_approval_tools,
+            )
+        else:
+            raise ValueError(f"Streaming not supported for provider {self.llm_config.model_endpoint_type}")
+
+        # Extract optional parameters
+        # ttft_span = kwargs.get('ttft_span', None)
+
+        # Start the streaming request
+        stream = await self.llm_client.stream_async(request_data, self.llm_config)
+
+        # Process the stream and yield chunks immediately for TTFT
+        async for chunk in self.interface.process(stream):  # TODO: add ttft span
+            # Yield each chunk immediately as it arrives
+            yield chunk
+
+        # After streaming completes, extract the accumulated data
+        self.llm_request_finish_timestamp_ns = get_utc_timestamp_ns()
+
+        # Extract tool call from the interface
+        try:
+            self.tool_call = self.interface.get_tool_call_object()
+        except ValueError as e:
+            # No tool call, handle upstream
+            self.tool_call = None
+
+        # Extract reasoning content from the interface
+        self.reasoning_content = self.interface.get_reasoning_content()
+
+        # Extract usage statistics
+        # Some providers don't provide usage in streaming, use fallback if needed
+        if hasattr(self.interface, "input_tokens") and hasattr(self.interface, "output_tokens"):
+            # Handle cases where tokens might not be set (e.g., LMStudio)
+            input_tokens = self.interface.input_tokens
+            output_tokens = self.interface.output_tokens
+
+            # Fallback to estimated values if not provided
+            if not input_tokens and hasattr(self.interface, "fallback_input_tokens"):
+                input_tokens = self.interface.fallback_input_tokens
+            if not output_tokens and hasattr(self.interface, "fallback_output_tokens"):
+                output_tokens = self.interface.fallback_output_tokens
+
+            self.usage = LettaUsageStatistics(
+                step_count=1,
+                completion_tokens=output_tokens or 0,
+                prompt_tokens=input_tokens or 0,
+                total_tokens=(input_tokens or 0) + (output_tokens or 0),
+            )
+        else:
+            # Default usage statistics if not available
+            self.usage = LettaUsageStatistics(step_count=1, completion_tokens=0, prompt_tokens=0, total_tokens=0)
+
+        # Store any additional data from the interface
+        self.message_id = self.interface.letta_message_id
+
+        # Log request and response data
+        self.log_provider_trace(step_id=step_id, actor=actor)
+
+    def supports_token_streaming(self) -> bool:
+        return True
+
+    def log_provider_trace(self, step_id: str | None, actor: User | None) -> None:
+        """
+        Log provider trace data for telemetry purposes in a fire-and-forget manner.
+
+        Creates an async task to log the request/response data without blocking
+        the main execution flow. For streaming adapters, this includes the final
+        tool call and reasoning content collected during streaming.
+
+        Args:
+            step_id: The step ID associated with this request for logging purposes
+            actor: The user associated with this request for logging purposes
+        """
+        if step_id is None or actor is None or not settings.track_provider_trace:
+            return
+
+        asyncio.create_task(
+            self.telemetry_manager.create_provider_trace_async(
+                actor=actor,
+                provider_trace_create=ProviderTraceCreate(
+                    request_json=self.request_data,
+                    response_json={
+                        "content": {
+                            "tool_call": self.tool_call.model_dump_json(),
+                            "reasoning": [content.model_dump_json() for content in self.reasoning_content],
+                        },
+                        "id": self.interface.message_id,
+                        "model": self.interface.model,
+                        "role": "assistant",
+                        # "stop_reason": "",
+                        # "stop_sequence": None,
+                        "type": "message",
+                        "usage": {
+                            "input_tokens": self.usage.prompt_tokens,
+                            "output_tokens": self.usage.completion_tokens,
+                        },
+                    },
+                    step_id=step_id,  # Use original step_id for telemetry
+                    organization_id=actor.organization_id,
+                ),
+            )
+        )
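With both concrete adapters in place, a caller can decide up front whether it wants token streaming and treat the two uniformly afterwards. A minimal sketch, assuming both are constructed with the same (llm_client, llm_config) pair defined on the base class:

from letta.adapters.letta_llm_request_adapter import LettaLLMRequestAdapter
from letta.adapters.letta_llm_stream_adapter import LettaLLMStreamAdapter


def choose_adapter(llm_client, llm_config, stream_tokens: bool):
    # Token streaming requested: use the streaming adapter (it raises ValueError for
    # unsupported providers); otherwise use the blocking request adapter.
    if stream_tokens:
        return LettaLLMStreamAdapter(llm_client, llm_config)
    return LettaLLMRequestAdapter(llm_client, llm_config)

Downstream code can then branch on adapter.supports_token_streaming() rather than on the concrete class.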
letta/agents/base_agent.py
CHANGED
@@ -175,7 +175,10 @@ class BaseAgent(ABC):
 
         # [DB Call] Update Messages
         new_system_message = await self.message_manager.update_message_by_id_async(
-            curr_system_message.id,
+            curr_system_message.id,
+            message_update=MessageUpdate(content=new_system_message_str),
+            actor=self.actor,
+            project_id=agent_state.project_id,
         )
         return [new_system_message] + in_context_messages[1:]
 
letta/agents/base_agent_v2.py
ADDED
@@ -0,0 +1,68 @@
+from abc import ABC, abstractmethod
+from typing import AsyncGenerator
+
+from letta.constants import DEFAULT_MAX_STEPS
+from letta.log import get_logger
+from letta.schemas.agent import AgentState
+from letta.schemas.enums import MessageStreamStatus
+from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, MessageType
+from letta.schemas.letta_response import LettaResponse
+from letta.schemas.message import MessageCreate
+from letta.schemas.user import User
+
+
+class BaseAgentV2(ABC):
+    """
+    Abstract base class for the main agent execution loop for letta agents, handling
+    message management, llm api request, tool execution, and context tracking.
+    """
+
+    def __init__(self, agent_state: AgentState, actor: User):
+        self.agent_state = agent_state
+        self.actor = actor
+        self.logger = get_logger(agent_state.id)
+
+    @abstractmethod
+    async def build_request(
+        self,
+        input_messages: list[MessageCreate],
+    ) -> dict:
+        """
+        Execute the agent loop in dry_run mode, returning just the generated request
+        payload sent to the underlying llm provider.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    async def step(
+        self,
+        input_messages: list[MessageCreate],
+        max_steps: int = DEFAULT_MAX_STEPS,
+        run_id: str | None = None,
+        use_assistant_message: bool = True,
+        include_return_message_types: list[MessageType] | None = None,
+        request_start_timestamp_ns: int | None = None,
+    ) -> LettaResponse:
+        """
+        Execute the agent loop in blocking mode, returning all messages at once.
+        """
+        raise NotImplementedError
+
+    @abstractmethod
+    async def stream(
+        self,
+        input_messages: list[MessageCreate],
+        max_steps: int = DEFAULT_MAX_STEPS,
+        stream_tokens: bool = False,
+        run_id: str | None = None,
+        use_assistant_message: bool = True,
+        include_return_message_types: list[MessageType] | None = None,
+        request_start_timestamp_ns: int | None = None,
+    ) -> AsyncGenerator[LettaMessage | LegacyLettaMessage | MessageStreamStatus, None]:
+        """
+        Execute the agent loop in streaming mode, yielding chunks as they become available.
+        If stream_tokens is True, individual tokens are streamed as they arrive from the LLM,
+        providing the lowest latency experience, otherwise each complete step (reasoning +
+        tool call + tool return) is yielded as it completes.
+        """
+        raise NotImplementedError
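A sketch of how a concrete BaseAgentV2 implementation might be driven in its three modes; the MessageCreate arguments and the assumption that stream is implemented as an async generator are illustrative, not guaranteed by this diff:

from letta.schemas.message import MessageCreate


async def run_turn(agent) -> None:
    # agent is any concrete BaseAgentV2 (e.g. the new LettaAgentV2), already constructed.
    msgs = [MessageCreate(role="user", content="hello")]

    # Dry run: inspect the exact provider payload without calling the LLM.
    payload = await agent.build_request(input_messages=msgs)
    print(payload)

    # Blocking: run the loop to completion and get a LettaResponse back.
    response = await agent.step(input_messages=msgs, max_steps=10)
    print(response.messages)

    # Streaming: consume chunks (or tokens, with stream_tokens=True) as they arrive,
    # assuming stream is implemented as an async generator.
    async for chunk in agent.stream(input_messages=msgs, stream_tokens=True):
        print(chunk)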
letta/agents/helpers.py
CHANGED
@@ -3,6 +3,7 @@ import uuid
 import xml.etree.ElementTree as ET
 from typing import List, Optional, Tuple
 
+from letta.errors import PendingApprovalError
 from letta.helpers import ToolRulesSolver
 from letta.log import get_logger
 from letta.schemas.agent import AgentState
@@ -117,7 +118,7 @@ async def _prepare_in_context_messages_async(
     new_in_context_messages = await message_manager.create_many_messages_async(
         create_input_messages(input_messages=input_messages, agent_id=agent_state.id, timezone=agent_state.timezone, actor=actor),
         actor=actor,
-
+        project_id=agent_state.project_id,
     )
 
     return current_in_context_messages, new_in_context_messages
@@ -168,10 +169,7 @@
     else:
         # User is trying to send a regular message
        if current_in_context_messages[-1].role == "approval":
-            raise
-            "Cannot send a new message: The agent is waiting for approval on a tool call. "
-            "Please approve or deny the pending request before continuing."
-            )
+            raise PendingApprovalError(pending_request_id=current_in_context_messages[-1].id)
 
    # Create a new user message from the input but dont store it yet
    new_in_context_messages = create_input_messages(
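The replacement turns the previous ad-hoc raise into a typed PendingApprovalError from letta.errors that carries the id of the pending approval message. A caller-side sketch of handling it (the surrounding function and the response shape are illustrative, not from this diff):

from letta.errors import PendingApprovalError


async def send_message_safely(agent, input_messages):
    try:
        return await agent.step(input_messages=input_messages)
    except PendingApprovalError:
        # The agent has a tool call waiting for approval; surface an actionable error
        # instead of accepting new input.
        return {
            "error": "pending_approval",
            "detail": "Approve or deny the pending tool call before sending new messages.",
        }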
letta/agents/letta_agent.py
CHANGED
@@ -495,7 +495,10 @@ class LettaAgent(BaseAgent):
                     message.is_err = True
                     message.step_id = effective_step_id
                 await self.message_manager.create_many_messages_async(
-                    initial_messages,
+                    initial_messages,
+                    actor=self.actor,
+                    project_id=agent_state.project_id,
+                    template_id=agent_state.template_id,
                 )
             elif step_progression <= StepProgression.LOGGED_TRACE:
                 if stop_reason is None:
@@ -823,7 +826,10 @@
                     message.is_err = True
                     message.step_id = effective_step_id
                 await self.message_manager.create_many_messages_async(
-                    initial_messages,
+                    initial_messages,
+                    actor=self.actor,
+                    project_id=agent_state.project_id,
+                    template_id=agent_state.template_id,
                 )
             elif step_progression <= StepProgression.LOGGED_TRACE:
                 if stop_reason is None:
@@ -1018,6 +1024,7 @@
             interface = AnthropicStreamingInterface(
                 use_assistant_message=use_assistant_message,
                 put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+                requires_approval_tools=tool_rules_solver.get_requires_approval_tools(valid_tool_names),
             )
         elif agent_state.llm_config.model_endpoint_type == ProviderType.openai:
             interface = OpenAIStreamingInterface(
@@ -1026,6 +1033,7 @@
                 messages=current_in_context_messages + new_in_context_messages,
                 tools=request_data.get("tools", []),
                 put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
+                requires_approval_tools=tool_rules_solver.get_requires_approval_tools(valid_tool_names),
             )
         else:
             raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
@@ -1170,12 +1178,13 @@
             )
             step_progression = StepProgression.LOGGED_TRACE
 
-
-
-
-
-
-
+            if persisted_messages[-1].role != "approval":
+                # yields tool response as this is handled from Letta and not the response from the LLM provider
+                tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
+                if not (use_assistant_message and tool_return.name == "send_message"):
+                    # Apply message type filtering if specified
+                    if include_return_message_types is None or tool_return.message_type in include_return_message_types:
+                        yield f"data: {tool_return.model_dump_json()}\n\n"
 
             # TODO (cliandy): consolidate and expand with trace
             MetricRegistry().step_execution_time_ms_histogram.record(get_utc_timestamp_ns() - step_start, get_ctx_attributes())
@@ -1259,7 +1268,10 @@
                     message.is_err = True
                     message.step_id = effective_step_id
                 await self.message_manager.create_many_messages_async(
-                    initial_messages,
+                    initial_messages,
+                    actor=self.actor,
+                    project_id=agent_state.project_id,
+                    template_id=agent_state.template_id,
                 )
             elif step_progression <= StepProgression.LOGGED_TRACE:
                 if stop_reason is None:
@@ -1667,7 +1679,7 @@
         )
         messages_to_persist = (initial_messages or []) + tool_call_messages
         persisted_messages = await self.message_manager.create_many_messages_async(
-            messages_to_persist, actor=self.actor,
+            messages_to_persist, actor=self.actor, project_id=agent_state.project_id, template_id=agent_state.template_id
         )
         return persisted_messages, continue_stepping, stop_reason
 
@@ -1686,7 +1698,6 @@
             tool_call_id=tool_call_id,
             request_heartbeat=request_heartbeat,
         )
-
         if not is_approval and tool_rules_solver.is_requires_approval_tool(tool_call_name):
             approval_message = create_approval_request_message_from_llm_response(
                 agent_id=agent_state.id,
@@ -1779,7 +1790,7 @@
         messages_to_persist = (initial_messages or []) + tool_call_messages
 
         persisted_messages = await self.message_manager.create_many_messages_async(
-            messages_to_persist, actor=self.actor,
+            messages_to_persist, actor=self.actor, project_id=agent_state.project_id, template_id=agent_state.template_id
         )
 
         if run_id:
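The new block in the streaming path emits each non-approval tool return as its own server-sent-event frame (a data: line followed by a blank line), unless use_assistant_message folds a send_message return into the assistant message. A minimal client-side sketch of consuming such a stream (the httpx dependency, the endpoint handling, and the [DONE] sentinel are assumptions, not taken from this diff):

import json

import httpx


async def consume_stream(url: str, payload: dict) -> None:
    # Read server-sent events line by line and decode each `data:` frame.
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream("POST", url, json=payload) as response:
            async for line in response.aiter_lines():
                if not line.startswith("data: "):
                    continue
                body = line[len("data: "):]
                if body == "[DONE]":
                    break
                message = json.loads(body)
                print(message.get("message_type"), message)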