letta-nightly 0.7.20.dev20250520104253__py3-none-any.whl → 0.7.21.dev20250521233415__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- letta/__init__.py +1 -1
- letta/agent.py +290 -3
- letta/agents/base_agent.py +0 -55
- letta/agents/helpers.py +5 -0
- letta/agents/letta_agent.py +314 -64
- letta/agents/letta_agent_batch.py +102 -55
- letta/agents/voice_agent.py +5 -5
- letta/client/client.py +9 -18
- letta/constants.py +55 -1
- letta/functions/function_sets/builtin.py +27 -0
- letta/functions/mcp_client/stdio_client.py +1 -1
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +10 -1
- letta/interfaces/openai_streaming_interface.py +9 -2
- letta/llm_api/anthropic.py +21 -2
- letta/llm_api/anthropic_client.py +33 -6
- letta/llm_api/google_ai_client.py +136 -423
- letta/llm_api/google_vertex_client.py +173 -22
- letta/llm_api/llm_api_tools.py +27 -0
- letta/llm_api/llm_client.py +1 -1
- letta/llm_api/llm_client_base.py +32 -21
- letta/llm_api/openai.py +57 -0
- letta/llm_api/openai_client.py +7 -11
- letta/memory.py +0 -1
- letta/orm/__init__.py +1 -0
- letta/orm/enums.py +1 -0
- letta/orm/provider_trace.py +26 -0
- letta/orm/step.py +1 -0
- letta/schemas/provider_trace.py +43 -0
- letta/schemas/providers.py +210 -65
- letta/schemas/step.py +1 -0
- letta/schemas/tool.py +4 -0
- letta/server/db.py +37 -19
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +57 -34
- letta/server/rest_api/routers/v1/blocks.py +3 -3
- letta/server/rest_api/routers/v1/identities.py +24 -26
- letta/server/rest_api/routers/v1/jobs.py +3 -3
- letta/server/rest_api/routers/v1/llms.py +13 -8
- letta/server/rest_api/routers/v1/sandbox_configs.py +6 -6
- letta/server/rest_api/routers/v1/tags.py +3 -3
- letta/server/rest_api/routers/v1/telemetry.py +18 -0
- letta/server/rest_api/routers/v1/tools.py +6 -6
- letta/server/rest_api/streaming_response.py +105 -0
- letta/server/rest_api/utils.py +4 -0
- letta/server/server.py +140 -0
- letta/services/agent_manager.py +251 -18
- letta/services/block_manager.py +52 -37
- letta/services/helpers/noop_helper.py +10 -0
- letta/services/identity_manager.py +43 -38
- letta/services/job_manager.py +29 -0
- letta/services/message_manager.py +111 -0
- letta/services/sandbox_config_manager.py +36 -0
- letta/services/step_manager.py +146 -0
- letta/services/telemetry_manager.py +58 -0
- letta/services/tool_executor/tool_execution_manager.py +49 -5
- letta/services/tool_executor/tool_execution_sandbox.py +47 -0
- letta/services/tool_executor/tool_executor.py +236 -7
- letta/services/tool_manager.py +160 -1
- letta/services/tool_sandbox/e2b_sandbox.py +65 -3
- letta/settings.py +10 -2
- letta/tracing.py +5 -5
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/METADATA +3 -2
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/RECORD +67 -60
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/entry_points.txt +0 -0
letta/agents/letta_agent.py
CHANGED
@@ -8,8 +8,9 @@ from openai.types import CompletionUsage
 from openai.types.chat import ChatCompletion, ChatCompletionChunk

 from letta.agents.base_agent import BaseAgent
-from letta.agents.helpers import _create_letta_response, _prepare_in_context_messages_async
+from letta.agents.helpers import _create_letta_response, _prepare_in_context_messages_async, generate_step_id
 from letta.helpers import ToolRulesSolver
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
 from letta.helpers.tool_execution_helper import enable_strict_mode
 from letta.interfaces.anthropic_streaming_interface import AnthropicStreamingInterface
 from letta.interfaces.openai_streaming_interface import OpenAIStreamingInterface
@@ -24,7 +25,8 @@ from letta.schemas.letta_message import AssistantMessage
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.message import Message, MessageCreate
-from letta.schemas.openai.chat_completion_response import ToolCall
+from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
+from letta.schemas.provider_trace import ProviderTraceCreate
 from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.server.rest_api.utils import create_letta_messages_from_llm_response
@@ -32,10 +34,11 @@ from letta.services.agent_manager import AgentManager
 from letta.services.block_manager import BlockManager
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
+from letta.services.step_manager import NoopStepManager, StepManager
+from letta.services.telemetry_manager import NoopTelemetryManager, TelemetryManager
 from letta.services.tool_executor.tool_execution_manager import ToolExecutionManager
-from letta.settings import settings
 from letta.system import package_function_response
-from letta.tracing import log_event, trace_method
+from letta.tracing import log_event, trace_method, tracer

 logger = get_logger(__name__)

@@ -50,6 +53,8 @@ class LettaAgent(BaseAgent):
         block_manager: BlockManager,
         passage_manager: PassageManager,
         actor: User,
+        step_manager: StepManager = NoopStepManager(),
+        telemetry_manager: TelemetryManager = NoopTelemetryManager(),
     ):
         super().__init__(agent_id=agent_id, openai_client=None, message_manager=message_manager, agent_manager=agent_manager, actor=actor)

@@ -57,6 +62,8 @@ class LettaAgent(BaseAgent):
         # Summarizer settings
         self.block_manager = block_manager
         self.passage_manager = passage_manager
+        self.step_manager = step_manager
+        self.telemetry_manager = telemetry_manager
         self.response_messages: List[Message] = []

         self.last_function_response = None
@@ -67,17 +74,19 @@ class LettaAgent(BaseAgent):

     @trace_method
     async def step(self, input_messages: List[MessageCreate], max_steps: int = 10, use_assistant_message: bool = True) -> LettaResponse:
-        agent_state = await self.agent_manager.get_agent_by_id_async(
-
-            agent_state=agent_state, input_messages=input_messages, max_steps=max_steps
+        agent_state = await self.agent_manager.get_agent_by_id_async(
+            agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
         )
+        _, new_in_context_messages, usage = await self._step(agent_state=agent_state, input_messages=input_messages, max_steps=max_steps)
         return _create_letta_response(
             new_in_context_messages=new_in_context_messages, use_assistant_message=use_assistant_message, usage=usage
         )

-
-
-
+    @trace_method
+    async def step_stream_no_tokens(self, input_messages: List[MessageCreate], max_steps: int = 10, use_assistant_message: bool = True):
+        agent_state = await self.agent_manager.get_agent_by_id_async(
+            agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
+        )
         current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
             input_messages, agent_state, self.message_manager, self.actor
         )
@@ -89,23 +98,81 @@ class LettaAgent(BaseAgent):
         )
         usage = LettaUsageStatistics()
         for _ in range(max_steps):
-
+            step_id = generate_step_id()
+
+            in_context_messages = await self._rebuild_memory_async(
+                current_in_context_messages + new_in_context_messages,
+                agent_state,
+                num_messages=self.num_messages,
+                num_archival_memories=self.num_archival_memories,
+            )
+            log_event("agent.stream_no_tokens.messages.refreshed")  # [1^]
+
+            request_data = await self._create_llm_request_data_async(
                 llm_client=llm_client,
-                in_context_messages=
+                in_context_messages=in_context_messages,
                 agent_state=agent_state,
                 tool_rules_solver=tool_rules_solver,
-
-                # TODO: also pass in reasoning content
+                # TODO: pass in reasoning content
             )
+            log_event("agent.stream_no_tokens.llm_request.created")  # [2^]

+            try:
+                response_data = await llm_client.request_async(request_data, agent_state.llm_config)
+            except Exception as e:
+                raise llm_client.handle_llm_error(e)
+            log_event("agent.stream_no_tokens.llm_response.received")  # [3^]
+
+            response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
+
+            # update usage
+            # TODO: add run_id
+            usage.step_count += 1
+            usage.completion_tokens += response.usage.completion_tokens
+            usage.prompt_tokens += response.usage.prompt_tokens
+            usage.total_tokens += response.usage.total_tokens
+
+            if not response.choices[0].message.tool_calls:
+                # TODO: make into a real error
+                raise ValueError("No tool calls found in response, model must make a tool call")
             tool_call = response.choices[0].message.tool_calls[0]
-
+            if response.choices[0].message.reasoning_content:
+                reasoning = [
+                    ReasoningContent(
+                        reasoning=response.choices[0].message.reasoning_content,
+                        is_native=True,
+                        signature=response.choices[0].message.reasoning_content_signature,
+                    )
+                ]
+            else:
+                reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons

             persisted_messages, should_continue = await self._handle_ai_response(
-                tool_call, agent_state, tool_rules_solver, reasoning_content=reasoning
+                tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning
             )
             self.response_messages.extend(persisted_messages)
             new_in_context_messages.extend(persisted_messages)
+            log_event("agent.stream_no_tokens.llm_response.processed")  # [4^]
+
+            # Log LLM Trace
+            await self.telemetry_manager.create_provider_trace_async(
+                actor=self.actor,
+                provider_trace_create=ProviderTraceCreate(
+                    request_json=request_data,
+                    response_json=response_data,
+                    step_id=step_id,
+                    organization_id=self.actor.organization_id,
+                ),
+            )
+
+            # stream step
+            # TODO: improve TTFT
+            filter_user_messages = [m for m in persisted_messages if m.role != "user"]
+            letta_messages = Message.to_letta_messages_from_list(
+                filter_user_messages, use_assistant_message=use_assistant_message, reverse=False
+            )
+            for message in letta_messages:
+                yield f"data: {message.model_dump_json()}\n\n"

             # update usage
             # TODO: add run_id
@@ -122,17 +189,125 @@ class LettaAgent(BaseAgent):
             message_ids = [m.id for m in (current_in_context_messages + new_in_context_messages)]
             self.agent_manager.set_in_context_messages(agent_id=self.agent_id, message_ids=message_ids, actor=self.actor)

+        # Return back usage
+        yield f"data: {usage.model_dump_json()}\n\n"
+
+    async def _step(
+        self, agent_state: AgentState, input_messages: List[MessageCreate], max_steps: int = 10
+    ) -> Tuple[List[Message], List[Message], CompletionUsage]:
+        """
+        Carries out an invocation of the agent loop. In each step, the agent
+            1. Rebuilds its memory
+            2. Generates a request for the LLM
+            3. Fetches a response from the LLM
+            4. Processes the response
+        """
+        current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
+            input_messages, agent_state, self.message_manager, self.actor
+        )
+        tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
+        llm_client = LLMClient.create(
+            provider_type=agent_state.llm_config.model_endpoint_type,
+            put_inner_thoughts_first=True,
+            actor=self.actor,
+        )
+        usage = LettaUsageStatistics()
+        for _ in range(max_steps):
+            step_id = generate_step_id()
+
+            in_context_messages = await self._rebuild_memory_async(
+                current_in_context_messages + new_in_context_messages,
+                agent_state,
+                num_messages=self.num_messages,
+                num_archival_memories=self.num_archival_memories,
+            )
+            log_event("agent.step.messages.refreshed")  # [1^]
+
+            request_data = await self._create_llm_request_data_async(
+                llm_client=llm_client,
+                in_context_messages=in_context_messages,
+                agent_state=agent_state,
+                tool_rules_solver=tool_rules_solver,
+                # TODO: pass in reasoning content
+            )
+            log_event("agent.step.llm_request.created")  # [2^]
+
+            try:
+                response_data = await llm_client.request_async(request_data, agent_state.llm_config)
+            except Exception as e:
+                raise llm_client.handle_llm_error(e)
+            log_event("agent.step.llm_response.received")  # [3^]
+
+            response = llm_client.convert_response_to_chat_completion(response_data, in_context_messages, agent_state.llm_config)
+
+            # TODO: add run_id
+            usage.step_count += 1
+            usage.completion_tokens += response.usage.completion_tokens
+            usage.prompt_tokens += response.usage.prompt_tokens
+            usage.total_tokens += response.usage.total_tokens
+
+            if not response.choices[0].message.tool_calls:
+                # TODO: make into a real error
+                raise ValueError("No tool calls found in response, model must make a tool call")
+            tool_call = response.choices[0].message.tool_calls[0]
+            if response.choices[0].message.reasoning_content:
+                reasoning = [
+                    ReasoningContent(
+                        reasoning=response.choices[0].message.reasoning_content,
+                        is_native=True,
+                        signature=response.choices[0].message.reasoning_content_signature,
+                    )
+                ]
+            else:
+                reasoning = [TextContent(text=response.choices[0].message.content)]  # reasoning placed into content for legacy reasons
+
+            persisted_messages, should_continue = await self._handle_ai_response(
+                tool_call, agent_state, tool_rules_solver, response.usage, reasoning_content=reasoning, step_id=step_id
+            )
+            self.response_messages.extend(persisted_messages)
+            new_in_context_messages.extend(persisted_messages)
+            log_event("agent.step.llm_response.processed")  # [4^]
+
+            # Log LLM Trace
+            await self.telemetry_manager.create_provider_trace_async(
+                actor=self.actor,
+                provider_trace_create=ProviderTraceCreate(
+                    request_json=request_data,
+                    response_json=response_data,
+                    step_id=step_id,
+                    organization_id=self.actor.organization_id,
+                ),
+            )
+
+            if not should_continue:
+                break
+
+        # Extend the in context message ids
+        if not agent_state.message_buffer_autoclear:
+            message_ids = [m.id for m in (current_in_context_messages + new_in_context_messages)]
+            self.agent_manager.set_in_context_messages(agent_id=self.agent_id, message_ids=message_ids, actor=self.actor)
+
         return current_in_context_messages, new_in_context_messages, usage

     @trace_method
     async def step_stream(
-        self,
+        self,
+        input_messages: List[MessageCreate],
+        max_steps: int = 10,
+        use_assistant_message: bool = True,
+        request_start_timestamp_ns: Optional[int] = None,
     ) -> AsyncGenerator[str, None]:
         """
-
-        Whenever we detect a tool call, we yield from _handle_ai_response as well.
+        Carries out an invocation of the agent loop in a streaming fashion that yields partial tokens.
+        Whenever we detect a tool call, we yield from _handle_ai_response as well. At each step, the agent
+            1. Rebuilds its memory
+            2. Generates a request for the LLM
+            3. Fetches a response from the LLM
+            4. Processes the response
         """
-        agent_state = await self.agent_manager.get_agent_by_id_async(
+        agent_state = await self.agent_manager.get_agent_by_id_async(
+            agent_id=self.agent_id, include_relationships=["tools", "memory"], actor=self.actor
+        )
         current_in_context_messages, new_in_context_messages = await _prepare_in_context_messages_async(
             input_messages, agent_state, self.message_manager, self.actor
         )
@@ -145,13 +320,29 @@ class LettaAgent(BaseAgent):
         usage = LettaUsageStatistics()

         for _ in range(max_steps):
-
+            step_id = generate_step_id()
+            in_context_messages = await self._rebuild_memory_async(
+                current_in_context_messages + new_in_context_messages,
+                agent_state,
+                num_messages=self.num_messages,
+                num_archival_memories=self.num_archival_memories,
+            )
+            log_event("agent.step.messages.refreshed")  # [1^]
+
+            request_data = await self._create_llm_request_data_async(
                 llm_client=llm_client,
-                in_context_messages=
+                in_context_messages=in_context_messages,
                 agent_state=agent_state,
                 tool_rules_solver=tool_rules_solver,
-                stream=True,
             )
+            log_event("agent.stream.llm_request.created")  # [2^]
+
+            try:
+                stream = await llm_client.stream_async(request_data, agent_state.llm_config)
+            except Exception as e:
+                raise llm_client.handle_llm_error(e)
+            log_event("agent.stream.llm_response.received")  # [3^]
+
             # TODO: THIS IS INCREDIBLY UGLY
             # TODO: THERE ARE MULTIPLE COPIES OF THE LLM_CONFIG EVERYWHERE THAT ARE GETTING MANIPULATED
             if agent_state.llm_config.model_endpoint_type == "anthropic":
@@ -164,7 +355,23 @@ class LettaAgent(BaseAgent):
                     use_assistant_message=use_assistant_message,
                     put_inner_thoughts_in_kwarg=agent_state.llm_config.put_inner_thoughts_in_kwargs,
                 )
+            else:
+                raise ValueError(f"Streaming not supported for {agent_state.llm_config}")
+
+            first_chunk, ttft_span = True, None
+            if request_start_timestamp_ns is not None:
+                ttft_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
+                ttft_span.set_attributes({f"llm_config.{k}": v for k, v in agent_state.llm_config.model_dump().items() if v is not None})
+
             async for chunk in interface.process(stream):
+                # Measure time to first token
+                if first_chunk and ttft_span is not None:
+                    now = get_utc_timestamp_ns()
+                    ttft_ns = now - request_start_timestamp_ns
+                    ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
+                    ttft_span.end()
+                    first_chunk = False
+
                 yield f"data: {chunk.model_dump_json()}\n\n"

             # update usage
@@ -180,13 +387,46 @@ class LettaAgent(BaseAgent):
                 tool_call,
                 agent_state,
                 tool_rules_solver,
+                UsageStatistics(
+                    completion_tokens=interface.output_tokens,
+                    prompt_tokens=interface.input_tokens,
+                    total_tokens=interface.input_tokens + interface.output_tokens,
+                ),
                 reasoning_content=reasoning_content,
                 pre_computed_assistant_message_id=interface.letta_assistant_message_id,
                 pre_computed_tool_message_id=interface.letta_tool_message_id,
+                step_id=step_id,
             )
             self.response_messages.extend(persisted_messages)
             new_in_context_messages.extend(persisted_messages)

+            # TODO (cliandy): the stream POST request span has ended at this point, we should tie this to the stream
+            # log_event("agent.stream.llm_response.processed")  # [4^]
+
+            # Log LLM Trace
+            # TODO (cliandy): we are piecing together the streamed response here. Content here does not match the actual response schema.
+            await self.telemetry_manager.create_provider_trace_async(
+                actor=self.actor,
+                provider_trace_create=ProviderTraceCreate(
+                    request_json=request_data,
+                    response_json={
+                        "content": {
+                            "tool_call": tool_call.model_dump_json(),
+                            "reasoning": [content.model_dump_json() for content in reasoning_content],
+                        },
+                        "id": interface.message_id,
+                        "model": interface.model,
+                        "role": "assistant",
+                        # "stop_reason": "",
+                        # "stop_sequence": None,
+                        "type": "message",
+                        "usage": {"input_tokens": interface.input_tokens, "output_tokens": interface.output_tokens},
+                    },
+                    step_id=step_id,
+                    organization_id=self.actor.organization_id,
+                ),
+            )
+
             if not use_assistant_message or should_continue:
                 tool_return = [msg for msg in persisted_messages if msg.role == "tool"][-1].to_letta_messages()[0]
                 yield f"data: {tool_return.model_dump_json()}\n\n"
@@ -209,28 +449,20 @@ class LettaAgent(BaseAgent):
         yield f"data: {MessageStreamStatus.done.model_dump_json()}\n\n"

     @trace_method
-
-    async def _get_ai_reply(
+    async def _create_llm_request_data_async(
         self,
         llm_client: LLMClientBase,
         in_context_messages: List[Message],
         agent_state: AgentState,
         tool_rules_solver: ToolRulesSolver,
-        stream: bool,
     ) -> ChatCompletion | AsyncStream[ChatCompletionChunk]:
-
-
-        self.
-
-
-        in_context_messages =
-
-        )
-        else:
-            if settings.experimental_skip_rebuild_memory and agent_state.llm_config.model_endpoint_type == "google_vertex":
-                logger.info("Skipping memory rebuild")
-            else:
-                in_context_messages = self._rebuild_memory(in_context_messages, agent_state)
+        self.num_messages = self.num_messages or (await self.message_manager.size_async(actor=self.actor, agent_id=agent_state.id))
+        self.num_archival_memories = self.num_archival_memories or (
+            await self.passage_manager.size_async(actor=self.actor, agent_id=agent_state.id)
+        )
+        in_context_messages = await self._rebuild_memory_async(
+            in_context_messages, agent_state, num_messages=self.num_messages, num_archival_memories=self.num_archival_memories
+        )

         tools = [
             t
@@ -280,9 +504,11 @@ class LettaAgent(BaseAgent):
         tool_call: ToolCall,
         agent_state: AgentState,
         tool_rules_solver: ToolRulesSolver,
+        usage: UsageStatistics,
         reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
         pre_computed_assistant_message_id: Optional[str] = None,
         pre_computed_tool_message_id: Optional[str] = None,
+        step_id: str | None = None,
     ) -> Tuple[List[Message], bool]:
         """
         Now that streaming is done, handle the final AI response.
@@ -294,8 +520,11 @@ class LettaAgent(BaseAgent):

         try:
             tool_args = json.loads(tool_call_args_str)
+            assert isinstance(tool_args, dict), "tool_args must be a dict"
         except json.JSONDecodeError:
             tool_args = {}
+        except AssertionError:
+            tool_args = json.loads(tool_args)

         # Get request heartbeats and coerce to bool
         request_heartbeat = tool_args.pop("request_heartbeat", False)
@@ -329,7 +558,25 @@ class LettaAgent(BaseAgent):
         elif tool_rules_solver.is_continue_tool(tool_name=tool_call_name):
             continue_stepping = True

-        #
+        # 5a. Persist Steps to DB
+        # Following agent loop to persist this before messages
+        # TODO (cliandy): determine what should match old loop w/provider_id, job_id
+        # TODO (cliandy): UsageStatistics and LettaUsageStatistics are used in many places, but are not the same.
+        logged_step = await self.step_manager.log_step_async(
+            actor=self.actor,
+            agent_id=agent_state.id,
+            provider_name=agent_state.llm_config.model_endpoint_type,
+            provider_category=agent_state.llm_config.provider_category or "base",
+            model=agent_state.llm_config.model,
+            model_endpoint=agent_state.llm_config.model_endpoint,
+            context_window_limit=agent_state.llm_config.context_window,
+            usage=usage,
+            provider_id=None,
+            job_id=None,
+            step_id=step_id,
+        )
+
+        # 5b. Persist Messages to DB
         tool_call_messages = create_letta_messages_from_llm_response(
             agent_id=agent_state.id,
             model=agent_state.llm_config.model,
@@ -343,6 +590,7 @@ class LettaAgent(BaseAgent):
             reasoning_content=reasoning_content,
             pre_computed_assistant_message_id=pre_computed_assistant_message_id,
             pre_computed_tool_message_id=pre_computed_tool_message_id,
+            step_id=logged_step.id if logged_step else None,  # TODO (cliandy): eventually move over other agent loops
         )
         persisted_messages = await self.message_manager.create_many_messages_async(tool_call_messages, actor=self.actor)
         self.last_function_response = function_response
@@ -361,20 +609,21 @@ class LettaAgent(BaseAgent):

         # TODO: This temp. Move this logic and code to executors
         try:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+            tool_execution_manager = ToolExecutionManager(
+                agent_state=agent_state,
+                message_manager=self.message_manager,
+                agent_manager=self.agent_manager,
+                block_manager=self.block_manager,
+                passage_manager=self.passage_manager,
+                actor=self.actor,
+            )
+            # TODO: Integrate sandbox result
+            log_event(name=f"start_{tool_name}_execution", attributes=tool_args)
+            tool_execution_result = await tool_execution_manager.execute_tool_async(
+                function_name=tool_name, function_args=tool_args, tool=target_tool
+            )
+            log_event(name=f"finish_{tool_name}_execution", attributes=tool_args)
+            return tool_execution_result.func_return, True
         except Exception as e:
             return f"Failed to call tool. Error: {e}", False

@@ -430,6 +679,7 @@ class LettaAgent(BaseAgent):
         results = await asyncio.gather(*tasks)
         return results

+    @trace_method
     async def _load_last_function_response_async(self):
         """Load the last function response from message history"""
         in_context_messages = await self.agent_manager.get_in_context_messages_async(agent_id=self.agent_id, actor=self.actor)