letta-nightly 0.7.15.dev20250515104317__py3-none-any.whl → 0.7.17.dev20250516090339__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +12 -0
- letta/agents/helpers.py +48 -5
- letta/agents/letta_agent.py +64 -28
- letta/agents/letta_agent_batch.py +44 -26
- letta/agents/voice_sleeptime_agent.py +6 -4
- letta/client/client.py +16 -1
- letta/constants.py +3 -0
- letta/functions/async_composio_toolset.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +40 -6
- letta/interfaces/openai_streaming_interface.py +303 -0
- letta/jobs/llm_batch_job_polling.py +6 -2
- letta/orm/agent.py +102 -1
- letta/orm/block.py +3 -0
- letta/orm/sqlalchemy_base.py +459 -158
- letta/schemas/agent.py +10 -2
- letta/schemas/block.py +3 -0
- letta/schemas/memory.py +7 -2
- letta/server/rest_api/routers/v1/agents.py +29 -27
- letta/server/rest_api/routers/v1/blocks.py +1 -1
- letta/server/rest_api/routers/v1/groups.py +2 -2
- letta/server/rest_api/routers/v1/messages.py +11 -11
- letta/server/rest_api/routers/v1/runs.py +2 -2
- letta/server/rest_api/routers/v1/tools.py +4 -4
- letta/server/rest_api/routers/v1/users.py +9 -9
- letta/server/rest_api/routers/v1/voice.py +1 -1
- letta/server/server.py +74 -0
- letta/services/agent_manager.py +417 -7
- letta/services/block_manager.py +12 -8
- letta/services/helpers/agent_manager_helper.py +19 -0
- letta/services/job_manager.py +99 -0
- letta/services/llm_batch_manager.py +28 -27
- letta/services/message_manager.py +66 -19
- letta/services/passage_manager.py +14 -0
- letta/services/tool_executor/tool_executor.py +19 -1
- letta/services/tool_manager.py +13 -3
- letta/services/user_manager.py +70 -0
- letta/types/__init__.py +0 -0
- {letta_nightly-0.7.15.dev20250515104317.dist-info → letta_nightly-0.7.17.dev20250516090339.dist-info}/METADATA +3 -3
- {letta_nightly-0.7.15.dev20250515104317.dist-info → letta_nightly-0.7.17.dev20250516090339.dist-info}/RECORD +43 -41
- {letta_nightly-0.7.15.dev20250515104317.dist-info → letta_nightly-0.7.17.dev20250516090339.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.15.dev20250515104317.dist-info → letta_nightly-0.7.17.dev20250516090339.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.15.dev20250515104317.dist-info → letta_nightly-0.7.17.dev20250516090339.dist-info}/entry_points.txt +0 -0
@@ -108,6 +108,8 @@ class AnthropicStreamingInterface:
|
|
108
108
|
raise
|
109
109
|
|
110
110
|
async def process(self, stream: AsyncStream[BetaRawMessageStreamEvent]) -> AsyncGenerator[LettaMessage, None]:
|
111
|
+
prev_message_type = None
|
112
|
+
message_index = 0
|
111
113
|
try:
|
112
114
|
async with stream:
|
113
115
|
async for event in stream:
|
@@ -137,14 +139,17 @@ class AnthropicStreamingInterface:
|
|
137
139
|
# TODO: Can capture signature, etc.
|
138
140
|
elif isinstance(content, BetaRedactedThinkingBlock):
|
139
141
|
self.anthropic_mode = EventMode.REDACTED_THINKING
|
140
|
-
|
142
|
+
if prev_message_type and prev_message_type != "hidden_reasoning_message":
|
143
|
+
message_index += 1
|
141
144
|
hidden_reasoning_message = HiddenReasoningMessage(
|
142
145
|
id=self.letta_assistant_message_id,
|
143
146
|
state="redacted",
|
144
147
|
hidden_reasoning=content.data,
|
145
148
|
date=datetime.now(timezone.utc).isoformat(),
|
149
|
+
otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
|
146
150
|
)
|
147
151
|
self.reasoning_messages.append(hidden_reasoning_message)
|
152
|
+
prev_message_type = hidden_reasoning_message.message_type
|
148
153
|
yield hidden_reasoning_message
|
149
154
|
|
150
155
|
elif isinstance(event, BetaRawContentBlockDeltaEvent):
|
@@ -175,12 +180,16 @@ class AnthropicStreamingInterface:
|
|
175
180
|
self.partial_tag_buffer = combined_text[-10:] if len(combined_text) > 10 else combined_text
|
176
181
|
self.accumulated_inner_thoughts.append(delta.text)
|
177
182
|
|
183
|
+
if prev_message_type and prev_message_type != "reasoning_message":
|
184
|
+
message_index += 1
|
178
185
|
reasoning_message = ReasoningMessage(
|
179
186
|
id=self.letta_assistant_message_id,
|
180
187
|
reasoning=self.accumulated_inner_thoughts[-1],
|
181
188
|
date=datetime.now(timezone.utc).isoformat(),
|
189
|
+
otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
|
182
190
|
)
|
183
191
|
self.reasoning_messages.append(reasoning_message)
|
192
|
+
prev_message_type = reasoning_message.message_type
|
184
193
|
yield reasoning_message
|
185
194
|
|
186
195
|
elif isinstance(delta, BetaInputJSONDelta):
|
@@ -198,21 +207,30 @@ class AnthropicStreamingInterface:
|
|
198
207
|
inner_thoughts_diff = current_inner_thoughts[len(previous_inner_thoughts) :]
|
199
208
|
|
200
209
|
if inner_thoughts_diff:
|
210
|
+
if prev_message_type and prev_message_type != "reasoning_message":
|
211
|
+
message_index += 1
|
201
212
|
reasoning_message = ReasoningMessage(
|
202
213
|
id=self.letta_assistant_message_id,
|
203
214
|
reasoning=inner_thoughts_diff,
|
204
215
|
date=datetime.now(timezone.utc).isoformat(),
|
216
|
+
otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
|
205
217
|
)
|
206
218
|
self.reasoning_messages.append(reasoning_message)
|
219
|
+
prev_message_type = reasoning_message.message_type
|
207
220
|
yield reasoning_message
|
208
221
|
|
209
222
|
# Check if inner thoughts are complete - if so, flush the buffer
|
210
223
|
if not self.inner_thoughts_complete and self._check_inner_thoughts_complete(self.accumulated_tool_call_args):
|
211
224
|
self.inner_thoughts_complete = True
|
212
225
|
# Flush all buffered tool call messages
|
213
|
-
|
214
|
-
|
215
|
-
|
226
|
+
if len(self.tool_call_buffer) > 0:
|
227
|
+
if prev_message_type and prev_message_type != "tool_call_message":
|
228
|
+
message_index += 1
|
229
|
+
for buffered_msg in self.tool_call_buffer:
|
230
|
+
buffered_msg.otid = Message.generate_otid_from_id(self.letta_tool_message_id, message_index)
|
231
|
+
prev_message_type = buffered_msg.message_type
|
232
|
+
yield buffered_msg
|
233
|
+
self.tool_call_buffer = []
|
216
234
|
|
217
235
|
# Start detecting special case of "send_message"
|
218
236
|
if self.tool_call_name == DEFAULT_MESSAGE_TOOL and self.use_assistant_message:
|
@@ -222,11 +240,16 @@ class AnthropicStreamingInterface:
|
|
222
240
|
|
223
241
|
# Only stream out if it's not an empty string
|
224
242
|
if send_message_diff:
|
225
|
-
|
243
|
+
if prev_message_type and prev_message_type != "assistant_message":
|
244
|
+
message_index += 1
|
245
|
+
assistant_msg = AssistantMessage(
|
226
246
|
id=self.letta_assistant_message_id,
|
227
247
|
content=[TextContent(text=send_message_diff)],
|
228
248
|
date=datetime.now(timezone.utc).isoformat(),
|
249
|
+
otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
|
229
250
|
)
|
251
|
+
prev_message_type = assistant_msg.message_type
|
252
|
+
yield assistant_msg
|
230
253
|
else:
|
231
254
|
# Otherwise, it is a normal tool call - buffer or yield based on inner thoughts status
|
232
255
|
tool_call_msg = ToolCallMessage(
|
@@ -234,8 +257,11 @@ class AnthropicStreamingInterface:
|
|
234
257
|
tool_call=ToolCallDelta(arguments=delta.partial_json),
|
235
258
|
date=datetime.now(timezone.utc).isoformat(),
|
236
259
|
)
|
237
|
-
|
238
260
|
if self.inner_thoughts_complete:
|
261
|
+
if prev_message_type and prev_message_type != "tool_call_message":
|
262
|
+
message_index += 1
|
263
|
+
tool_call_msg.otid = Message.generate_otid_from_id(self.letta_tool_message_id, message_index)
|
264
|
+
prev_message_type = tool_call_msg.message_type
|
239
265
|
yield tool_call_msg
|
240
266
|
else:
|
241
267
|
self.tool_call_buffer.append(tool_call_msg)
|
@@ -249,13 +275,17 @@ class AnthropicStreamingInterface:
|
|
249
275
|
f"Streaming integrity failed - received BetaThinkingBlock object while not in THINKING EventMode: {delta}"
|
250
276
|
)
|
251
277
|
|
278
|
+
if prev_message_type and prev_message_type != "reasoning_message":
|
279
|
+
message_index += 1
|
252
280
|
reasoning_message = ReasoningMessage(
|
253
281
|
id=self.letta_assistant_message_id,
|
254
282
|
source="reasoner_model",
|
255
283
|
reasoning=delta.thinking,
|
256
284
|
date=datetime.now(timezone.utc).isoformat(),
|
285
|
+
otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
|
257
286
|
)
|
258
287
|
self.reasoning_messages.append(reasoning_message)
|
288
|
+
prev_message_type = reasoning_message.message_type
|
259
289
|
yield reasoning_message
|
260
290
|
elif isinstance(delta, BetaSignatureDelta):
|
261
291
|
# Safety check
|
@@ -264,14 +294,18 @@ class AnthropicStreamingInterface:
|
|
264
294
|
f"Streaming integrity failed - received BetaSignatureDelta object while not in THINKING EventMode: {delta}"
|
265
295
|
)
|
266
296
|
|
297
|
+
if prev_message_type and prev_message_type != "reasoning_message":
|
298
|
+
message_index += 1
|
267
299
|
reasoning_message = ReasoningMessage(
|
268
300
|
id=self.letta_assistant_message_id,
|
269
301
|
source="reasoner_model",
|
270
302
|
reasoning="",
|
271
303
|
date=datetime.now(timezone.utc).isoformat(),
|
272
304
|
signature=delta.signature,
|
305
|
+
otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
|
273
306
|
)
|
274
307
|
self.reasoning_messages.append(reasoning_message)
|
308
|
+
prev_message_type = reasoning_message.message_type
|
275
309
|
yield reasoning_message
|
276
310
|
elif isinstance(event, BetaRawMessageStartEvent):
|
277
311
|
self.message_id = event.message.id
|
@@ -0,0 +1,303 @@
|
|
1
|
+
from datetime import datetime, timezone
|
2
|
+
from typing import AsyncGenerator, List, Optional
|
3
|
+
|
4
|
+
from openai import AsyncStream
|
5
|
+
from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
|
6
|
+
|
7
|
+
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
8
|
+
from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
|
9
|
+
from letta.schemas.letta_message_content import TextContent
|
10
|
+
from letta.schemas.message import Message
|
11
|
+
from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
|
12
|
+
from letta.server.rest_api.json_parser import OptimisticJSONParser
|
13
|
+
from letta.streaming_utils import JSONInnerThoughtsExtractor
|
14
|
+
|
15
|
+
|
16
|
+
class OpenAIStreamingInterface:
|
17
|
+
"""
|
18
|
+
Encapsulates the logic for streaming responses from OpenAI.
|
19
|
+
This class handles parsing of partial tokens, pre-execution messages,
|
20
|
+
and detection of tool call events.
|
21
|
+
"""
|
22
|
+
|
23
|
+
def __init__(self, use_assistant_message: bool = False, put_inner_thoughts_in_kwarg: bool = False):
    """
    Set up the per-stream state used while processing an OpenAI response.

    Args:
        use_assistant_message: When True, calls to the default message tool
            are surfaced as assistant messages instead of raw tool calls.
        put_inner_thoughts_in_kwarg: Accepted for interface compatibility.
            NOTE(review): this flag is not read anywhere in this constructor;
            it is presumably meant to configure the inner-thoughts extractor.
    """
    self.use_assistant_message = use_assistant_message
    self.assistant_message_tool_name = DEFAULT_MESSAGE_TOOL
    self.assistant_message_tool_kwarg = DEFAULT_MESSAGE_TOOL_KWARG

    self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
    self.function_args_reader = JSONInnerThoughtsExtractor(wait_for_first_key=True)  # TODO: pass in kwarg
    self.function_name_buffer = None
    self.function_args_buffer = None
    self.function_id_buffer = None
    self.last_flushed_function_name = None

    # Tool-call id of the most recent call to the assistant-message tool.
    # process() assigns it; initialising it here means reading it before any
    # such call yields None instead of raising AttributeError.
    self.prev_assistant_message_id: Optional[str] = None

    # Buffer to hold function arguments until inner thoughts are complete
    self.current_function_arguments = ""
    self.current_json_parse_result = {}

    # Premake IDs for database writes
    self.letta_assistant_message_id = Message.generate_id()
    self.letta_tool_message_id = Message.generate_id()

    # token counters
    self.input_tokens = 0
    self.output_tokens = 0

    self.content_buffer: List[str] = []
    self.tool_call_name: Optional[str] = None
    self.tool_call_id: Optional[str] = None
    # Inner-thought text fragments in arrival order (joined by get_reasoning_content)
    self.reasoning_messages: List[str] = []
|
51
|
+
|
52
|
+
def get_reasoning_content(self) -> List[TextContent]:
    """Return the collected inner-thought fragments as a single text part."""
    joined_reasoning = "".join(self.reasoning_messages)
    reasoning_part = TextContent(text=joined_reasoning)
    return [reasoning_part]
|
55
|
+
|
56
|
+
def get_tool_call_object(self) -> ToolCall:
    """
    Build the tool call accumulated so far (useful for the agent loop).

    The call carries the pre-generated Letta tool message id, the last
    function name that was flushed, and the raw argument string collected
    from the stream.
    """
    accumulated_call = FunctionCall(
        name=self.last_flushed_function_name,
        arguments=self.current_function_arguments,
    )
    return ToolCall(id=self.letta_tool_message_id, function=accumulated_call)
|
62
|
+
|
63
|
+
async def process(self, stream: AsyncStream[ChatCompletionChunk]) -> AsyncGenerator[LettaMessage, None]:
    """
    Iterate over the OpenAI stream and yield Letta messages as they form.

    For every chunk this accumulates token usage and, when the chunk carries
    a tool-call delta, buffers the function name / id / arguments and yields:

    - a ReasoningMessage for each inner-thoughts fragment extracted from the
      streamed arguments,
    - a ToolCallMessage for the function name and for argument fragments, or
    - an AssistantMessage holding the new text of the message kwarg when
      ``use_assistant_message`` is set and the tool is the assistant-message
      tool.

    Only the first choice and the first tool call of each chunk are examined.
    ``message_index`` is bumped whenever the yielded message type differs
    from the previously yielded one, and is used to derive each ``otid``.

    Args:
        stream: The async stream of chat-completion chunks to consume.

    Yields:
        LettaMessage: Reasoning, tool-call, or assistant messages.
    """
    async with stream:
        prev_message_type = None
        message_index = 0
        async for chunk in stream:
            # Track usage. The token counts are integers, so they are added
            # directly; wrapping them in len() raises TypeError.
            if chunk.usage:
                self.input_tokens += chunk.usage.prompt_tokens
                self.output_tokens += chunk.usage.completion_tokens

            if chunk.choices:
                choice = chunk.choices[0]
                message_delta = choice.delta

                if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
                    tool_call = message_delta.tool_calls[0]

                    if tool_call.function.name:
                        # If we're waiting for the first key, then we should hold back the name,
                        # i.e. add it to a buffer instead of returning it as a chunk
                        if self.function_name_buffer is None:
                            self.function_name_buffer = tool_call.function.name
                        else:
                            self.function_name_buffer += tool_call.function.name

                    if tool_call.id:
                        # Buffer until next time
                        if self.function_id_buffer is None:
                            self.function_id_buffer = tool_call.id
                        else:
                            self.function_id_buffer += tool_call.id

                    if tool_call.function.arguments:
                        # Keep the raw argument string and split the fragment into
                        # main-JSON and inner-thoughts updates
                        self.current_function_arguments += tool_call.function.arguments
                        updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(
                            tool_call.function.arguments
                        )

                        # If we have inner thoughts, we should output them as a chunk
                        if updates_inner_thoughts:
                            if prev_message_type and prev_message_type != "reasoning_message":
                                message_index += 1
                            self.reasoning_messages.append(updates_inner_thoughts)
                            reasoning_message = ReasoningMessage(
                                id=self.letta_tool_message_id,
                                date=datetime.now(timezone.utc),
                                reasoning=updates_inner_thoughts,
                                otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
                            )
                            prev_message_type = reasoning_message.message_type
                            yield reasoning_message

                            # Additionally inner thoughts may stream back with a chunk of main JSON.
                            # In that case, since we can only return a chunk at a time, we should buffer it
                            if updates_main_json:
                                if self.function_args_buffer is None:
                                    self.function_args_buffer = updates_main_json
                                else:
                                    self.function_args_buffer += updates_main_json

                        # If we have main_json, we should output a ToolCallMessage
                        elif updates_main_json:

                            # If there's something in the function_name buffer, we should release it first.
                            # NOTE: we could output it as part of a chunk that has both name and args,
                            # however the frontend may expect name first, then args, so to be
                            # safe we'll output name first in a separate chunk
                            if self.function_name_buffer:

                                # use_assistant_message means that we should also not release main_json raw,
                                # and instead should only release the contents of "message": "..."
                                if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:

                                    # Store the ID of the tool call to allow skipping the corresponding response
                                    if self.function_id_buffer:
                                        self.prev_assistant_message_id = self.function_id_buffer

                                else:
                                    if prev_message_type and prev_message_type != "tool_call_message":
                                        message_index += 1
                                    self.tool_call_name = str(self.function_name_buffer)
                                    tool_call_msg = ToolCallMessage(
                                        id=self.letta_tool_message_id,
                                        date=datetime.now(timezone.utc),
                                        tool_call=ToolCallDelta(
                                            name=self.function_name_buffer,
                                            arguments=None,
                                            tool_call_id=self.function_id_buffer,
                                        ),
                                        otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
                                    )
                                    prev_message_type = tool_call_msg.message_type
                                    yield tool_call_msg

                                # Record what the last function name we flushed was
                                self.last_flushed_function_name = self.function_name_buffer
                                # Clear the buffer
                                self.function_name_buffer = None
                                self.function_id_buffer = None

                                # Since we're clearing the name buffer, we should store
                                # any updates to the arguments inside a separate buffer
                                if self.function_args_buffer is None:
                                    self.function_args_buffer = updates_main_json
                                else:
                                    self.function_args_buffer += updates_main_json

                            # If there was nothing in the name buffer, we can proceed to
                            # output the arguments chunk as a ToolCallMessage
                            else:

                                # use_assistant_message means that we should also not release main_json raw,
                                # and instead should only release the contents of "message": "..."
                                if self.use_assistant_message and (
                                    self.last_flushed_function_name is not None
                                    and self.last_flushed_function_name == self.assistant_message_tool_name
                                ):
                                    # do an additional parse on the updates_main_json
                                    if self.function_args_buffer:
                                        updates_main_json = self.function_args_buffer + updates_main_json
                                        self.function_args_buffer = None

                                        # Hardcoding that assumes that if we're toggling into the keywords,
                                        # we have the full prefix
                                        match_str = '{"' + self.assistant_message_tool_kwarg + '":"'
                                        if updates_main_json == match_str:
                                            updates_main_json = None

                                    else:
                                        # Some hardcoding to strip off the trailing "}"
                                        if updates_main_json in ["}", '"}']:
                                            updates_main_json = None
                                        if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
                                            updates_main_json = updates_main_json[:-1]

                                    if not updates_main_json:
                                        # early exit to turn into content mode
                                        continue

                                    # There may be a buffer from a previous chunk, for example
                                    # if the previous chunk had arguments but we needed to flush name.
                                    # NOTE(review): function_args_buffer is either falsy or was just
                                    # cleared above, so this branch looks unreachable - confirm.
                                    if self.function_args_buffer:
                                        # In this case, we should release the buffer + new data at once
                                        combined_chunk = self.function_args_buffer + updates_main_json

                                        if prev_message_type and prev_message_type != "assistant_message":
                                            message_index += 1
                                        assistant_message = AssistantMessage(
                                            id=self.letta_assistant_message_id,
                                            date=datetime.now(timezone.utc),
                                            content=combined_chunk,
                                            otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
                                        )
                                        prev_message_type = assistant_message.message_type
                                        yield assistant_message
                                        # Store the ID of the tool call to allow skipping the corresponding response
                                        if self.function_id_buffer:
                                            self.prev_assistant_message_id = self.function_id_buffer
                                        # clear buffer
                                        self.function_args_buffer = None
                                        self.function_id_buffer = None

                                    else:
                                        # If there's no buffer to clear, just output a new chunk with new data.
                                        # TODO: the extractor above and the optimistic parser here both parse
                                        # the same arguments; kept as-is pending a larger rewrite.
                                        parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)

                                        if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
                                            self.assistant_message_tool_kwarg
                                        ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
                                            new_content = parsed_args.get(self.assistant_message_tool_kwarg)
                                            prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
                                            # TODO: Assumes consistent state and that prev_content is subset of new_content
                                            diff = new_content.replace(prev_content, "", 1)
                                            self.current_json_parse_result = parsed_args
                                            if prev_message_type and prev_message_type != "assistant_message":
                                                message_index += 1
                                            assistant_message = AssistantMessage(
                                                id=self.letta_assistant_message_id,
                                                date=datetime.now(timezone.utc),
                                                content=diff,
                                                otid=Message.generate_otid_from_id(self.letta_assistant_message_id, message_index),
                                            )
                                            prev_message_type = assistant_message.message_type
                                            yield assistant_message

                                        # Store the ID of the tool call to allow skipping the corresponding response
                                        if self.function_id_buffer:
                                            self.prev_assistant_message_id = self.function_id_buffer
                                        # clear buffers
                                        self.function_id_buffer = None
                                else:

                                    # There may be a buffer from a previous chunk, for example
                                    # if the previous chunk had arguments but we needed to flush name
                                    if self.function_args_buffer:
                                        # In this case, we should release the buffer + new data at once
                                        combined_chunk = self.function_args_buffer + updates_main_json
                                        if prev_message_type and prev_message_type != "tool_call_message":
                                            message_index += 1
                                        tool_call_msg = ToolCallMessage(
                                            id=self.letta_tool_message_id,
                                            date=datetime.now(timezone.utc),
                                            tool_call=ToolCallDelta(
                                                name=None,
                                                arguments=combined_chunk,
                                                tool_call_id=self.function_id_buffer,
                                            ),
                                            otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
                                        )
                                        prev_message_type = tool_call_msg.message_type
                                        yield tool_call_msg
                                        # clear buffer
                                        self.function_args_buffer = None
                                        self.function_id_buffer = None
                                    else:
                                        # If there's no buffer to clear, just output a new chunk with new data
                                        if prev_message_type and prev_message_type != "tool_call_message":
                                            message_index += 1
                                        tool_call_msg = ToolCallMessage(
                                            id=self.letta_tool_message_id,
                                            date=datetime.now(timezone.utc),
                                            tool_call=ToolCallDelta(
                                                name=None,
                                                arguments=updates_main_json,
                                                tool_call_id=self.function_id_buffer,
                                            ),
                                            otid=Message.generate_otid_from_id(self.letta_tool_message_id, message_index),
                                        )
                                        prev_message_type = tool_call_msg.message_type
                                        yield tool_call_msg
                                        self.function_id_buffer = None
|
@@ -180,7 +180,7 @@ async def poll_running_llm_batches(server: "SyncServer") -> List[LettaBatchRespo
|
|
180
180
|
|
181
181
|
try:
|
182
182
|
# 1. Retrieve running batch jobs
|
183
|
-
batches = server.batch_manager.
|
183
|
+
batches = await server.batch_manager.list_running_llm_batches_async()
|
184
184
|
metrics.total_batches = len(batches)
|
185
185
|
|
186
186
|
# TODO: Expand to more providers
|
@@ -220,7 +220,11 @@ async def poll_running_llm_batches(server: "SyncServer") -> List[LettaBatchRespo
|
|
220
220
|
)
|
221
221
|
|
222
222
|
# launch them all at once
|
223
|
-
|
223
|
+
async def get_and_resume(batch_id):
    # Fetch the batch job by id, then hand it to _resume
    return await _resume(await server.batch_manager.get_llm_batch_job_by_id_async(batch_id))
|
226
|
+
|
227
|
+
tasks = [get_and_resume(bid) for bid, *_ in completed]
|
224
228
|
new_batch_responses = await asyncio.gather(*tasks, return_exceptions=True)
|
225
229
|
|
226
230
|
return new_batch_responses
|
letta/orm/agent.py
CHANGED
@@ -2,6 +2,7 @@ import uuid
|
|
2
2
|
from typing import TYPE_CHECKING, List, Optional, Set
|
3
3
|
|
4
4
|
from sqlalchemy import JSON, Boolean, Index, String
|
5
|
+
from sqlalchemy.ext.asyncio import AsyncAttrs
|
5
6
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
6
7
|
|
7
8
|
from letta.orm.block import Block
|
@@ -26,7 +27,7 @@ if TYPE_CHECKING:
|
|
26
27
|
from letta.orm.tool import Tool
|
27
28
|
|
28
29
|
|
29
|
-
class Agent(SqlalchemyBase, OrganizationMixin):
|
30
|
+
class Agent(SqlalchemyBase, OrganizationMixin, AsyncAttrs):
|
30
31
|
__tablename__ = "agents"
|
31
32
|
__pydantic_model__ = PydanticAgentState
|
32
33
|
__table_args__ = (Index("ix_agents_created_at", "created_at", "id"),)
|
@@ -200,3 +201,103 @@ class Agent(SqlalchemyBase, OrganizationMixin):
|
|
200
201
|
state[field_name] = resolver()
|
201
202
|
|
202
203
|
return self.__pydantic_model__(**state)
|
204
|
+
|
205
|
+
async def to_pydantic_async(self, include_relationships: Optional[Set[str]] = None) -> PydanticAgentState:
    """
    Build the Pydantic agent state from this ORM row, awaiting relationships.

    Scalar columns (id, name, system, llm_config, timestamps, ...) are always
    copied. Relationship-backed fields start from empty defaults and are only
    loaded, through ``awaitable_attrs``, when their name is requested.

    Args:
        include_relationships (Optional[Set[str]]):
            Names of the optional fields to load: "tags", "tools", "sources",
            "memory", "identity_ids", "multi_agent_group",
            "tool_exec_environment_variables", "enable_sleeptime".
            When None, every optional field is loaded. When empty, none are.

    Returns:
        PydanticAgentState: The Pydantic representation of the agent.
    """
    # Names used as the default selection when the caller passes None
    all_optional_names = (
        "tags",
        "tools",
        "sources",
        "memory",
        "identity_ids",
        "multi_agent_group",
        "tool_exec_environment_variables",
        "enable_sleeptime",
        "response_format",
    )
    if include_relationships is None:
        requested = set(all_optional_names)
    else:
        requested = set(include_relationships)

    # Base fields: always included
    state = {
        "id": self.id,
        "agent_type": self.agent_type,
        "name": self.name,
        "description": self.description,
        "system": self.system,
        "message_ids": self.message_ids,
        "metadata": self.metadata_,  # Exposed as 'metadata' to Pydantic
        "llm_config": self.llm_config,
        "embedding_config": self.embedding_config,
        "project_id": self.project_id,
        "template_id": self.template_id,
        "base_template_id": self.base_template_id,
        "tool_rules": self.tool_rules,
        "message_buffer_autoclear": self.message_buffer_autoclear,
        "created_by_id": self.created_by_id,
        "last_updated_by_id": self.last_updated_by_id,
        "created_at": self.created_at,
        "updated_at": self.updated_at,
    }
    # Defaults for the optional fields; overwritten below when requested
    state.update(
        {
            "tags": [],
            "tools": [],
            "sources": [],
            "memory": Memory(blocks=[]),
            "identity_ids": [],
            "multi_agent_group": None,
            "tool_exec_environment_variables": [],
            "enable_sleeptime": None,
            "response_format": self.response_format,
        }
    )

    # Only load requested relationships
    if "tags" in requested:
        tag_rows = await self.awaitable_attrs.tags
        state["tags"] = [row.tag for row in tag_rows]

    if "tools" in requested:
        state["tools"] = await self.awaitable_attrs.tools

    if "sources" in requested:
        source_rows = await self.awaitable_attrs.sources
        state["sources"] = [row.to_pydantic() for row in source_rows]

    if "memory" in requested:
        block_rows = await self.awaitable_attrs.core_memory
        state["memory"] = Memory(
            blocks=[row.to_pydantic() for row in block_rows],
            prompt_template=get_prompt_template_for_agent_type(self.agent_type),
        )

    if "identity_ids" in requested:
        identity_rows = await self.awaitable_attrs.identities
        state["identity_ids"] = [row.id for row in identity_rows]

    if "multi_agent_group" in requested:
        state["multi_agent_group"] = await self.awaitable_attrs.multi_agent_group

    if "tool_exec_environment_variables" in requested:
        state["tool_exec_environment_variables"] = await self.awaitable_attrs.tool_exec_environment_variables

    if "enable_sleeptime" in requested:
        state["enable_sleeptime"] = await self.awaitable_attrs.enable_sleeptime

    return self.__pydantic_model__(**state)
|
letta/orm/block.py
CHANGED
@@ -39,6 +39,9 @@ class Block(OrganizationMixin, SqlalchemyBase):
|
|
39
39
|
limit: Mapped[BigInteger] = mapped_column(Integer, default=CORE_MEMORY_BLOCK_CHAR_LIMIT, doc="Character limit of the block.")
|
40
40
|
metadata_: Mapped[Optional[dict]] = mapped_column(JSON, default={}, doc="arbitrary information related to the block.")
|
41
41
|
|
42
|
+
# permissions of the agent
|
43
|
+
read_only: Mapped[bool] = mapped_column(doc="whether the agent has read-only access to the block", default=False)
|
44
|
+
|
42
45
|
# history pointers / locking mechanisms
|
43
46
|
current_history_entry_id: Mapped[Optional[str]] = mapped_column(
|
44
47
|
String, ForeignKey("block_history.id", name="fk_block_current_history_entry", use_alter=True), nullable=True, index=True
|