letta-nightly 0.6.48.dev20250407104216__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +47 -12
- letta/agents/base_agent.py +7 -4
- letta/agents/helpers.py +52 -0
- letta/agents/letta_agent.py +105 -42
- letta/agents/voice_agent.py +2 -2
- letta/constants.py +13 -1
- letta/errors.py +10 -3
- letta/functions/function_sets/base.py +65 -0
- letta/functions/interface.py +2 -2
- letta/functions/mcp_client/base_client.py +18 -1
- letta/{dynamic_multi_agent.py → groups/dynamic_multi_agent.py} +3 -0
- letta/groups/helpers.py +113 -0
- letta/{round_robin_multi_agent.py → groups/round_robin_multi_agent.py} +2 -0
- letta/groups/sleeptime_multi_agent.py +259 -0
- letta/{supervisor_multi_agent.py → groups/supervisor_multi_agent.py} +1 -0
- letta/helpers/converters.py +109 -7
- letta/helpers/message_helper.py +1 -0
- letta/helpers/tool_rule_solver.py +40 -23
- letta/interface.py +12 -5
- letta/interfaces/anthropic_streaming_interface.py +329 -0
- letta/llm_api/anthropic.py +12 -1
- letta/llm_api/anthropic_client.py +65 -14
- letta/llm_api/azure_openai.py +2 -2
- letta/llm_api/google_ai_client.py +13 -2
- letta/llm_api/google_constants.py +3 -0
- letta/llm_api/google_vertex_client.py +2 -2
- letta/llm_api/llm_api_tools.py +1 -1
- letta/llm_api/llm_client.py +7 -0
- letta/llm_api/llm_client_base.py +2 -7
- letta/llm_api/openai.py +7 -1
- letta/llm_api/openai_client.py +250 -0
- letta/orm/__init__.py +4 -0
- letta/orm/agent.py +6 -0
- letta/orm/block.py +32 -2
- letta/orm/block_history.py +46 -0
- letta/orm/custom_columns.py +60 -0
- letta/orm/enums.py +7 -0
- letta/orm/group.py +6 -0
- letta/orm/groups_blocks.py +13 -0
- letta/orm/llm_batch_items.py +55 -0
- letta/orm/llm_batch_job.py +48 -0
- letta/orm/message.py +7 -1
- letta/orm/organization.py +2 -0
- letta/orm/sqlalchemy_base.py +18 -15
- letta/prompts/system/memgpt_sleeptime_chat.txt +52 -0
- letta/prompts/system/sleeptime.txt +26 -0
- letta/schemas/agent.py +13 -1
- letta/schemas/enums.py +17 -2
- letta/schemas/group.py +14 -1
- letta/schemas/letta_message.py +5 -3
- letta/schemas/llm_batch_job.py +53 -0
- letta/schemas/llm_config.py +14 -4
- letta/schemas/message.py +44 -0
- letta/schemas/tool.py +3 -0
- letta/schemas/usage.py +1 -0
- letta/server/db.py +2 -0
- letta/server/rest_api/app.py +1 -1
- letta/server/rest_api/chat_completions_interface.py +8 -3
- letta/server/rest_api/interface.py +36 -7
- letta/server/rest_api/routers/v1/agents.py +53 -39
- letta/server/rest_api/routers/v1/runs.py +14 -2
- letta/server/rest_api/utils.py +15 -4
- letta/server/server.py +120 -71
- letta/services/agent_manager.py +70 -6
- letta/services/block_manager.py +190 -2
- letta/services/group_manager.py +68 -0
- letta/services/helpers/agent_manager_helper.py +6 -4
- letta/services/llm_batch_manager.py +139 -0
- letta/services/message_manager.py +17 -31
- letta/services/tool_executor/tool_execution_sandbox.py +1 -3
- letta/services/tool_executor/tool_executor.py +9 -20
- letta/services/tool_manager.py +14 -3
- letta/services/tool_sandbox/__init__.py +0 -0
- letta/services/tool_sandbox/base.py +188 -0
- letta/services/tool_sandbox/e2b_sandbox.py +116 -0
- letta/services/tool_sandbox/local_sandbox.py +221 -0
- letta/sleeptime_agent.py +61 -0
- letta/streaming_interface.py +20 -10
- letta/utils.py +4 -0
- {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/METADATA +2 -2
- {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/RECORD +85 -69
- letta/offline_memory_agent.py +0 -173
- letta/services/tool_executor/async_tool_execution_sandbox.py +0 -397
- {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.48.dev20250407104216.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/entry_points.txt +0 -0
letta/schemas/message.py
CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import copy
 import json
+import uuid
 import warnings
 from collections import OrderedDict
 from datetime import datetime, timezone
@@ -78,6 +79,7 @@ class MessageCreate(BaseModel):
         json_schema_extra=get_letta_message_content_union_str_json_schema(),
     )
     name: Optional[str] = Field(None, description="The name of the participant.")
+    otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
 
     def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
         data = super().model_dump(**kwargs)
@@ -168,12 +170,17 @@ class Message(BaseMessage):
         json_message["created_at"] = self.created_at.isoformat()
         return json_message
 
+    @staticmethod
+    def generate_otid():
+        return str(uuid.uuid4())
+
     @staticmethod
     def to_letta_messages_from_list(
         messages: List[Message],
         use_assistant_message: bool = True,
         assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
         assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
+        reverse: bool = True,
     ) -> List[LettaMessage]:
         if use_assistant_message:
             message_ids_to_remove = []
@@ -203,6 +210,7 @@ class Message(BaseMessage):
                 use_assistant_message=use_assistant_message,
                 assistant_message_tool_name=assistant_message_tool_name,
                 assistant_message_tool_kwarg=assistant_message_tool_kwarg,
+                reverse=reverse,
             )
         ]
 
@@ -211,6 +219,7 @@ class Message(BaseMessage):
         use_assistant_message: bool = False,
         assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
         assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
+        reverse: bool = True,
     ) -> List[LettaMessage]:
         """Convert message object (in DB format) to the style used by the original Letta API"""
         messages = []
@@ -221,18 +230,21 @@ class Message(BaseMessage):
             if self.content:
                 # Check for ReACT-style COT inside of TextContent
                 if len(self.content) == 1 and isinstance(self.content[0], TextContent):
+                    otid = Message.generate_otid_from_id(self.id, len(messages))
                     messages.append(
                         ReasoningMessage(
                             id=self.id,
                             date=self.created_at,
                             reasoning=self.content[0].text,
                             name=self.name,
+                            otid=otid,
                         )
                     )
                 # Otherwise, we may have a list of multiple types
                 else:
                     # TODO we can probably collapse these two cases into a single loop
                     for content_part in self.content:
+                        otid = Message.generate_otid_from_id(self.id, len(messages))
                         if isinstance(content_part, TextContent):
                             # COT
                             messages.append(
@@ -241,6 +253,7 @@ class Message(BaseMessage):
                                     date=self.created_at,
                                     reasoning=content_part.text,
                                     name=self.name,
+                                    otid=otid,
                                 )
                             )
                         elif isinstance(content_part, ReasoningContent):
@@ -253,6 +266,7 @@ class Message(BaseMessage):
                                     source="reasoner_model",  # TODO do we want to tag like this?
                                     signature=content_part.signature,
                                     name=self.name,
+                                    otid=otid,
                                 )
                             )
                         elif isinstance(content_part, RedactedReasoningContent):
@@ -264,6 +278,7 @@ class Message(BaseMessage):
                                     state="redacted",
                                     hidden_reasoning=content_part.data,
                                     name=self.name,
+                                    otid=otid,
                                 )
                             )
                         else:
@@ -272,6 +287,7 @@ class Message(BaseMessage):
             if self.tool_calls is not None:
                 # This is type FunctionCall
                 for tool_call in self.tool_calls:
+                    otid = Message.generate_otid_from_id(self.id, len(messages))
                     # If we're supporting using assistant message,
                     # then we want to treat certain function calls as a special case
                     if use_assistant_message and tool_call.function.name == assistant_message_tool_name:
@@ -287,6 +303,7 @@ class Message(BaseMessage):
                                 date=self.created_at,
                                 content=message_string,
                                 name=self.name,
+                                otid=otid,
                             )
                         )
                     else:
@@ -300,6 +317,7 @@ class Message(BaseMessage):
                                     tool_call_id=tool_call.id,
                                 ),
                                 name=self.name,
+                                otid=otid,
                             )
                         )
         elif self.role == MessageRole.tool:
@@ -341,6 +359,7 @@ class Message(BaseMessage):
                     stdout=self.tool_returns[0].stdout if self.tool_returns else None,
                     stderr=self.tool_returns[0].stderr if self.tool_returns else None,
                     name=self.name,
+                    otid=self.id.replace("message-", ""),
                 )
             )
         elif self.role == MessageRole.user:
@@ -357,6 +376,7 @@ class Message(BaseMessage):
                     date=self.created_at,
                     content=message_str or text_content,
                     name=self.name,
+                    otid=self.otid,
                 )
             )
         elif self.role == MessageRole.system:
@@ -372,11 +392,15 @@ class Message(BaseMessage):
                     date=self.created_at,
                     content=text_content,
                     name=self.name,
+                    otid=self.otid,
                 )
             )
         else:
             raise ValueError(self.role)
 
+        if reverse:
+            messages.reverse()
+
         return messages
 
     @staticmethod
@@ -670,6 +694,9 @@ class Message(BaseMessage):
 
         def add_xml_tag(string: str, xml_tag: Optional[str]):
             # NOTE: Anthropic docs recommends using <thinking> tag when using CoT + tool use
+            if f"<{xml_tag}>" in string and f"</{xml_tag}>" in string:
+                # don't nest if tags already exist
+                return string
             return f"<{xml_tag}>{string}</{xml_tag}" if xml_tag else string
 
         if self.role == "system":
@@ -988,6 +1015,23 @@ class Message(BaseMessage):
 
         return cohere_message
 
+    @staticmethod
+    def generate_otid_from_id(message_id: str, index: int) -> str:
+        """
+        Convert message id to bits and change the list bit to the index
+        """
+        if not 0 <= index < 128:
+            raise ValueError("Index must be between 0 and 127")
+
+        message_uuid = message_id.replace("message-", "")
+        uuid_int = int(message_uuid.replace("-", ""), 16)
+
+        # Clear last 7 bits and set them to index; supports up to 128 unique indices
+        uuid_int = (uuid_int & ~0x7F) | (index & 0x7F)
+
+        hex_str = f"{uuid_int:032x}"
+        return f"{hex_str[:8]}-{hex_str[8:12]}-{hex_str[12:16]}-{hex_str[16:20]}-{hex_str[20:]}"
+
 
 class ToolReturn(BaseModel):
     status: Literal["success", "error"] = Field(..., description="The status of the tool call")
letta/schemas/tool.py
CHANGED
@@ -104,6 +104,9 @@ class Tool(BaseTool):
         elif self.tool_type in {ToolType.LETTA_MULTI_AGENT_CORE}:
             # If it's letta multi-agent tool, we also generate the json_schema on the fly here
             self.json_schema = get_json_schema_from_module(module_name=LETTA_MULTI_AGENT_TOOL_MODULE_NAME, function_name=self.name)
+        elif self.tool_type in {ToolType.LETTA_SLEEPTIME_CORE}:
+            # If it's letta sleeptime core tool, we generate the json_schema on the fly here
+            self.json_schema = get_json_schema_from_module(module_name=LETTA_CORE_TOOL_MODULE_NAME, function_name=self.name)
 
         # At this point, we need to validate that at least json_schema is populated
         if not self.json_schema:
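
The new LETTA_SLEEPTIME_CORE branch follows the same pattern as the other built-in tool types: the JSON schema is not stored with the tool but regenerated on the fly from the function defined in the core-tool module. get_json_schema_from_module itself is not shown in this diff; the sketch below only approximates the general idea (derive a minimal schema from a function's signature) and is not the library's implementation:

import inspect
from typing import get_type_hints

def sketch_json_schema_from_function(func) -> dict:
    # Illustrative only: the real helper also resolves the function by module name and
    # parses docstrings; this sketch just maps parameter type hints to JSON schema types.
    type_map = {str: "string", int: "integer", float: "number", bool: "boolean"}
    hints = get_type_hints(func)
    properties, required = {}, []
    for name, param in inspect.signature(func).parameters.items():
        if name in ("self", "agent_state"):  # assumed runtime-injected args, excluded from the schema
            continue
        properties[name] = {"type": type_map.get(hints.get(name), "string")}
        if param.default is inspect.Parameter.empty:
            required.append(name)
    return {"name": func.__name__, "parameters": {"type": "object", "properties": properties, "required": required}}

def example_tool(topic: str, max_items: int = 5) -> str:
    """Hypothetical function used only to exercise the sketch."""
    return topic

print(sketch_json_schema_from_function(example_tool))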
letta/schemas/usage.py
CHANGED
@@ -23,3 +23,4 @@ class LettaUsageStatistics(BaseModel):
     step_count: int = Field(0, description="The number of steps taken by the agent.")
     # TODO: Optional for now. This field makes everyone's lives easier
     steps_messages: Optional[List[List[Message]]] = Field(None, description="The messages generated per step")
+    run_ids: Optional[List[str]] = Field(None, description="The background task run IDs associated with the agent interaction")
letta/server/db.py
CHANGED
@@ -59,11 +59,13 @@ if settings.letta_pg_uri_no_default:
     # create engine
     engine = create_engine(
         settings.letta_pg_uri,
+        # f"{settings.letta_pg_uri}?options=-c%20client_encoding=UTF8",
         pool_size=settings.pg_pool_size,
         max_overflow=settings.pg_max_overflow,
         pool_timeout=settings.pg_pool_timeout,
         pool_recycle=settings.pg_pool_recycle,
         echo=settings.pg_echo,
+        # connect_args={"client_encoding": "utf8"},
     )
 else:
     # TODO: don't rely on config storage
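
The two commented-out lines in this hunk note alternative ways to force UTF-8 client encoding on the Postgres connection: through the URL's options query parameter or through SQLAlchemy's connect_args. A minimal sketch of the second form, assuming a psycopg2-backed URL (the URI below is a placeholder, not letta's configuration):

from sqlalchemy import create_engine

engine = create_engine(
    "postgresql+psycopg2://user:pass@localhost:5432/letta",  # placeholder URI
    # libpq accepts client_encoding as a connection parameter, so this pins the
    # client-side encoding to UTF-8 without touching the database URL itself.
    connect_args={"client_encoding": "utf8"},
)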
letta/server/rest_api/app.py
CHANGED
@@ -139,7 +139,7 @@ def create_application() -> "FastAPI":
 
     @app.on_event("startup")
     async def configure_executor():
-        print(f"Configured event loop executor with {settings.event_loop_threadpool_max_workers} workers.")
+        print(f"INFO: Configured event loop executor with {settings.event_loop_threadpool_max_workers} workers.")
         loop = asyncio.get_running_loop()
         executor = concurrent.futures.ThreadPoolExecutor(max_workers=settings.event_loop_threadpool_max_workers)
         loop.set_default_executor(executor)
letta/server/rest_api/chat_completions_interface.py
CHANGED
@@ -155,7 +155,12 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         return
 
     def process_chunk(
-        self,
+        self,
+        chunk: ChatCompletionChunkResponse,
+        message_id: str,
+        message_date: datetime,
+        expect_reasoning_content: bool = False,
+        message_index: int = 0,
     ) -> None:
         """
         Called externally with a ChatCompletionChunkResponse. Transforms
@@ -172,7 +177,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         """
         return
 
-    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None) -> None:
         """
         Handle LLM reasoning or internal monologue. Example usage: if you want
         to capture chain-of-thought for debugging in a non-streaming scenario.
@@ -186,7 +191,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         """
         return
 
-    def function_message(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+    def function_message(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None) -> None:
         """
         Handle function-related log messages, typically of the form:
         It's a no-op by default.
letta/server/rest_api/interface.py
CHANGED
@@ -165,7 +165,7 @@ class QueuingInterface(AgentInterface):
             print(vars(msg_obj))
             print(msg_obj.created_at.isoformat())
 
-    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None) -> None:
         """Handle the agent's internal monologue"""
         assert msg_obj is not None, "QueuingInterface requires msg_obj references for metadata"
         if self.debug:
@@ -209,7 +209,9 @@ class QueuingInterface(AgentInterface):
 
         self._queue_push(message_api=new_message, message_obj=msg_obj)
 
-    def function_message(
+    def function_message(
+        self, msg: str, msg_obj: Optional[Message] = None, include_ran_messages: bool = False, chunk_index: Optional[int] = None
+    ) -> None:
         """Handle the agent calling a function"""
         # TODO handle 'function' messages that indicate the start of a function call
         assert msg_obj is not None, "QueuingInterface requires msg_obj references for metadata"
@@ -466,6 +468,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # and `content` needs to be handled outside the interface
         expect_reasoning_content: bool = False,
         name: Optional[str] = None,
+        message_index: int = 0,
     ) -> Optional[Union[ReasoningMessage, ToolCallMessage, AssistantMessage]]:
         """
         Example data from non-streaming response looks like:
@@ -478,6 +481,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         """
         choice = chunk.choices[0]
         message_delta = choice.delta
+        otid = Message.generate_otid_from_id(message_id, message_index)
 
         if (
             message_delta.content is None
@@ -499,6 +503,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 signature=message_delta.reasoning_content_signature,
                 source="reasoner_model" if message_delta.reasoning_content_signature else "non_reasoner_model",
                 name=name,
+                otid=otid,
             )
         elif expect_reasoning_content and message_delta.redacted_reasoning_content is not None:
             processed_chunk = HiddenReasoningMessage(
@@ -507,6 +512,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 hidden_reasoning=message_delta.redacted_reasoning_content,
                 state="redacted",
                 name=name,
+                otid=otid,
             )
         elif expect_reasoning_content and message_delta.content is not None:
             # "ignore" content if we expect reasoning content
@@ -534,6 +540,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         tool_call_id=None,
                     ),
                     name=name,
+                    otid=otid,
                 )
 
             except json.JSONDecodeError as e:
@@ -564,6 +571,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     date=message_date,
                     reasoning=message_delta.content,
                     name=name,
+                    otid=otid,
                 )
 
         # tool calls
@@ -612,7 +620,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     # TODO: Assumes consistent state and that prev_content is subset of new_content
                     diff = new_content.replace(prev_content, "", 1)
                     self.current_json_parse_result = parsed_args
-                    processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff, name=name)
+                    processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff, name=name, otid=otid)
                 else:
                     return None
 
@@ -645,6 +653,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         tool_call_id=tool_call_delta.get("id"),
                     ),
                     name=name,
+                    otid=otid,
                 )
 
             elif self.inner_thoughts_in_kwargs and tool_call.function:
@@ -681,6 +690,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=message_date,
                         reasoning=updates_inner_thoughts,
                         name=name,
+                        otid=otid,
                     )
                     # Additionally inner thoughts may stream back with a chunk of main JSON
                     # In that case, since we can only return a chunk at a time, we should buffer it
@@ -717,6 +727,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                 tool_call_id=self.function_id_buffer,
                             ),
                             name=name,
+                            otid=otid,
                         )
 
                         # Record what the last function name we flushed was
@@ -774,6 +785,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                 date=message_date,
                                 content=combined_chunk,
                                 name=name,
+                                otid=otid,
                             )
                             # Store the ID of the tool call so allow skipping the corresponding response
                             if self.function_id_buffer:
@@ -798,7 +810,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                 # TODO: Assumes consistent state and that prev_content is subset of new_content
                                 diff = new_content.replace(prev_content, "", 1)
                                 self.current_json_parse_result = parsed_args
-                                processed_chunk = AssistantMessage(
+                                processed_chunk = AssistantMessage(
+                                    id=message_id, date=message_date, content=diff, name=name, otid=otid
+                                )
                             else:
                                 return None
 
@@ -823,6 +837,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                 tool_call_id=self.function_id_buffer,
                             ),
                             name=name,
+                            otid=otid,
                         )
                         # clear buffer
                         self.function_args_buffer = None
@@ -838,6 +853,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                             tool_call_id=self.function_id_buffer,
                         ),
                         name=name,
+                        otid=otid,
                     )
                     self.function_id_buffer = None
 
@@ -967,6 +983,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     tool_call_id=tool_call_delta.get("id"),
                 ),
                 name=name,
+                otid=otid,
             )
 
         elif choice.finish_reason is not None:
@@ -1048,6 +1065,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         message_date: datetime,
         expect_reasoning_content: bool = False,
         name: Optional[str] = None,
+        message_index: int = 0,
     ):
         """Process a streaming chunk from an OpenAI-compatible server.
 
@@ -1074,18 +1092,20 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             message_date=message_date,
             expect_reasoning_content=expect_reasoning_content,
             name=name,
+            message_index=message_index,
         )
-
         if processed_chunk is None:
             return
 
         self._push_to_buffer(processed_chunk)
 
+        return processed_chunk.message_type
+
     def user_message(self, msg: str, msg_obj: Optional[Message] = None):
         """Letta receives a user message"""
         return
 
-    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None):
+    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta generates some internal monologue"""
         if not self.streaming_mode:
 
@@ -1102,6 +1122,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 date=msg_obj.created_at,
                 reasoning=msg,
                 name=msg_obj.name,
+                otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
             )
 
             self._push_to_buffer(processed_chunk)
@@ -1113,6 +1134,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=msg_obj.created_at,
                         reasoning=content.text,
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                 elif isinstance(content, ReasoningContent):
                     processed_chunk = ReasoningMessage(
@@ -1122,6 +1144,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         reasoning=content.reasoning,
                         signature=content.signature,
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                 elif isinstance(content, RedactedReasoningContent):
                     processed_chunk = HiddenReasoningMessage(
@@ -1130,6 +1153,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         state="redacted",
                         hidden_reasoning=content.data,
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
 
             self._push_to_buffer(processed_chunk)
@@ -1142,7 +1166,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # NOTE: this is a no-op, we handle this special case in function_message instead
         return
 
-    def function_message(self, msg: str, msg_obj: Optional[Message] = None):
+    def function_message(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta calls a function"""
 
         # TODO handle 'function' messages that indicate the start of a function call
@@ -1191,6 +1215,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=msg_obj.created_at,
                         content=func_args["message"],
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                     self._push_to_buffer(processed_chunk)
                 except Exception as e:
@@ -1214,6 +1239,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=msg_obj.created_at,
                         content=func_args[self.assistant_message_tool_kwarg],
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                     # Store the ID of the tool call so allow skipping the corresponding response
                     self.prev_assistant_message_id = function_call.id
@@ -1227,6 +1253,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                            tool_call_id=function_call.id,
                        ),
                        name=msg_obj.name,
+                       otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                    )
 
                    # processed_chunk = {
@@ -1267,6 +1294,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                    stdout=msg_obj.tool_returns[0].stdout if msg_obj.tool_returns else None,
                    stderr=msg_obj.tool_returns[0].stderr if msg_obj.tool_returns else None,
                    name=msg_obj.name,
+                   otid=Message.generate_otid_from_id(msg_obj.id, chunk_index),
                )
 
            elif msg.startswith("Error: "):
@@ -1282,6 +1310,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                    stdout=msg_obj.tool_returns[0].stdout if msg_obj.tool_returns else None,
                    stderr=msg_obj.tool_returns[0].stderr if msg_obj.tool_returns else None,
                    name=msg_obj.name,
+                   otid=Message.generate_otid_from_id(msg_obj.id, chunk_index),
                )
 
            else:
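
The interface changes above thread a message_index / chunk_index through every streaming path so that each emitted chunk carries an otid derived from the parent message id plus its index. One practical consequence, sketched below with made-up chunk dicts rather than letta's exact wire format, is that a consumer can stitch streamed deltas back into per-part messages by grouping on otid:

from collections import defaultdict

def group_stream_by_otid(chunks):
    # Chunks that belong to the same logical message part share an otid, so joining
    # their text in arrival order reassembles that part.
    parts = defaultdict(list)
    for chunk in chunks:
        parts[chunk["otid"]].append(chunk["text"])
    return {otid: "".join(texts) for otid, texts in parts.items()}

# Hypothetical stream: two reasoning deltas (part 0) followed by an assistant delta (part 1).
stream = [
    {"otid": "aaaa-00", "text": "Thinking about"},
    {"otid": "aaaa-00", "text": " the question."},
    {"otid": "aaaa-01", "text": "Here is the answer."},
]
print(group_stream_by_otid(stream))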
letta/server/rest_api/routers/v1/agents.py
CHANGED
@@ -8,6 +8,7 @@ from fastapi.responses import JSONResponse
 from marshmallow import ValidationError
 from pydantic import Field
 from sqlalchemy.exc import IntegrityError, OperationalError
+from starlette.responses import StreamingResponse
 
 from letta.agents.letta_agent import LettaAgent
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
@@ -30,7 +31,6 @@ from letta.schemas.user import User
 from letta.serialize_schemas.pydantic_agent_schema import AgentSchema
 from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
-from letta.settings import settings
 
 # These can be forward refs, but because Fastapi needs them at runtime the must be imported normally
 
@@ -130,6 +130,10 @@ async def import_agent_serialized(
         description="If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.",
     ),
     project_id: Optional[str] = Query(None, description="The project ID to associate the uploaded agent with."),
+    strip_messages: bool = Query(
+        False,
+        description="If set to True, strips all messages from the agent before importing.",
+    ),
 ):
     """
     Import a serialized agent file and recreate the agent in the system.
@@ -149,6 +153,7 @@ async def import_agent_serialized(
             append_copy_suffix=append_copy_suffix,
             override_existing_tools=override_existing_tools,
             project_id=project_id,
+            strip_messages=strip_messages,
         )
         return new_agent
 
@@ -585,8 +590,10 @@ async def send_message(
     This endpoint accepts a message from a user and processes it through the agent.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
-
-
+    # TODO: This is redundant, remove soon
+    agent = server.agent_manager.get_agent_by_id(agent_id, actor)
+
+    if agent.llm_config.model_endpoint_type == "anthropic" and not agent.enable_sleeptime and not agent.multi_agent_group:
         experimental_agent = LettaAgent(
             agent_id=agent_id,
             message_manager=server.message_manager,
@@ -639,17 +646,38 @@ async def send_message_streaming(
     It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
-
-
-
-
-
-
-
-
-
-
-
+    # TODO: This is redundant, remove soon
+    agent = server.agent_manager.get_agent_by_id(agent_id, actor)
+
+    if agent.llm_config.model_endpoint_type == "anthropic" and not agent.enable_sleeptime and not agent.multi_agent_group:
+        experimental_agent = LettaAgent(
+            agent_id=agent_id,
+            message_manager=server.message_manager,
+            agent_manager=server.agent_manager,
+            block_manager=server.block_manager,
+            passage_manager=server.passage_manager,
+            actor=actor,
+        )
+
+        messages = request.messages
+        content = messages[0].content[0].text if messages and not isinstance(messages[0].content, str) else messages[0].content
+        result = StreamingResponse(
+            experimental_agent.step_stream(UserMessage(content=content), max_steps=10, use_assistant_message=request.use_assistant_message),
+            media_type="text/event-stream",
+        )
+    else:
+        result = await server.send_message_to_agent(
+            agent_id=agent_id,
+            actor=actor,
+            messages=request.messages,
+            stream_steps=True,
+            stream_tokens=request.stream_tokens,
+            # Support for AssistantMessage
+            use_assistant_message=request.use_assistant_message,
+            assistant_message_tool_name=request.assistant_message_tool_name,
+            assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+        )
+
     return result
 
 
@@ -665,31 +693,17 @@ async def process_message_background(
 ) -> None:
     """Background task to process the message and update job status."""
     try:
-
-
-
-
-
-
-
-
-
-
-
-            content = messages[0].content[0].text if messages and not isinstance(messages[0].content, str) else messages[0].content
-            result = await experimental_agent.step(UserMessage(content=content), max_steps=10)
-        else:
-            result = await server.send_message_to_agent(
-                agent_id=agent_id,
-                actor=actor,
-                messages=messages,
-                stream_steps=False,  # NOTE(matt)
-                stream_tokens=False,
-                use_assistant_message=use_assistant_message,
-                assistant_message_tool_name=assistant_message_tool_name,
-                assistant_message_tool_kwarg=assistant_message_tool_kwarg,
-                metadata={"job_id": job_id},  # Pass job_id through metadata
-            )
+        result = await server.send_message_to_agent(
+            agent_id=agent_id,
+            actor=actor,
+            messages=messages,
+            stream_steps=False,  # NOTE(matt)
+            stream_tokens=False,
+            use_assistant_message=use_assistant_message,
+            assistant_message_tool_name=assistant_message_tool_name,
+            assistant_message_tool_kwarg=assistant_message_tool_kwarg,
+            metadata={"job_id": job_id},  # Pass job_id through metadata
+        )
 
         # Update job status to completed
         job_update = JobUpdate(
|