letta-nightly 0.5.4.dev20241126104249__py3-none-any.whl → 0.5.4.dev20241128000451__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic. Click here for more details.
- letta/__init__.py +1 -1
- letta/agent.py +102 -140
- letta/agent_store/chroma.py +2 -0
- letta/cli/cli.py +3 -5
- letta/client/client.py +360 -117
- letta/config.py +2 -2
- letta/constants.py +5 -0
- letta/errors.py +12 -0
- letta/functions/function_sets/base.py +38 -1
- letta/functions/functions.py +4 -6
- letta/functions/schema_generator.py +6 -5
- letta/helpers/tool_rule_solver.py +6 -5
- letta/main.py +1 -1
- letta/metadata.py +45 -42
- letta/o1_agent.py +1 -4
- letta/orm/block.py +2 -1
- letta/orm/blocks_agents.py +4 -1
- letta/orm/sqlalchemy_base.py +13 -0
- letta/persistence_manager.py +1 -0
- letta/schemas/agent.py +57 -52
- letta/schemas/block.py +70 -26
- letta/schemas/enums.py +14 -0
- letta/schemas/letta_base.py +1 -1
- letta/schemas/letta_request.py +11 -23
- letta/schemas/letta_response.py +1 -2
- letta/schemas/memory.py +31 -100
- letta/schemas/message.py +3 -3
- letta/schemas/tool_rule.py +13 -5
- letta/server/rest_api/interface.py +12 -19
- letta/server/rest_api/routers/openai/assistants/threads.py +2 -3
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +0 -2
- letta/server/rest_api/routers/v1/agents.py +100 -94
- letta/server/rest_api/routers/v1/blocks.py +50 -5
- letta/server/rest_api/routers/v1/tools.py +14 -3
- letta/server/server.py +246 -460
- letta/server/static_files/assets/index-9fa459a2.js +1 -1
- letta/services/block_manager.py +23 -4
- letta/services/blocks_agents_manager.py +23 -1
- letta/services/per_agent_lock_manager.py +18 -0
- letta/services/tool_execution_sandbox.py +1 -1
- letta/services/tool_manager.py +2 -1
- {letta_nightly-0.5.4.dev20241126104249.dist-info → letta_nightly-0.5.4.dev20241128000451.dist-info}/METADATA +1 -1
- {letta_nightly-0.5.4.dev20241126104249.dist-info → letta_nightly-0.5.4.dev20241128000451.dist-info}/RECORD +46 -45
- {letta_nightly-0.5.4.dev20241126104249.dist-info → letta_nightly-0.5.4.dev20241128000451.dist-info}/LICENSE +0 -0
- {letta_nightly-0.5.4.dev20241126104249.dist-info → letta_nightly-0.5.4.dev20241128000451.dist-info}/WHEEL +0 -0
- {letta_nightly-0.5.4.dev20241126104249.dist-info → letta_nightly-0.5.4.dev20241128000451.dist-info}/entry_points.txt +0 -0
|
@@ -271,9 +271,8 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
271
271
|
self,
|
|
272
272
|
multi_step=True,
|
|
273
273
|
# Related to if we want to try and pass back the AssistantMessage as a special case function
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
assistant_message_function_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
|
|
274
|
+
assistant_message_tool_name=DEFAULT_MESSAGE_TOOL,
|
|
275
|
+
assistant_message_tool_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
|
|
277
276
|
# Related to if we expect inner_thoughts to be in the kwargs
|
|
278
277
|
inner_thoughts_in_kwargs=True,
|
|
279
278
|
inner_thoughts_kwarg=INNER_THOUGHTS_KWARG,
|
|
@@ -287,7 +286,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
287
286
|
self.streaming_chat_completion_mode_function_name = None # NOTE: sadly need to track state during stream
|
|
288
287
|
# If chat completion mode, we need a special stream reader to
|
|
289
288
|
# turn function argument to send_message into a normal text stream
|
|
290
|
-
self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=
|
|
289
|
+
self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_tool_kwarg)
|
|
291
290
|
|
|
292
291
|
self._chunks = deque()
|
|
293
292
|
self._event = asyncio.Event() # Use an event to notify when chunks are available
|
|
@@ -300,9 +299,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
300
299
|
self.multi_step_gen_indicator = MessageStreamStatus.done_generation
|
|
301
300
|
|
|
302
301
|
# Support for AssistantMessage
|
|
303
|
-
self.use_assistant_message =
|
|
304
|
-
self.
|
|
305
|
-
self.
|
|
302
|
+
self.use_assistant_message = False # TODO: Remove this
|
|
303
|
+
self.assistant_message_tool_name = assistant_message_tool_name
|
|
304
|
+
self.assistant_message_tool_kwarg = assistant_message_tool_kwarg
|
|
306
305
|
|
|
307
306
|
# Support for inner_thoughts_in_kwargs
|
|
308
307
|
self.inner_thoughts_in_kwargs = inner_thoughts_in_kwargs
|
|
@@ -455,17 +454,14 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
455
454
|
|
|
456
455
|
# If we get a "hit" on the special keyword we're looking for, we want to skip to the next chunk
|
|
457
456
|
# TODO I don't think this handles the function name in multi-pieces problem. Instead, we should probably reset the streaming_chat_completion_mode_function_name when we make this hit?
|
|
458
|
-
# if self.streaming_chat_completion_mode_function_name == self.
|
|
459
|
-
if tool_call.function.name == self.
|
|
457
|
+
# if self.streaming_chat_completion_mode_function_name == self.assistant_message_tool_name:
|
|
458
|
+
if tool_call.function.name == self.assistant_message_tool_name:
|
|
460
459
|
self.streaming_chat_completion_json_reader.reset()
|
|
461
460
|
# early exit to turn into content mode
|
|
462
461
|
return None
|
|
463
462
|
|
|
464
463
|
# if we're in the middle of parsing a send_message, we'll keep processing the JSON chunks
|
|
465
|
-
if
|
|
466
|
-
tool_call.function.arguments
|
|
467
|
-
and self.streaming_chat_completion_mode_function_name == self.assistant_message_function_name
|
|
468
|
-
):
|
|
464
|
+
if tool_call.function.arguments and self.streaming_chat_completion_mode_function_name == self.assistant_message_tool_name:
|
|
469
465
|
# Strip out any extras tokens
|
|
470
466
|
cleaned_func_args = self.streaming_chat_completion_json_reader.process_json_chunk(tool_call.function.arguments)
|
|
471
467
|
# In the case that we just have the prefix of something, no message yet, then we should early exit to move to the next chunk
|
|
@@ -500,9 +496,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
500
496
|
)
|
|
501
497
|
|
|
502
498
|
elif self.inner_thoughts_in_kwargs and tool_call.function:
|
|
503
|
-
if self.use_assistant_message:
|
|
504
|
-
raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
|
|
505
|
-
|
|
506
499
|
processed_chunk = None
|
|
507
500
|
|
|
508
501
|
if tool_call.function.name:
|
|
@@ -909,13 +902,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
|
|
|
909
902
|
|
|
910
903
|
if (
|
|
911
904
|
self.use_assistant_message
|
|
912
|
-
and function_call.function.name == self.
|
|
913
|
-
and self.
|
|
905
|
+
and function_call.function.name == self.assistant_message_tool_name
|
|
906
|
+
and self.assistant_message_tool_kwarg in func_args
|
|
914
907
|
):
|
|
915
908
|
processed_chunk = AssistantMessage(
|
|
916
909
|
id=msg_obj.id,
|
|
917
910
|
date=msg_obj.created_at,
|
|
918
|
-
assistant_message=func_args[self.
|
|
911
|
+
assistant_message=func_args[self.assistant_message_tool_kwarg],
|
|
919
912
|
)
|
|
920
913
|
else:
|
|
921
914
|
processed_chunk = FunctionCallMessage(
|
|
@@ -117,7 +117,7 @@ def create_message(
|
|
|
117
117
|
tool_call_id=None,
|
|
118
118
|
name=None,
|
|
119
119
|
)
|
|
120
|
-
agent = server.
|
|
120
|
+
agent = server.load_agent(agent_id=agent_id)
|
|
121
121
|
# add message to agent
|
|
122
122
|
agent._append_to_messages([message])
|
|
123
123
|
|
|
@@ -161,7 +161,6 @@ def list_messages(
|
|
|
161
161
|
before=before_uuid,
|
|
162
162
|
order_by="created_at",
|
|
163
163
|
reverse=reverse,
|
|
164
|
-
return_message_object=True,
|
|
165
164
|
)
|
|
166
165
|
assert isinstance(json_messages, List)
|
|
167
166
|
assert all([isinstance(message, Message) for message in json_messages])
|
|
@@ -247,7 +246,7 @@ def create_run(
|
|
|
247
246
|
# TODO: add request.instructions as a message?
|
|
248
247
|
agent_id = thread_id
|
|
249
248
|
# TODO: override preset of agent with request.assistant_id
|
|
250
|
-
agent = server.
|
|
249
|
+
agent = server.load_agent(agent_id=agent_id)
|
|
251
250
|
agent.inner_step(messages=[]) # already has messages added
|
|
252
251
|
run_id = str(uuid.uuid4())
|
|
253
252
|
create_time = int(get_utc_time().timestamp())
|
|
@@ -68,7 +68,6 @@ async def create_chat_completion(
|
|
|
68
68
|
stream_tokens=True,
|
|
69
69
|
# Turn on ChatCompletion mode (eg remaps send_message to content)
|
|
70
70
|
chat_completion_mode=True,
|
|
71
|
-
return_message_object=False,
|
|
72
71
|
)
|
|
73
72
|
|
|
74
73
|
else:
|
|
@@ -86,7 +85,6 @@ async def create_chat_completion(
|
|
|
86
85
|
# Turn streaming OFF
|
|
87
86
|
stream_steps=False,
|
|
88
87
|
stream_tokens=False,
|
|
89
|
-
return_message_object=False,
|
|
90
88
|
)
|
|
91
89
|
# print(response_messages)
|
|
92
90
|
|
|
@@ -1,24 +1,28 @@
|
|
|
1
1
|
import asyncio
|
|
2
|
+
import warnings
|
|
2
3
|
from datetime import datetime
|
|
3
|
-
from typing import
|
|
4
|
+
from typing import List, Optional, Union
|
|
4
5
|
|
|
5
6
|
from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query, status
|
|
6
7
|
from fastapi.responses import JSONResponse, StreamingResponse
|
|
7
8
|
|
|
8
9
|
from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
|
|
9
10
|
from letta.schemas.agent import AgentState, CreateAgent, UpdateAgentState
|
|
10
|
-
from letta.schemas.block import
|
|
11
|
+
from letta.schemas.block import ( # , BlockLabelUpdate, BlockLimitUpdate
|
|
12
|
+
Block,
|
|
13
|
+
BlockUpdate,
|
|
14
|
+
CreateBlock,
|
|
15
|
+
)
|
|
11
16
|
from letta.schemas.enums import MessageStreamStatus
|
|
12
17
|
from letta.schemas.letta_message import (
|
|
13
18
|
LegacyLettaMessage,
|
|
14
19
|
LettaMessage,
|
|
15
20
|
LettaMessageUnion,
|
|
16
21
|
)
|
|
17
|
-
from letta.schemas.letta_request import LettaRequest
|
|
22
|
+
from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
|
|
18
23
|
from letta.schemas.letta_response import LettaResponse
|
|
19
24
|
from letta.schemas.memory import (
|
|
20
25
|
ArchivalMemorySummary,
|
|
21
|
-
BasicBlockMemory,
|
|
22
26
|
ContextWindowOverview,
|
|
23
27
|
CreateArchivalMemory,
|
|
24
28
|
Memory,
|
|
@@ -31,7 +35,6 @@ from letta.schemas.tool import Tool
|
|
|
31
35
|
from letta.server.rest_api.interface import StreamingServerInterface
|
|
32
36
|
from letta.server.rest_api.utils import get_letta_server, sse_async_generator
|
|
33
37
|
from letta.server.server import SyncServer
|
|
34
|
-
from letta.utils import deduplicate
|
|
35
38
|
|
|
36
39
|
# These can be forward refs, but because Fastapi needs them at runtime the must be imported normally
|
|
37
40
|
|
|
@@ -83,13 +86,6 @@ def create_agent(
|
|
|
83
86
|
Create a new agent with the specified configuration.
|
|
84
87
|
"""
|
|
85
88
|
actor = server.get_user_or_default(user_id=user_id)
|
|
86
|
-
agent.user_id = actor.id
|
|
87
|
-
# TODO: sarah make general
|
|
88
|
-
# TODO: eventually remove this
|
|
89
|
-
assert agent.memory is not None # TODO: dont force this, can be None (use default human/person)
|
|
90
|
-
blocks = agent.memory.get_blocks()
|
|
91
|
-
agent.memory = BasicBlockMemory(blocks=blocks)
|
|
92
|
-
|
|
93
89
|
return server.create_agent(agent, actor=actor)
|
|
94
90
|
|
|
95
91
|
|
|
@@ -196,6 +192,7 @@ def get_agent_in_context_messages(
|
|
|
196
192
|
return server.get_in_context_messages(agent_id=agent_id)
|
|
197
193
|
|
|
198
194
|
|
|
195
|
+
# TODO: remove? can also get with agent blocks
|
|
199
196
|
@router.get("/{agent_id}/memory", response_model=Memory, operation_id="get_agent_memory")
|
|
200
197
|
def get_agent_memory(
|
|
201
198
|
agent_id: str,
|
|
@@ -209,47 +206,40 @@ def get_agent_memory(
|
|
|
209
206
|
return server.get_agent_memory(agent_id=agent_id)
|
|
210
207
|
|
|
211
208
|
|
|
212
|
-
@router.
|
|
213
|
-
def
|
|
209
|
+
@router.get("/{agent_id}/memory/block/{block_label}", response_model=Block, operation_id="get_agent_memory_block")
def get_agent_memory_block(
    agent_id: str,
    block_label: str,
    server: "SyncServer" = Depends(get_letta_server),
    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
):
    """
    Retrieve a memory block from an agent.
    """
    # Resolve the acting user first (falls back to the default user when no header is sent).
    actor = server.get_user_or_default(user_id=user_id)

    # The agent/block link table maps (agent_id, label) -> block_id; the block
    # itself is then fetched through the block manager on behalf of the actor.
    links = server.blocks_agents_manager
    resolved_block_id = links.get_block_id_for_label(agent_id=agent_id, block_label=block_label)
    found_block = server.block_manager.get_block_by_id(resolved_block_id, actor=actor)
    return found_block
|
|
229
223
|
|
|
230
224
|
|
|
231
|
-
@router.
|
|
232
|
-
def
|
|
225
|
+
@router.get("/{agent_id}/memory/block", response_model=List[Block], operation_id="get_agent_memory_blocks")
def get_agent_memory_blocks(
    agent_id: str,
    server: "SyncServer" = Depends(get_letta_server),
    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
):
    """
    Retrieve the memory blocks of a specific agent.
    """
    actor = server.get_user_or_default(user_id=user_id)

    # List the ids linked to this agent, then load each block in listing order.
    agent_blocks = []
    for linked_id in server.blocks_agents_manager.list_block_ids_for_agent(agent_id=agent_id):
        agent_blocks.append(server.block_manager.get_block_by_id(linked_id, actor=actor))
    return agent_blocks
|
|
247
237
|
|
|
248
238
|
|
|
249
239
|
@router.post("/{agent_id}/memory/block", response_model=Memory, operation_id="add_agent_memory_block")
|
|
250
240
|
def add_agent_memory_block(
|
|
251
241
|
agent_id: str,
|
|
252
|
-
create_block:
|
|
242
|
+
create_block: CreateBlock = Body(...),
|
|
253
243
|
server: "SyncServer" = Depends(get_letta_server),
|
|
254
244
|
user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
|
|
255
245
|
):
|
|
@@ -268,7 +258,7 @@ def add_agent_memory_block(
|
|
|
268
258
|
return updated_memory
|
|
269
259
|
|
|
270
260
|
|
|
271
|
-
@router.delete("/{agent_id}/memory/block/{block_label}", response_model=Memory, operation_id="
|
|
261
|
+
@router.delete("/{agent_id}/memory/block/{block_label}", response_model=Memory, operation_id="remove_agent_memory_block_by_label")
|
|
272
262
|
def remove_agent_memory_block(
|
|
273
263
|
agent_id: str,
|
|
274
264
|
# TODO should this be block_id, or the label?
|
|
@@ -288,25 +278,24 @@ def remove_agent_memory_block(
|
|
|
288
278
|
return updated_memory
|
|
289
279
|
|
|
290
280
|
|
|
291
|
-
@router.patch("/{agent_id}/memory/
|
|
292
|
-
def
|
|
281
|
+
@router.patch("/{agent_id}/memory/block/{block_label}", response_model=Block, operation_id="update_agent_memory_block_by_label")
def update_agent_memory_block(
    agent_id: str,
    block_label: str,
    update_block: BlockUpdate = Body(...),
    server: "SyncServer" = Depends(get_letta_server),
    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
):
    """
    Update a memory block of an agent, identified by the block's label.

    The label is resolved to a block id for the given agent, the update is applied
    to that block, and the result of the block manager's update call is returned.
    """
    actor = server.get_user_or_default(user_id=user_id)

    # get the block_id from the label
    block_id = server.blocks_agents_manager.get_block_id_for_label(agent_id=agent_id, block_label=block_label)

    # update the block
    return server.block_manager.update_block(block_id=block_id, block_update=update_block, actor=actor)
|
|
310
299
|
|
|
311
300
|
|
|
312
301
|
@router.get("/{agent_id}/memory/recall", response_model=RecallMemorySummary, operation_id="get_agent_recall_memory_summary")
|
|
@@ -402,17 +391,13 @@ def get_agent_messages(
|
|
|
402
391
|
limit: int = Query(10, description="Maximum number of messages to retrieve."),
|
|
403
392
|
msg_object: bool = Query(False, description="If true, returns Message objects. If false, return LettaMessage objects."),
|
|
404
393
|
# Flags to support the use of AssistantMessage message types
|
|
405
|
-
|
|
406
|
-
False,
|
|
407
|
-
description="[Only applicable if msg_object is False] If true, returns AssistantMessage objects when the agent calls a designated message tool. If false, return FunctionCallMessage objects for all tool calls.",
|
|
408
|
-
),
|
|
409
|
-
assistant_message_function_name: str = Query(
|
|
394
|
+
assistant_message_tool_name: str = Query(
|
|
410
395
|
DEFAULT_MESSAGE_TOOL,
|
|
411
|
-
description="
|
|
396
|
+
description="The name of the designated message tool.",
|
|
412
397
|
),
|
|
413
|
-
|
|
398
|
+
assistant_message_tool_kwarg: str = Query(
|
|
414
399
|
DEFAULT_MESSAGE_TOOL_KWARG,
|
|
415
|
-
description="
|
|
400
|
+
description="The name of the message argument in the designated message tool.",
|
|
416
401
|
),
|
|
417
402
|
user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
|
|
418
403
|
):
|
|
@@ -428,9 +413,8 @@ def get_agent_messages(
|
|
|
428
413
|
limit=limit,
|
|
429
414
|
reverse=True,
|
|
430
415
|
return_message_object=msg_object,
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
assistant_message_function_kwarg=assistant_message_function_kwarg,
|
|
416
|
+
assistant_message_tool_name=assistant_message_tool_name,
|
|
417
|
+
assistant_message_tool_kwarg=assistant_message_tool_kwarg,
|
|
434
418
|
)
|
|
435
419
|
|
|
436
420
|
|
|
@@ -450,45 +434,77 @@ def update_message(
|
|
|
450
434
|
|
|
451
435
|
@router.post(
    "/{agent_id}/messages",
    response_model=LettaResponse,
    operation_id="create_agent_message",
)
async def send_message(
    agent_id: str,
    server: SyncServer = Depends(get_letta_server),
    request: LettaRequest = Body(...),
    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
):
    """
    Process a user message and return the agent's response.
    This endpoint accepts a message from a user and processes it through the agent.
    """
    actor = server.get_user_or_default(user_id=user_id)

    # Non-streaming variant: both streaming flags are forced off; the
    # AssistantMessage tool settings come straight from the request body.
    call_kwargs = dict(
        server=server,
        agent_id=agent_id,
        user_id=actor.id,
        messages=request.messages,
        stream_steps=False,
        stream_tokens=False,
        assistant_message_tool_name=request.assistant_message_tool_name,
        assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
    )

    # Hold the per-agent lock for the duration of the call.
    async with server.per_agent_lock_manager.get_lock(agent_id):
        return await send_message_to_agent(**call_kwargs)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
@router.post(
    "/{agent_id}/messages/stream",
    response_model=None,
    # Must differ from the non-streaming route's "create_agent_message":
    # OpenAPI requires operationId values to be unique across the API.
    operation_id="create_agent_message_stream",
    responses={
        200: {
            "description": "Successful response",
            "content": {
                "text/event-stream": {"description": "Server-Sent Events stream"},
            },
        }
    },
)
async def send_message_streaming(
    agent_id: str,
    server: SyncServer = Depends(get_letta_server),
    request: LettaStreamingRequest = Body(...),
    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
):
    """
    Process a user message and return the agent's response.
    This endpoint accepts a message from a user and processes it through the agent.
    It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
    """
    actor = server.get_user_or_default(user_id=user_id)

    agent_lock = server.per_agent_lock_manager.get_lock(agent_id)
    # NOTE(review): the lock is released as soon as send_message_to_agent returns.
    # With stream_steps=True that function returns a StreamingResponse; if the agent
    # step is still running while the stream is consumed, the lock would not cover
    # it -- confirm whether the lock needs to be held until the stream finishes.
    async with agent_lock:
        result = await send_message_to_agent(
            server=server,
            agent_id=agent_id,
            user_id=actor.id,
            messages=request.messages,
            stream_steps=True,
            stream_tokens=request.stream_tokens,
            # Support for AssistantMessage
            assistant_message_tool_name=request.assistant_message_tool_name,
            assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
        )
    return result
|
|
492
508
|
|
|
493
509
|
|
|
494
510
|
# TODO: move this into server.py?
|
|
@@ -501,13 +517,11 @@ async def send_message_to_agent(
|
|
|
501
517
|
stream_steps: bool,
|
|
502
518
|
stream_tokens: bool,
|
|
503
519
|
# related to whether or not we return `LettaMessage`s or `Message`s
|
|
504
|
-
return_message_object: bool, # Should be True for Python Client, False for REST API
|
|
505
520
|
chat_completion_mode: bool = False,
|
|
506
521
|
timestamp: Optional[datetime] = None,
|
|
507
522
|
# Support for AssistantMessage
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
assistant_message_function_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
|
|
523
|
+
assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
|
|
524
|
+
assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
|
|
511
525
|
) -> Union[StreamingResponse, LettaResponse]:
|
|
512
526
|
"""Split off into a separate function so that it can be imported in the /chat/completion proxy."""
|
|
513
527
|
|
|
@@ -524,13 +538,16 @@ async def send_message_to_agent(
|
|
|
524
538
|
|
|
525
539
|
# Get the generator object off of the agent's streaming interface
|
|
526
540
|
# This will be attached to the POST SSE request used under-the-hood
|
|
527
|
-
letta_agent = server.
|
|
541
|
+
# letta_agent = server.load_agent(agent_id=agent_id)
|
|
542
|
+
letta_agent = server.load_agent(agent_id=agent_id)
|
|
528
543
|
|
|
529
544
|
# Disable token streaming if not OpenAI
|
|
530
545
|
# TODO: cleanup this logic
|
|
531
546
|
llm_config = letta_agent.agent_state.llm_config
|
|
532
|
-
if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint:
|
|
533
|
-
|
|
547
|
+
if stream_tokens and (llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint):
|
|
548
|
+
warnings.warn(
|
|
549
|
+
"Token streaming is only supported for models with type 'openai' or `inference.memgpt.ai` in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
|
|
550
|
+
)
|
|
534
551
|
stream_tokens = False
|
|
535
552
|
|
|
536
553
|
# Create a new interface per request
|
|
@@ -548,9 +565,8 @@ async def send_message_to_agent(
|
|
|
548
565
|
# streaming_interface.function_call_legacy_mode = stream
|
|
549
566
|
|
|
550
567
|
# Allow AssistantMessage is desired by client
|
|
551
|
-
streaming_interface.
|
|
552
|
-
streaming_interface.
|
|
553
|
-
streaming_interface.assistant_message_function_kwarg = assistant_message_function_kwarg
|
|
568
|
+
streaming_interface.assistant_message_tool_name = assistant_message_tool_name
|
|
569
|
+
streaming_interface.assistant_message_tool_kwarg = assistant_message_tool_kwarg
|
|
554
570
|
|
|
555
571
|
# Related to JSON buffer reader
|
|
556
572
|
streaming_interface.inner_thoughts_in_kwargs = (
|
|
@@ -565,14 +581,11 @@ async def send_message_to_agent(
|
|
|
565
581
|
user_id=user_id,
|
|
566
582
|
agent_id=agent_id,
|
|
567
583
|
messages=messages,
|
|
584
|
+
interface=streaming_interface,
|
|
568
585
|
)
|
|
569
586
|
)
|
|
570
587
|
|
|
571
588
|
if stream_steps:
|
|
572
|
-
if return_message_object:
|
|
573
|
-
# TODO implement returning `Message`s in a stream, not just `LettaMessage` format
|
|
574
|
-
raise NotImplementedError
|
|
575
|
-
|
|
576
589
|
# return a stream
|
|
577
590
|
return StreamingResponse(
|
|
578
591
|
sse_async_generator(
|
|
@@ -602,14 +615,7 @@ async def send_message_to_agent(
|
|
|
602
615
|
# If we want to convert these to Message, we can use the attached IDs
|
|
603
616
|
# NOTE: we will need to de-duplicate the Messsage IDs though (since Assistant->Inner+Func_Call)
|
|
604
617
|
# TODO: eventually update the interface to use `Message` and `MessageChunk` (new) inside the deque instead
|
|
605
|
-
|
|
606
|
-
message_ids = [m.id for m in filtered_stream]
|
|
607
|
-
message_ids = deduplicate(message_ids)
|
|
608
|
-
message_objs = [server.get_agent_message(agent_id=agent_id, message_id=m_id) for m_id in message_ids]
|
|
609
|
-
message_objs = [m for m in message_objs if m is not None]
|
|
610
|
-
return LettaResponse(messages=message_objs, usage=usage)
|
|
611
|
-
else:
|
|
612
|
-
return LettaResponse(messages=filtered_stream, usage=usage)
|
|
618
|
+
return LettaResponse(messages=filtered_stream, usage=usage)
|
|
613
619
|
|
|
614
620
|
except HTTPException:
|
|
615
621
|
raise
|
|
@@ -3,7 +3,8 @@ from typing import TYPE_CHECKING, List, Optional
|
|
|
3
3
|
from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
|
|
4
4
|
|
|
5
5
|
from letta.orm.errors import NoResultFound
|
|
6
|
-
from letta.schemas.block import Block,
|
|
6
|
+
from letta.schemas.block import Block, BlockUpdate, CreateBlock
|
|
7
|
+
from letta.schemas.memory import Memory
|
|
7
8
|
from letta.server.rest_api.utils import get_letta_server
|
|
8
9
|
from letta.server.server import SyncServer
|
|
9
10
|
|
|
@@ -28,7 +29,7 @@ def list_blocks(
|
|
|
28
29
|
|
|
29
30
|
@router.post("/", response_model=Block, operation_id="create_memory_block")
|
|
30
31
|
def create_block(
|
|
31
|
-
create_block:
|
|
32
|
+
create_block: CreateBlock = Body(...),
|
|
32
33
|
server: SyncServer = Depends(get_letta_server),
|
|
33
34
|
user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
|
|
34
35
|
):
|
|
@@ -40,12 +41,12 @@ def create_block(
|
|
|
40
41
|
@router.patch("/{block_id}", response_model=Block, operation_id="update_memory_block")
|
|
41
42
|
def update_block(
|
|
42
43
|
block_id: str,
|
|
43
|
-
|
|
44
|
+
update_block: BlockUpdate = Body(...),
|
|
44
45
|
server: SyncServer = Depends(get_letta_server),
|
|
45
46
|
user_id: Optional[str] = Header(None, alias="user_id"),
|
|
46
47
|
):
|
|
47
48
|
actor = server.get_user_or_default(user_id=user_id)
|
|
48
|
-
return server.block_manager.update_block(block_id=block_id, block_update=
|
|
49
|
+
return server.block_manager.update_block(block_id=block_id, block_update=update_block, actor=actor)
|
|
49
50
|
|
|
50
51
|
|
|
51
52
|
@router.delete("/{block_id}", response_model=Block, operation_id="delete_memory_block")
|
|
@@ -64,8 +65,52 @@ def get_block(
|
|
|
64
65
|
server: SyncServer = Depends(get_letta_server),
|
|
65
66
|
user_id: Optional[str] = Header(None, alias="user_id"),
|
|
66
67
|
):
|
|
68
|
+
print("call get block", block_id)
|
|
67
69
|
actor = server.get_user_or_default(user_id=user_id)
|
|
68
70
|
try:
|
|
69
|
-
|
|
71
|
+
block = server.block_manager.get_block_by_id(block_id=block_id, actor=actor)
|
|
72
|
+
if block is None:
|
|
73
|
+
raise HTTPException(status_code=404, detail="Block not found")
|
|
74
|
+
return block
|
|
70
75
|
except NoResultFound:
|
|
71
76
|
raise HTTPException(status_code=404, detail="Block not found")
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# operation_id is unique to this route (it previously shared "update_agent_memory_block"
# with the detach route; OpenAPI requires operationId values to be unique).
@router.patch("/{block_id}/attach", response_model=Block, operation_id="link_agent_memory_block")
def link_agent_memory_block(
    block_id: str,
    agent_id: str = Query(..., description="The unique identifier of the agent to attach the block to."),
    server: "SyncServer" = Depends(get_letta_server),
    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
):
    """
    Link a memory block to an agent.

    Responds with 404 if the block lookup returns None; otherwise the block is
    linked to the agent under the block's own label and the block is returned.
    """
    actor = server.get_user_or_default(user_id=user_id)

    block = server.block_manager.get_block_by_id(block_id=block_id, actor=actor)
    if block is None:
        raise HTTPException(status_code=404, detail="Block not found")

    # Link the block to the agent, keyed by the block's label
    server.blocks_agents_manager.add_block_to_agent(agent_id=agent_id, block_id=block_id, block_label=block.label)
    return block
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# operation_id is unique to this route (it previously shared "update_agent_memory_block"
# with the attach route). response_model is Block because the handler returns the
# Block it looked up, not a Memory object.
@router.patch("/{block_id}/detach", response_model=Block, operation_id="unlink_agent_memory_block")
def unlink_agent_memory_block(
    block_id: str,
    agent_id: str = Query(..., description="The unique identifier of the agent to detach the block from."),
    server: "SyncServer" = Depends(get_letta_server),
    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
):
    """
    Unlink a memory block from an agent

    Responds with 404 if the block lookup returns None; otherwise the link between
    the agent and the block is removed and the block is returned.
    """
    actor = server.get_user_or_default(user_id=user_id)

    block = server.block_manager.get_block_by_id(block_id=block_id, actor=actor)
    if block is None:
        raise HTTPException(status_code=404, detail="Block not found")

    # Unlink the block from the agent
    server.blocks_agents_manager.remove_block_with_id_from_agent(agent_id=agent_id, block_id=block_id)
    return block
|
|
@@ -2,6 +2,7 @@ from typing import List, Optional
|
|
|
2
2
|
|
|
3
3
|
from fastapi import APIRouter, Body, Depends, Header, HTTPException
|
|
4
4
|
|
|
5
|
+
from letta.errors import LettaToolCreateError
|
|
5
6
|
from letta.orm.errors import UniqueConstraintViolationError
|
|
6
7
|
from letta.schemas.tool import Tool, ToolCreate, ToolUpdate
|
|
7
8
|
from letta.server.rest_api.utils import get_letta_server
|
|
@@ -14,12 +15,13 @@ router = APIRouter(prefix="/tools", tags=["tools"])
|
|
|
14
15
|
def delete_tool(
|
|
15
16
|
tool_id: str,
|
|
16
17
|
server: SyncServer = Depends(get_letta_server),
|
|
18
|
+
user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
|
|
17
19
|
):
|
|
18
20
|
"""
|
|
19
21
|
Delete a tool by name
|
|
20
22
|
"""
|
|
21
|
-
|
|
22
|
-
server.tool_manager.
|
|
23
|
+
actor = server.get_user_or_default(user_id=user_id)
|
|
24
|
+
server.tool_manager.delete_tool_by_id(tool_id=tool_id, actor=actor)
|
|
23
25
|
|
|
24
26
|
|
|
25
27
|
@router.get("/{tool_id}", response_model=Tool, operation_id="get_tool")
|
|
@@ -91,7 +93,16 @@ def create_tool(
|
|
|
91
93
|
except UniqueConstraintViolationError as e:
|
|
92
94
|
# Log or print the full exception here for debugging
|
|
93
95
|
print(f"Error occurred: {e}")
|
|
94
|
-
|
|
96
|
+
clean_error_message = f"Tool with name {request.name} already exists."
|
|
97
|
+
raise HTTPException(status_code=409, detail=clean_error_message)
|
|
98
|
+
except LettaToolCreateError as e:
|
|
99
|
+
# HTTP 400 == Bad Request
|
|
100
|
+
print(f"Error occurred during tool creation: {e}")
|
|
101
|
+
# print the full stack trace
|
|
102
|
+
import traceback
|
|
103
|
+
|
|
104
|
+
print(traceback.format_exc())
|
|
105
|
+
raise HTTPException(status_code=400, detail=str(e))
|
|
95
106
|
except Exception as e:
|
|
96
107
|
# Catch other unexpected errors and raise an internal server error
|
|
97
108
|
print(f"Unexpected error occurred: {e}")
|