letta-nightly 0.5.4.dev20241126104249__py3-none-any.whl → 0.5.4.dev20241128000451__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

Files changed (46) hide show
  1. letta/__init__.py +1 -1
  2. letta/agent.py +102 -140
  3. letta/agent_store/chroma.py +2 -0
  4. letta/cli/cli.py +3 -5
  5. letta/client/client.py +360 -117
  6. letta/config.py +2 -2
  7. letta/constants.py +5 -0
  8. letta/errors.py +12 -0
  9. letta/functions/function_sets/base.py +38 -1
  10. letta/functions/functions.py +4 -6
  11. letta/functions/schema_generator.py +6 -5
  12. letta/helpers/tool_rule_solver.py +6 -5
  13. letta/main.py +1 -1
  14. letta/metadata.py +45 -42
  15. letta/o1_agent.py +1 -4
  16. letta/orm/block.py +2 -1
  17. letta/orm/blocks_agents.py +4 -1
  18. letta/orm/sqlalchemy_base.py +13 -0
  19. letta/persistence_manager.py +1 -0
  20. letta/schemas/agent.py +57 -52
  21. letta/schemas/block.py +70 -26
  22. letta/schemas/enums.py +14 -0
  23. letta/schemas/letta_base.py +1 -1
  24. letta/schemas/letta_request.py +11 -23
  25. letta/schemas/letta_response.py +1 -2
  26. letta/schemas/memory.py +31 -100
  27. letta/schemas/message.py +3 -3
  28. letta/schemas/tool_rule.py +13 -5
  29. letta/server/rest_api/interface.py +12 -19
  30. letta/server/rest_api/routers/openai/assistants/threads.py +2 -3
  31. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +0 -2
  32. letta/server/rest_api/routers/v1/agents.py +100 -94
  33. letta/server/rest_api/routers/v1/blocks.py +50 -5
  34. letta/server/rest_api/routers/v1/tools.py +14 -3
  35. letta/server/server.py +246 -460
  36. letta/server/static_files/assets/index-9fa459a2.js +1 -1
  37. letta/services/block_manager.py +23 -4
  38. letta/services/blocks_agents_manager.py +23 -1
  39. letta/services/per_agent_lock_manager.py +18 -0
  40. letta/services/tool_execution_sandbox.py +1 -1
  41. letta/services/tool_manager.py +2 -1
  42. {letta_nightly-0.5.4.dev20241126104249.dist-info → letta_nightly-0.5.4.dev20241128000451.dist-info}/METADATA +1 -1
  43. {letta_nightly-0.5.4.dev20241126104249.dist-info → letta_nightly-0.5.4.dev20241128000451.dist-info}/RECORD +46 -45
  44. {letta_nightly-0.5.4.dev20241126104249.dist-info → letta_nightly-0.5.4.dev20241128000451.dist-info}/LICENSE +0 -0
  45. {letta_nightly-0.5.4.dev20241126104249.dist-info → letta_nightly-0.5.4.dev20241128000451.dist-info}/WHEEL +0 -0
  46. {letta_nightly-0.5.4.dev20241126104249.dist-info → letta_nightly-0.5.4.dev20241128000451.dist-info}/entry_points.txt +0 -0
@@ -271,9 +271,8 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
271
271
  self,
272
272
  multi_step=True,
273
273
  # Related to if we want to try and pass back the AssistantMessage as a special case function
274
- use_assistant_message=False,
275
- assistant_message_function_name=DEFAULT_MESSAGE_TOOL,
276
- assistant_message_function_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
274
+ assistant_message_tool_name=DEFAULT_MESSAGE_TOOL,
275
+ assistant_message_tool_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
277
276
  # Related to if we expect inner_thoughts to be in the kwargs
278
277
  inner_thoughts_in_kwargs=True,
279
278
  inner_thoughts_kwarg=INNER_THOUGHTS_KWARG,
@@ -287,7 +286,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
287
286
  self.streaming_chat_completion_mode_function_name = None # NOTE: sadly need to track state during stream
288
287
  # If chat completion mode, we need a special stream reader to
289
288
  # turn function argument to send_message into a normal text stream
290
- self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_function_kwarg)
289
+ self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_tool_kwarg)
291
290
 
292
291
  self._chunks = deque()
293
292
  self._event = asyncio.Event() # Use an event to notify when chunks are available
@@ -300,9 +299,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
300
299
  self.multi_step_gen_indicator = MessageStreamStatus.done_generation
301
300
 
302
301
  # Support for AssistantMessage
303
- self.use_assistant_message = use_assistant_message
304
- self.assistant_message_function_name = assistant_message_function_name
305
- self.assistant_message_function_kwarg = assistant_message_function_kwarg
302
+ self.use_assistant_message = False # TODO: Remove this
303
+ self.assistant_message_tool_name = assistant_message_tool_name
304
+ self.assistant_message_tool_kwarg = assistant_message_tool_kwarg
306
305
 
307
306
  # Support for inner_thoughts_in_kwargs
308
307
  self.inner_thoughts_in_kwargs = inner_thoughts_in_kwargs
@@ -455,17 +454,14 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
455
454
 
456
455
  # If we get a "hit" on the special keyword we're looking for, we want to skip to the next chunk
457
456
  # TODO I don't think this handles the function name in multi-pieces problem. Instead, we should probably reset the streaming_chat_completion_mode_function_name when we make this hit?
458
- # if self.streaming_chat_completion_mode_function_name == self.assistant_message_function_name:
459
- if tool_call.function.name == self.assistant_message_function_name:
457
+ # if self.streaming_chat_completion_mode_function_name == self.assistant_message_tool_name:
458
+ if tool_call.function.name == self.assistant_message_tool_name:
460
459
  self.streaming_chat_completion_json_reader.reset()
461
460
  # early exit to turn into content mode
462
461
  return None
463
462
 
464
463
  # if we're in the middle of parsing a send_message, we'll keep processing the JSON chunks
465
- if (
466
- tool_call.function.arguments
467
- and self.streaming_chat_completion_mode_function_name == self.assistant_message_function_name
468
- ):
464
+ if tool_call.function.arguments and self.streaming_chat_completion_mode_function_name == self.assistant_message_tool_name:
469
465
  # Strip out any extras tokens
470
466
  cleaned_func_args = self.streaming_chat_completion_json_reader.process_json_chunk(tool_call.function.arguments)
471
467
  # In the case that we just have the prefix of something, no message yet, then we should early exit to move to the next chunk
@@ -500,9 +496,6 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
500
496
  )
501
497
 
502
498
  elif self.inner_thoughts_in_kwargs and tool_call.function:
503
- if self.use_assistant_message:
504
- raise NotImplementedError("inner_thoughts_in_kwargs with use_assistant_message not yet supported")
505
-
506
499
  processed_chunk = None
507
500
 
508
501
  if tool_call.function.name:
@@ -909,13 +902,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
909
902
 
910
903
  if (
911
904
  self.use_assistant_message
912
- and function_call.function.name == self.assistant_message_function_name
913
- and self.assistant_message_function_kwarg in func_args
905
+ and function_call.function.name == self.assistant_message_tool_name
906
+ and self.assistant_message_tool_kwarg in func_args
914
907
  ):
915
908
  processed_chunk = AssistantMessage(
916
909
  id=msg_obj.id,
917
910
  date=msg_obj.created_at,
918
- assistant_message=func_args[self.assistant_message_function_kwarg],
911
+ assistant_message=func_args[self.assistant_message_tool_kwarg],
919
912
  )
920
913
  else:
921
914
  processed_chunk = FunctionCallMessage(
@@ -117,7 +117,7 @@ def create_message(
117
117
  tool_call_id=None,
118
118
  name=None,
119
119
  )
120
- agent = server._get_or_load_agent(agent_id=agent_id)
120
+ agent = server.load_agent(agent_id=agent_id)
121
121
  # add message to agent
122
122
  agent._append_to_messages([message])
123
123
 
@@ -161,7 +161,6 @@ def list_messages(
161
161
  before=before_uuid,
162
162
  order_by="created_at",
163
163
  reverse=reverse,
164
- return_message_object=True,
165
164
  )
166
165
  assert isinstance(json_messages, List)
167
166
  assert all([isinstance(message, Message) for message in json_messages])
@@ -247,7 +246,7 @@ def create_run(
247
246
  # TODO: add request.instructions as a message?
248
247
  agent_id = thread_id
249
248
  # TODO: override preset of agent with request.assistant_id
250
- agent = server._get_or_load_agent(agent_id=agent_id)
249
+ agent = server.load_agent(agent_id=agent_id)
251
250
  agent.inner_step(messages=[]) # already has messages added
252
251
  run_id = str(uuid.uuid4())
253
252
  create_time = int(get_utc_time().timestamp())
@@ -68,7 +68,6 @@ async def create_chat_completion(
68
68
  stream_tokens=True,
69
69
  # Turn on ChatCompletion mode (eg remaps send_message to content)
70
70
  chat_completion_mode=True,
71
- return_message_object=False,
72
71
  )
73
72
 
74
73
  else:
@@ -86,7 +85,6 @@ async def create_chat_completion(
86
85
  # Turn streaming OFF
87
86
  stream_steps=False,
88
87
  stream_tokens=False,
89
- return_message_object=False,
90
88
  )
91
89
  # print(response_messages)
92
90
 
@@ -1,24 +1,28 @@
1
1
  import asyncio
2
+ import warnings
2
3
  from datetime import datetime
3
- from typing import Dict, List, Optional, Union
4
+ from typing import List, Optional, Union
4
5
 
5
6
  from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query, status
6
7
  from fastapi.responses import JSONResponse, StreamingResponse
7
8
 
8
9
  from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
9
10
  from letta.schemas.agent import AgentState, CreateAgent, UpdateAgentState
10
- from letta.schemas.block import Block, BlockCreate, BlockLabelUpdate, BlockLimitUpdate
11
+ from letta.schemas.block import ( # , BlockLabelUpdate, BlockLimitUpdate
12
+ Block,
13
+ BlockUpdate,
14
+ CreateBlock,
15
+ )
11
16
  from letta.schemas.enums import MessageStreamStatus
12
17
  from letta.schemas.letta_message import (
13
18
  LegacyLettaMessage,
14
19
  LettaMessage,
15
20
  LettaMessageUnion,
16
21
  )
17
- from letta.schemas.letta_request import LettaRequest
22
+ from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
18
23
  from letta.schemas.letta_response import LettaResponse
19
24
  from letta.schemas.memory import (
20
25
  ArchivalMemorySummary,
21
- BasicBlockMemory,
22
26
  ContextWindowOverview,
23
27
  CreateArchivalMemory,
24
28
  Memory,
@@ -31,7 +35,6 @@ from letta.schemas.tool import Tool
31
35
  from letta.server.rest_api.interface import StreamingServerInterface
32
36
  from letta.server.rest_api.utils import get_letta_server, sse_async_generator
33
37
  from letta.server.server import SyncServer
34
- from letta.utils import deduplicate
35
38
 
36
39
  # These can be forward refs, but because Fastapi needs them at runtime the must be imported normally
37
40
 
@@ -83,13 +86,6 @@ def create_agent(
83
86
  Create a new agent with the specified configuration.
84
87
  """
85
88
  actor = server.get_user_or_default(user_id=user_id)
86
- agent.user_id = actor.id
87
- # TODO: sarah make general
88
- # TODO: eventually remove this
89
- assert agent.memory is not None # TODO: dont force this, can be None (use default human/person)
90
- blocks = agent.memory.get_blocks()
91
- agent.memory = BasicBlockMemory(blocks=blocks)
92
-
93
89
  return server.create_agent(agent, actor=actor)
94
90
 
95
91
 
@@ -196,6 +192,7 @@ def get_agent_in_context_messages(
196
192
  return server.get_in_context_messages(agent_id=agent_id)
197
193
 
198
194
 
195
+ # TODO: remove? can also get with agent blocks
199
196
  @router.get("/{agent_id}/memory", response_model=Memory, operation_id="get_agent_memory")
200
197
  def get_agent_memory(
201
198
  agent_id: str,
@@ -209,47 +206,40 @@ def get_agent_memory(
209
206
  return server.get_agent_memory(agent_id=agent_id)
210
207
 
211
208
 
212
- @router.patch("/{agent_id}/memory", response_model=Memory, operation_id="update_agent_memory")
213
- def update_agent_memory(
209
+ @router.get("/{agent_id}/memory/block/{block_label}", response_model=Block, operation_id="get_agent_memory_block")
210
+ def get_agent_memory_block(
214
211
  agent_id: str,
215
- request: Dict = Body(...),
212
+ block_label: str,
216
213
  server: "SyncServer" = Depends(get_letta_server),
217
214
  user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
218
215
  ):
219
216
  """
220
- Update the core memory of a specific agent.
221
- This endpoint accepts new memory contents (labels as keys, and values as values) and updates the core memory of the agent identified by the user ID and agent ID.
222
- This endpoint accepts new memory contents to update the core memory of the agent.
223
- This endpoint only supports modifying existing blocks; it does not support deleting/unlinking or creating/linking blocks.
217
+ Retrieve a memory block from an agent.
224
218
  """
225
219
  actor = server.get_user_or_default(user_id=user_id)
226
220
 
227
- memory = server.update_agent_core_memory(user_id=actor.id, agent_id=agent_id, new_memory_contents=request)
228
- return memory
221
+ block_id = server.blocks_agents_manager.get_block_id_for_label(agent_id=agent_id, block_label=block_label)
222
+ return server.block_manager.get_block_by_id(block_id, actor=actor)
229
223
 
230
224
 
231
- @router.patch("/{agent_id}/memory/label", response_model=Memory, operation_id="update_agent_memory_label")
232
- def update_agent_memory_label(
225
+ @router.get("/{agent_id}/memory/block", response_model=List[Block], operation_id="get_agent_memory_blocks")
226
+ def get_agent_memory_blocks(
233
227
  agent_id: str,
234
- update_label: BlockLabelUpdate = Body(...),
235
228
  server: "SyncServer" = Depends(get_letta_server),
236
229
  user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
237
230
  ):
238
231
  """
239
- Update the label of a block in an agent's memory.
232
+ Retrieve the memory blocks of a specific agent.
240
233
  """
241
234
  actor = server.get_user_or_default(user_id=user_id)
242
-
243
- memory = server.update_agent_memory_label(
244
- user_id=actor.id, agent_id=agent_id, current_block_label=update_label.current_label, new_block_label=update_label.new_label
245
- )
246
- return memory
235
+ block_ids = server.blocks_agents_manager.list_block_ids_for_agent(agent_id=agent_id)
236
+ return [server.block_manager.get_block_by_id(block_id, actor=actor) for block_id in block_ids]
247
237
 
248
238
 
249
239
  @router.post("/{agent_id}/memory/block", response_model=Memory, operation_id="add_agent_memory_block")
250
240
  def add_agent_memory_block(
251
241
  agent_id: str,
252
- create_block: BlockCreate = Body(...),
242
+ create_block: CreateBlock = Body(...),
253
243
  server: "SyncServer" = Depends(get_letta_server),
254
244
  user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
255
245
  ):
@@ -268,7 +258,7 @@ def add_agent_memory_block(
268
258
  return updated_memory
269
259
 
270
260
 
271
- @router.delete("/{agent_id}/memory/block/{block_label}", response_model=Memory, operation_id="remove_agent_memory_block")
261
+ @router.delete("/{agent_id}/memory/block/{block_label}", response_model=Memory, operation_id="remove_agent_memory_block_by_label")
272
262
  def remove_agent_memory_block(
273
263
  agent_id: str,
274
264
  # TODO should this be block_id, or the label?
@@ -288,25 +278,24 @@ def remove_agent_memory_block(
288
278
  return updated_memory
289
279
 
290
280
 
291
- @router.patch("/{agent_id}/memory/limit", response_model=Memory, operation_id="update_agent_memory_limit")
292
- def update_agent_memory_limit(
281
+ @router.patch("/{agent_id}/memory/block/{block_label}", response_model=Block, operation_id="update_agent_memory_block_by_label")
282
+ def update_agent_memory_block(
293
283
  agent_id: str,
294
- update_label: BlockLimitUpdate = Body(...),
284
+ block_label: str,
285
+ update_block: BlockUpdate = Body(...),
295
286
  server: "SyncServer" = Depends(get_letta_server),
296
287
  user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
297
288
  ):
298
289
  """
299
- Update the limit of a block in an agent's memory.
290
  + Updates a memory block of an agent, identified by its label. If the block is not linked to the agent, an error is raised.
300
291
  """
301
292
  actor = server.get_user_or_default(user_id=user_id)
302
293
 
303
- memory = server.update_agent_memory_limit(
304
- user_id=actor.id,
305
- agent_id=agent_id,
306
- block_label=update_label.label,
307
- limit=update_label.limit,
308
- )
309
- return memory
294
+ # get the block_id from the label
295
+ block_id = server.blocks_agents_manager.get_block_id_for_label(agent_id=agent_id, block_label=block_label)
296
+
297
+ # update the block
298
+ return server.block_manager.update_block(block_id=block_id, block_update=update_block, actor=actor)
310
299
 
311
300
 
312
301
  @router.get("/{agent_id}/memory/recall", response_model=RecallMemorySummary, operation_id="get_agent_recall_memory_summary")
@@ -402,17 +391,13 @@ def get_agent_messages(
402
391
  limit: int = Query(10, description="Maximum number of messages to retrieve."),
403
392
  msg_object: bool = Query(False, description="If true, returns Message objects. If false, return LettaMessage objects."),
404
393
  # Flags to support the use of AssistantMessage message types
405
- use_assistant_message: bool = Query(
406
- False,
407
- description="[Only applicable if msg_object is False] If true, returns AssistantMessage objects when the agent calls a designated message tool. If false, return FunctionCallMessage objects for all tool calls.",
408
- ),
409
- assistant_message_function_name: str = Query(
394
+ assistant_message_tool_name: str = Query(
410
395
  DEFAULT_MESSAGE_TOOL,
411
- description="[Only applicable if use_assistant_message is True] The name of the designated message tool.",
396
+ description="The name of the designated message tool.",
412
397
  ),
413
- assistant_message_function_kwarg: str = Query(
398
+ assistant_message_tool_kwarg: str = Query(
414
399
  DEFAULT_MESSAGE_TOOL_KWARG,
415
- description="[Only applicable if use_assistant_message is True] The name of the message argument in the designated message tool.",
400
+ description="The name of the message argument in the designated message tool.",
416
401
  ),
417
402
  user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
418
403
  ):
@@ -428,9 +413,8 @@ def get_agent_messages(
428
413
  limit=limit,
429
414
  reverse=True,
430
415
  return_message_object=msg_object,
431
- use_assistant_message=use_assistant_message,
432
- assistant_message_function_name=assistant_message_function_name,
433
- assistant_message_function_kwarg=assistant_message_function_kwarg,
416
+ assistant_message_tool_name=assistant_message_tool_name,
417
+ assistant_message_tool_kwarg=assistant_message_tool_kwarg,
434
418
  )
435
419
 
436
420
 
@@ -450,45 +434,77 @@ def update_message(
450
434
 
451
435
  @router.post(
452
436
  "/{agent_id}/messages",
437
+ response_model=LettaResponse,
438
+ operation_id="create_agent_message",
439
+ )
440
+ async def send_message(
441
+ agent_id: str,
442
+ server: SyncServer = Depends(get_letta_server),
443
+ request: LettaRequest = Body(...),
444
+ user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
445
+ ):
446
+ """
447
+ Process a user message and return the agent's response.
448
+ This endpoint accepts a message from a user and processes it through the agent.
449
+ """
450
+ actor = server.get_user_or_default(user_id=user_id)
451
+
452
+ agent_lock = server.per_agent_lock_manager.get_lock(agent_id)
453
+ async with agent_lock:
454
+ result = await send_message_to_agent(
455
+ server=server,
456
+ agent_id=agent_id,
457
+ user_id=actor.id,
458
+ messages=request.messages,
459
+ stream_steps=False,
460
+ stream_tokens=False,
461
+ # Support for AssistantMessage
462
+ assistant_message_tool_name=request.assistant_message_tool_name,
463
+ assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
464
+ )
465
+ return result
466
+
467
+
468
+ @router.post(
469
+ "/{agent_id}/messages/stream",
453
470
  response_model=None,
454
471
  operation_id="create_agent_message",
455
472
  responses={
456
473
  200: {
457
474
  "description": "Successful response",
458
475
  "content": {
459
- "application/json": {"$ref": "#/components/schemas/LettaResponse"}, # Use model_json_schema() instead of model directly
460
476
  "text/event-stream": {"description": "Server-Sent Events stream"},
461
477
  },
462
478
  }
463
479
  },
464
480
  )
465
- async def send_message(
481
+ async def send_message_streaming(
466
482
  agent_id: str,
467
483
  server: SyncServer = Depends(get_letta_server),
468
- request: LettaRequest = Body(...),
484
+ request: LettaStreamingRequest = Body(...),
469
485
  user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
470
486
  ):
471
487
  """
472
488
  Process a user message and return the agent's response.
473
489
  This endpoint accepts a message from a user and processes it through the agent.
474
- It can optionally stream the response if 'stream_steps' or 'stream_tokens' is set to True.
490
+ It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
475
491
  """
476
492
  actor = server.get_user_or_default(user_id=user_id)
477
493
 
478
- result = await send_message_to_agent(
479
- server=server,
480
- agent_id=agent_id,
481
- user_id=actor.id,
482
- messages=request.messages,
483
- stream_steps=request.stream_steps,
484
- stream_tokens=request.stream_tokens,
485
- return_message_object=request.return_message_object,
486
- # Support for AssistantMessage
487
- use_assistant_message=request.use_assistant_message,
488
- assistant_message_function_name=request.assistant_message_function_name,
489
- assistant_message_function_kwarg=request.assistant_message_function_kwarg,
490
- )
491
- return result
494
+ agent_lock = server.per_agent_lock_manager.get_lock(agent_id)
495
+ async with agent_lock:
496
+ result = await send_message_to_agent(
497
+ server=server,
498
+ agent_id=agent_id,
499
+ user_id=actor.id,
500
+ messages=request.messages,
501
+ stream_steps=True,
502
+ stream_tokens=request.stream_tokens,
503
+ # Support for AssistantMessage
504
+ assistant_message_tool_name=request.assistant_message_tool_name,
505
+ assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
506
+ )
507
+ return result
492
508
 
493
509
 
494
510
  # TODO: move this into server.py?
@@ -501,13 +517,11 @@ async def send_message_to_agent(
501
517
  stream_steps: bool,
502
518
  stream_tokens: bool,
503
519
  # related to whether or not we return `LettaMessage`s or `Message`s
504
- return_message_object: bool, # Should be True for Python Client, False for REST API
505
520
  chat_completion_mode: bool = False,
506
521
  timestamp: Optional[datetime] = None,
507
522
  # Support for AssistantMessage
508
- use_assistant_message: bool = False,
509
- assistant_message_function_name: str = DEFAULT_MESSAGE_TOOL,
510
- assistant_message_function_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
523
+ assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
524
+ assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
511
525
  ) -> Union[StreamingResponse, LettaResponse]:
512
526
  """Split off into a separate function so that it can be imported in the /chat/completion proxy."""
513
527
 
@@ -524,13 +538,16 @@ async def send_message_to_agent(
524
538
 
525
539
  # Get the generator object off of the agent's streaming interface
526
540
  # This will be attached to the POST SSE request used under-the-hood
527
- letta_agent = server._get_or_load_agent(agent_id=agent_id)
541
+ # letta_agent = server.load_agent(agent_id=agent_id)
542
+ letta_agent = server.load_agent(agent_id=agent_id)
528
543
 
529
544
  # Disable token streaming if not OpenAI
530
545
  # TODO: cleanup this logic
531
546
  llm_config = letta_agent.agent_state.llm_config
532
- if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint:
533
- print("Warning: token streaming is only supported for OpenAI models. Setting to False.")
547
+ if stream_tokens and (llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint):
548
+ warnings.warn(
549
+ "Token streaming is only supported for models with type 'openai' or `inference.memgpt.ai` in the model_endpoint: agent has endpoint type {llm_config.model_endpoint_type} and {llm_config.model_endpoint}. Setting stream_tokens to False."
550
+ )
534
551
  stream_tokens = False
535
552
 
536
553
  # Create a new interface per request
@@ -548,9 +565,8 @@ async def send_message_to_agent(
548
565
  # streaming_interface.function_call_legacy_mode = stream
549
566
 
550
567
  # Allow AssistantMessage if desired by client
551
- streaming_interface.use_assistant_message = use_assistant_message
552
- streaming_interface.assistant_message_function_name = assistant_message_function_name
553
- streaming_interface.assistant_message_function_kwarg = assistant_message_function_kwarg
568
+ streaming_interface.assistant_message_tool_name = assistant_message_tool_name
569
+ streaming_interface.assistant_message_tool_kwarg = assistant_message_tool_kwarg
554
570
 
555
571
  # Related to JSON buffer reader
556
572
  streaming_interface.inner_thoughts_in_kwargs = (
@@ -565,14 +581,11 @@ async def send_message_to_agent(
565
581
  user_id=user_id,
566
582
  agent_id=agent_id,
567
583
  messages=messages,
584
+ interface=streaming_interface,
568
585
  )
569
586
  )
570
587
 
571
588
  if stream_steps:
572
- if return_message_object:
573
- # TODO implement returning `Message`s in a stream, not just `LettaMessage` format
574
- raise NotImplementedError
575
-
576
589
  # return a stream
577
590
  return StreamingResponse(
578
591
  sse_async_generator(
@@ -602,14 +615,7 @@ async def send_message_to_agent(
602
615
  # If we want to convert these to Message, we can use the attached IDs
603
616
  # NOTE: we will need to de-duplicate the Message IDs though (since Assistant->Inner+Func_Call)
604
617
  # TODO: eventually update the interface to use `Message` and `MessageChunk` (new) inside the deque instead
605
- if return_message_object:
606
- message_ids = [m.id for m in filtered_stream]
607
- message_ids = deduplicate(message_ids)
608
- message_objs = [server.get_agent_message(agent_id=agent_id, message_id=m_id) for m_id in message_ids]
609
- message_objs = [m for m in message_objs if m is not None]
610
- return LettaResponse(messages=message_objs, usage=usage)
611
- else:
612
- return LettaResponse(messages=filtered_stream, usage=usage)
618
+ return LettaResponse(messages=filtered_stream, usage=usage)
613
619
 
614
620
  except HTTPException:
615
621
  raise
@@ -3,7 +3,8 @@ from typing import TYPE_CHECKING, List, Optional
3
3
  from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
4
4
 
5
5
  from letta.orm.errors import NoResultFound
6
- from letta.schemas.block import Block, BlockCreate, BlockUpdate
6
+ from letta.schemas.block import Block, BlockUpdate, CreateBlock
7
+ from letta.schemas.memory import Memory
7
8
  from letta.server.rest_api.utils import get_letta_server
8
9
  from letta.server.server import SyncServer
9
10
 
@@ -28,7 +29,7 @@ def list_blocks(
28
29
 
29
30
  @router.post("/", response_model=Block, operation_id="create_memory_block")
30
31
  def create_block(
31
- create_block: BlockCreate = Body(...),
32
+ create_block: CreateBlock = Body(...),
32
33
  server: SyncServer = Depends(get_letta_server),
33
34
  user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
34
35
  ):
@@ -40,12 +41,12 @@ def create_block(
40
41
  @router.patch("/{block_id}", response_model=Block, operation_id="update_memory_block")
41
42
  def update_block(
42
43
  block_id: str,
43
- updated_block: BlockUpdate = Body(...),
44
+ update_block: BlockUpdate = Body(...),
44
45
  server: SyncServer = Depends(get_letta_server),
45
46
  user_id: Optional[str] = Header(None, alias="user_id"),
46
47
  ):
47
48
  actor = server.get_user_or_default(user_id=user_id)
48
- return server.block_manager.update_block(block_id=block_id, block_update=updated_block, actor=actor)
49
+ return server.block_manager.update_block(block_id=block_id, block_update=update_block, actor=actor)
49
50
 
50
51
 
51
52
  @router.delete("/{block_id}", response_model=Block, operation_id="delete_memory_block")
@@ -64,8 +65,52 @@ def get_block(
64
65
  server: SyncServer = Depends(get_letta_server),
65
66
  user_id: Optional[str] = Header(None, alias="user_id"),
66
67
  ):
68
+ print("call get block", block_id)
67
69
  actor = server.get_user_or_default(user_id=user_id)
68
70
  try:
69
- return server.block_manager.get_block_by_id(block_id=block_id, actor=actor)
71
+ block = server.block_manager.get_block_by_id(block_id=block_id, actor=actor)
72
+ if block is None:
73
+ raise HTTPException(status_code=404, detail="Block not found")
74
+ return block
70
75
  except NoResultFound:
71
76
  raise HTTPException(status_code=404, detail="Block not found")
77
+
78
+
79
+ @router.patch("/{block_id}/attach", response_model=Block, operation_id="update_agent_memory_block")
80
+ def link_agent_memory_block(
81
+ block_id: str,
82
+ agent_id: str = Query(..., description="The unique identifier of the agent to attach the source to."),
83
+ server: "SyncServer" = Depends(get_letta_server),
84
+ user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
85
+ ):
86
+ """
87
+ Link a memory block to an agent.
88
+ """
89
+ actor = server.get_user_or_default(user_id=user_id)
90
+
91
+ block = server.block_manager.get_block_by_id(block_id=block_id, actor=actor)
92
+ if block is None:
93
+ raise HTTPException(status_code=404, detail="Block not found")
94
+
95
+ server.blocks_agents_manager.add_block_to_agent(agent_id=agent_id, block_id=block_id, block_label=block.label)
96
+ return block
97
+
98
+
99
+ @router.patch("/{block_id}/detach", response_model=Memory, operation_id="update_agent_memory_block")
100
+ def unlink_agent_memory_block(
101
+ block_id: str,
102
+ agent_id: str = Query(..., description="The unique identifier of the agent to attach the source to."),
103
+ server: "SyncServer" = Depends(get_letta_server),
104
+ user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
105
+ ):
106
+ """
107
+ Unlink a memory block from an agent
108
+ """
109
+ actor = server.get_user_or_default(user_id=user_id)
110
+
111
+ block = server.block_manager.get_block_by_id(block_id=block_id, actor=actor)
112
+ if block is None:
113
+ raise HTTPException(status_code=404, detail="Block not found")
114
  + # Unlink the block from the agent
115
+ server.blocks_agents_manager.remove_block_with_id_from_agent(agent_id=agent_id, block_id=block_id)
116
+ return block
@@ -2,6 +2,7 @@ from typing import List, Optional
2
2
 
3
3
  from fastapi import APIRouter, Body, Depends, Header, HTTPException
4
4
 
5
+ from letta.errors import LettaToolCreateError
5
6
  from letta.orm.errors import UniqueConstraintViolationError
6
7
  from letta.schemas.tool import Tool, ToolCreate, ToolUpdate
7
8
  from letta.server.rest_api.utils import get_letta_server
@@ -14,12 +15,13 @@ router = APIRouter(prefix="/tools", tags=["tools"])
14
15
  def delete_tool(
15
16
  tool_id: str,
16
17
  server: SyncServer = Depends(get_letta_server),
18
+ user_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
17
19
  ):
18
20
  """
19
21
  Delete a tool by name
20
22
  """
21
- # actor = server.get_user_or_default(user_id=user_id)
22
- server.tool_manager.delete_tool(tool_id=tool_id)
23
+ actor = server.get_user_or_default(user_id=user_id)
24
+ server.tool_manager.delete_tool_by_id(tool_id=tool_id, actor=actor)
23
25
 
24
26
 
25
27
  @router.get("/{tool_id}", response_model=Tool, operation_id="get_tool")
@@ -91,7 +93,16 @@ def create_tool(
91
93
  except UniqueConstraintViolationError as e:
92
94
  # Log or print the full exception here for debugging
93
95
  print(f"Error occurred: {e}")
94
- raise HTTPException(status_code=409, detail=str(e))
96
+ clean_error_message = f"Tool with name {request.name} already exists."
97
+ raise HTTPException(status_code=409, detail=clean_error_message)
98
+ except LettaToolCreateError as e:
99
+ # HTTP 400 == Bad Request
100
+ print(f"Error occurred during tool creation: {e}")
101
+ # print the full stack trace
102
+ import traceback
103
+
104
+ print(traceback.format_exc())
105
+ raise HTTPException(status_code=400, detail=str(e))
95
106
  except Exception as e:
96
107
  # Catch other unexpected errors and raise an internal server error
97
108
  print(f"Unexpected error occurred: {e}")