letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic. Click here for more details.

Files changed (87) hide show
  1. letta/__init__.py +1 -1
  2. letta/agent.py +47 -12
  3. letta/agents/base_agent.py +7 -4
  4. letta/agents/helpers.py +52 -0
  5. letta/agents/letta_agent.py +105 -42
  6. letta/agents/voice_agent.py +2 -2
  7. letta/constants.py +13 -1
  8. letta/errors.py +10 -3
  9. letta/functions/function_sets/base.py +65 -0
  10. letta/functions/interface.py +2 -2
  11. letta/functions/mcp_client/base_client.py +18 -1
  12. letta/{dynamic_multi_agent.py → groups/dynamic_multi_agent.py} +3 -0
  13. letta/groups/helpers.py +113 -0
  14. letta/{round_robin_multi_agent.py → groups/round_robin_multi_agent.py} +2 -0
  15. letta/groups/sleeptime_multi_agent.py +259 -0
  16. letta/{supervisor_multi_agent.py → groups/supervisor_multi_agent.py} +1 -0
  17. letta/helpers/converters.py +109 -7
  18. letta/helpers/message_helper.py +1 -0
  19. letta/helpers/tool_rule_solver.py +40 -23
  20. letta/interface.py +12 -5
  21. letta/interfaces/anthropic_streaming_interface.py +329 -0
  22. letta/llm_api/anthropic.py +12 -1
  23. letta/llm_api/anthropic_client.py +65 -14
  24. letta/llm_api/azure_openai.py +2 -2
  25. letta/llm_api/google_ai_client.py +13 -2
  26. letta/llm_api/google_constants.py +3 -0
  27. letta/llm_api/google_vertex_client.py +2 -2
  28. letta/llm_api/llm_api_tools.py +1 -1
  29. letta/llm_api/llm_client.py +7 -0
  30. letta/llm_api/llm_client_base.py +2 -7
  31. letta/llm_api/openai.py +7 -1
  32. letta/llm_api/openai_client.py +250 -0
  33. letta/orm/__init__.py +4 -0
  34. letta/orm/agent.py +6 -0
  35. letta/orm/block.py +32 -2
  36. letta/orm/block_history.py +46 -0
  37. letta/orm/custom_columns.py +60 -0
  38. letta/orm/enums.py +7 -0
  39. letta/orm/group.py +6 -0
  40. letta/orm/groups_blocks.py +13 -0
  41. letta/orm/llm_batch_items.py +55 -0
  42. letta/orm/llm_batch_job.py +48 -0
  43. letta/orm/message.py +7 -1
  44. letta/orm/organization.py +2 -0
  45. letta/orm/sqlalchemy_base.py +18 -15
  46. letta/prompts/system/memgpt_sleeptime_chat.txt +52 -0
  47. letta/prompts/system/sleeptime.txt +26 -0
  48. letta/schemas/agent.py +13 -1
  49. letta/schemas/enums.py +17 -2
  50. letta/schemas/group.py +14 -1
  51. letta/schemas/letta_message.py +5 -3
  52. letta/schemas/llm_batch_job.py +53 -0
  53. letta/schemas/llm_config.py +14 -4
  54. letta/schemas/message.py +44 -0
  55. letta/schemas/tool.py +3 -0
  56. letta/schemas/usage.py +1 -0
  57. letta/server/db.py +2 -0
  58. letta/server/rest_api/app.py +1 -1
  59. letta/server/rest_api/chat_completions_interface.py +8 -3
  60. letta/server/rest_api/interface.py +36 -7
  61. letta/server/rest_api/routers/v1/agents.py +53 -39
  62. letta/server/rest_api/routers/v1/runs.py +14 -2
  63. letta/server/rest_api/utils.py +15 -4
  64. letta/server/server.py +120 -71
  65. letta/services/agent_manager.py +70 -6
  66. letta/services/block_manager.py +190 -2
  67. letta/services/group_manager.py +68 -0
  68. letta/services/helpers/agent_manager_helper.py +6 -4
  69. letta/services/llm_batch_manager.py +139 -0
  70. letta/services/message_manager.py +17 -31
  71. letta/services/tool_executor/tool_execution_sandbox.py +1 -3
  72. letta/services/tool_executor/tool_executor.py +9 -20
  73. letta/services/tool_manager.py +14 -3
  74. letta/services/tool_sandbox/__init__.py +0 -0
  75. letta/services/tool_sandbox/base.py +188 -0
  76. letta/services/tool_sandbox/e2b_sandbox.py +116 -0
  77. letta/services/tool_sandbox/local_sandbox.py +221 -0
  78. letta/sleeptime_agent.py +61 -0
  79. letta/streaming_interface.py +20 -10
  80. letta/utils.py +4 -0
  81. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/METADATA +2 -2
  82. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/RECORD +85 -69
  83. letta/offline_memory_agent.py +0 -173
  84. letta/services/tool_executor/async_tool_execution_sandbox.py +0 -397
  85. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/LICENSE +0 -0
  86. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/WHEEL +0 -0
  87. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/entry_points.txt +0 -0
@@ -19,6 +19,7 @@ router = APIRouter(prefix="/runs", tags=["runs"])
19
19
  @router.get("/", response_model=List[Run], operation_id="list_runs")
20
20
  def list_runs(
21
21
  server: "SyncServer" = Depends(get_letta_server),
22
+ agent_ids: Optional[List[str]] = Query(None, description="The unique identifier of the agent associated with the run."),
22
23
  actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
23
24
  ):
24
25
  """
@@ -26,12 +27,18 @@ def list_runs(
26
27
  """
27
28
  actor = server.user_manager.get_user_or_default(user_id=actor_id)
28
29
 
29
- return [Run.from_job(job) for job in server.job_manager.list_jobs(actor=actor, job_type=JobType.RUN)]
30
+ runs = [Run.from_job(job) for job in server.job_manager.list_jobs(actor=actor, job_type=JobType.RUN)]
31
+
32
+ if not agent_ids:
33
+ return runs
34
+
35
+ return [run for run in runs if "agent_id" in run.metadata and run.metadata["agent_id"] in agent_ids]
30
36
 
31
37
 
32
38
  @router.get("/active", response_model=List[Run], operation_id="list_active_runs")
33
39
  def list_active_runs(
34
40
  server: "SyncServer" = Depends(get_letta_server),
41
+ agent_ids: Optional[List[str]] = Query(None, description="The unique identifier of the agent associated with the run."),
35
42
  actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
36
43
  ):
37
44
  """
@@ -41,7 +48,12 @@ def list_active_runs(
41
48
 
42
49
  active_runs = server.job_manager.list_jobs(actor=actor, statuses=[JobStatus.created, JobStatus.running], job_type=JobType.RUN)
43
50
 
44
- return [Run.from_job(job) for job in active_runs]
51
+ active_runs = [Run.from_job(job) for job in active_runs]
52
+
53
+ if not agent_ids:
54
+ return active_runs
55
+
56
+ return [run for run in active_runs if "agent_id" in run.metadata and run.metadata["agent_id"] in agent_ids]
45
57
 
46
58
 
47
59
  @router.get("/{run_id}", response_model=Run, operation_id="retrieve_run")
@@ -18,7 +18,7 @@ from letta.errors import ContextWindowExceededError, RateLimitExceededError
18
18
  from letta.helpers.datetime_helpers import get_utc_time
19
19
  from letta.log import get_logger
20
20
  from letta.schemas.enums import MessageRole
21
- from letta.schemas.letta_message_content import TextContent
21
+ from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
22
22
  from letta.schemas.message import Message
23
23
  from letta.schemas.usage import LettaUsageStatistics
24
24
  from letta.schemas.user import User
@@ -167,7 +167,7 @@ def create_user_message(input_message: dict, agent_id: str, actor: User) -> Mess
167
167
  return user_message
168
168
 
169
169
 
170
- def create_tool_call_messages_from_openai_response(
170
+ def create_letta_messages_from_llm_response(
171
171
  agent_id: str,
172
172
  model: str,
173
173
  function_name: str,
@@ -177,6 +177,9 @@ def create_tool_call_messages_from_openai_response(
177
177
  function_response: Optional[str],
178
178
  actor: User,
179
179
  add_heartbeat_request_system_message: bool = False,
180
+ reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
181
+ pre_computed_assistant_message_id: Optional[str] = None,
182
+ pre_computed_tool_message_id: Optional[str] = None,
180
183
  ) -> List[Message]:
181
184
  messages = []
182
185
 
@@ -190,9 +193,11 @@ def create_tool_call_messages_from_openai_response(
190
193
  ),
191
194
  type="function",
192
195
  )
196
+ # TODO: Use ToolCallContent instead of tool_calls
197
+ # TODO: This helps preserve ordering
193
198
  assistant_message = Message(
194
199
  role=MessageRole.assistant,
195
- content=[],
200
+ content=reasoning_content if reasoning_content else [],
196
201
  organization_id=actor.organization_id,
197
202
  agent_id=agent_id,
198
203
  model=model,
@@ -200,8 +205,12 @@ def create_tool_call_messages_from_openai_response(
200
205
  tool_call_id=tool_call_id,
201
206
  created_at=get_utc_time(),
202
207
  )
208
+ if pre_computed_assistant_message_id:
209
+ assistant_message.id = pre_computed_assistant_message_id
203
210
  messages.append(assistant_message)
204
211
 
212
+ # TODO: Use ToolReturnContent instead of TextContent
213
+ # TODO: This helps preserve ordering
205
214
  tool_message = Message(
206
215
  role=MessageRole.tool,
207
216
  content=[TextContent(text=package_function_response(function_call_success, function_response))],
@@ -212,6 +221,8 @@ def create_tool_call_messages_from_openai_response(
212
221
  tool_call_id=tool_call_id,
213
222
  created_at=get_utc_time(),
214
223
  )
224
+ if pre_computed_tool_message_id:
225
+ tool_message.id = pre_computed_tool_message_id
215
226
  messages.append(tool_message)
216
227
 
217
228
  if add_heartbeat_request_system_message:
@@ -243,7 +254,7 @@ def create_assistant_messages_from_openai_response(
243
254
  """
244
255
  tool_call_id = str(uuid.uuid4())
245
256
 
246
- return create_tool_call_messages_from_openai_response(
257
+ return create_letta_messages_from_llm_response(
247
258
  agent_id=agent_id,
248
259
  model=model,
249
260
  function_name=DEFAULT_MESSAGE_TOOL,
letta/server/server.py CHANGED
@@ -19,11 +19,11 @@ import letta.system as system
19
19
  from letta.agent import Agent, save_agent
20
20
  from letta.config import LettaConfig
21
21
  from letta.data_sources.connectors import DataConnector, load_data
22
- from letta.dynamic_multi_agent import DynamicMultiAgent
23
22
  from letta.functions.mcp_client.base_client import BaseMCPClient
24
23
  from letta.functions.mcp_client.sse_client import MCP_CONFIG_TOPLEVEL_KEY, SSEMCPClient
25
24
  from letta.functions.mcp_client.stdio_client import StdioMCPClient
26
25
  from letta.functions.mcp_client.types import MCPServerType, MCPTool, SSEServerConfig, StdioServerConfig
26
+ from letta.groups.helpers import load_multi_agent
27
27
  from letta.helpers.datetime_helpers import get_utc_time
28
28
  from letta.helpers.json_helpers import json_dumps, json_loads
29
29
  from letta.helpers.message_helper import prepare_input_message_create
@@ -32,17 +32,15 @@ from letta.helpers.message_helper import prepare_input_message_create
32
32
  from letta.interface import AgentInterface # abstract
33
33
  from letta.interface import CLIInterface # for printing to terminal
34
34
  from letta.log import get_logger
35
- from letta.offline_memory_agent import OfflineMemoryAgent
36
35
  from letta.orm.errors import NoResultFound
37
- from letta.round_robin_multi_agent import RoundRobinMultiAgent
38
36
  from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
39
- from letta.schemas.block import BlockUpdate
37
+ from letta.schemas.block import BlockUpdate, CreateBlock
40
38
  from letta.schemas.embedding_config import EmbeddingConfig
41
39
 
42
40
  # openai schemas
43
41
  from letta.schemas.enums import JobStatus, MessageStreamStatus
44
42
  from letta.schemas.environment_variables import SandboxEnvironmentVariableCreate
45
- from letta.schemas.group import Group, ManagerType
43
+ from letta.schemas.group import GroupCreate, SleeptimeManager
46
44
  from letta.schemas.job import Job, JobUpdate
47
45
  from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage, ToolReturnMessage
48
46
  from letta.schemas.letta_message_content import TextContent
@@ -83,6 +81,7 @@ from letta.services.block_manager import BlockManager
83
81
  from letta.services.group_manager import GroupManager
84
82
  from letta.services.identity_manager import IdentityManager
85
83
  from letta.services.job_manager import JobManager
84
+ from letta.services.llm_batch_manager import LLMBatchManager
86
85
  from letta.services.message_manager import MessageManager
87
86
  from letta.services.organization_manager import OrganizationManager
88
87
  from letta.services.passage_manager import PassageManager
@@ -94,9 +93,9 @@ from letta.services.tool_executor.tool_execution_sandbox import ToolExecutionSan
94
93
  from letta.services.tool_manager import ToolManager
95
94
  from letta.services.user_manager import UserManager
96
95
  from letta.settings import model_settings, settings, tool_settings
97
- from letta.supervisor_multi_agent import SupervisorMultiAgent
96
+ from letta.sleeptime_agent import SleeptimeAgent
98
97
  from letta.tracing import trace_method
99
- from letta.utils import get_friendly_error_msg
98
+ from letta.utils import get_friendly_error_msg, make_key
100
99
 
101
100
  config = LettaConfig.load()
102
101
  logger = get_logger(__name__)
@@ -209,6 +208,7 @@ class SyncServer(Server):
209
208
  self.step_manager = StepManager()
210
209
  self.identity_manager = IdentityManager()
211
210
  self.group_manager = GroupManager()
211
+ self.batch_manager = LLMBatchManager()
212
212
 
213
213
  # Make default user and org
214
214
  if init_with_default_org_and_user:
@@ -348,65 +348,28 @@ class SyncServer(Server):
348
348
  logger.info(f"MCP tools connected: {', '.join([t.name for t in mcp_tools])}")
349
349
  logger.debug(f"MCP tools: {', '.join([str(t) for t in mcp_tools])}")
350
350
 
351
+ # TODO: Remove these in memory caches
352
+ self._llm_config_cache = {}
353
+ self._embedding_config_cache = {}
354
+
351
355
  def load_agent(self, agent_id: str, actor: User, interface: Union[AgentInterface, None] = None) -> Agent:
352
356
  """Updated method to load agents from persisted storage"""
353
357
  agent_state = self.agent_manager.get_agent_by_id(agent_id=agent_id, actor=actor)
354
358
  if agent_state.multi_agent_group:
355
- return self.load_multi_agent(agent_state.multi_agent_group, actor, interface, agent_state)
359
+ return load_multi_agent(
360
+ group=agent_state.multi_agent_group, agent_state=agent_state, actor=actor, interface=interface, mcp_clients=self.mcp_clients
361
+ )
356
362
 
357
363
  interface = interface or self.default_interface_factory()
358
364
  if agent_state.agent_type == AgentType.memgpt_agent:
359
365
  agent = Agent(agent_state=agent_state, interface=interface, user=actor, mcp_clients=self.mcp_clients)
360
- elif agent_state.agent_type == AgentType.offline_memory_agent:
361
- agent = OfflineMemoryAgent(agent_state=agent_state, interface=interface, user=actor)
366
+ elif agent_state.agent_type == AgentType.sleeptime_agent:
367
+ agent = SleeptimeAgent(agent_state=agent_state, interface=interface, user=actor)
362
368
  else:
363
369
  raise ValueError(f"Invalid agent type {agent_state.agent_type}")
364
370
 
365
371
  return agent
366
372
 
367
- def load_multi_agent(
368
- self, group: Group, actor: User, interface: Union[AgentInterface, None] = None, agent_state: Optional[AgentState] = None
369
- ) -> Agent:
370
- if len(group.agent_ids) == 0:
371
- raise ValueError("Empty group: group must have at least one agent")
372
-
373
- match group.manager_type:
374
- case ManagerType.round_robin:
375
- agent_state = agent_state or self.agent_manager.get_agent_by_id(agent_id=group.agent_ids[0], actor=actor)
376
- return RoundRobinMultiAgent(
377
- agent_state=agent_state,
378
- interface=interface,
379
- user=actor,
380
- group_id=group.id,
381
- agent_ids=group.agent_ids,
382
- description=group.description,
383
- max_turns=group.max_turns,
384
- )
385
- case ManagerType.dynamic:
386
- agent_state = agent_state or self.agent_manager.get_agent_by_id(agent_id=group.manager_agent_id, actor=actor)
387
- return DynamicMultiAgent(
388
- agent_state=agent_state,
389
- interface=interface,
390
- user=actor,
391
- group_id=group.id,
392
- agent_ids=group.agent_ids,
393
- description=group.description,
394
- max_turns=group.max_turns,
395
- termination_token=group.termination_token,
396
- )
397
- case ManagerType.supervisor:
398
- agent_state = agent_state or self.agent_manager.get_agent_by_id(agent_id=group.manager_agent_id, actor=actor)
399
- return SupervisorMultiAgent(
400
- agent_state=agent_state,
401
- interface=interface,
402
- user=actor,
403
- group_id=group.id,
404
- agent_ids=group.agent_ids,
405
- description=group.description,
406
- )
407
- case _:
408
- raise ValueError(f"Type {group.manager_type} is not supported.")
409
-
410
373
  def _step(
411
374
  self,
412
375
  actor: User,
@@ -739,6 +702,18 @@ class SyncServer(Server):
739
702
  command = command[1:] # strip the prefix
740
703
  return self._command(user_id=user_id, agent_id=agent_id, command=command)
741
704
 
705
+ def get_cached_llm_config(self, **kwargs):
706
+ key = make_key(**kwargs)
707
+ if key not in self._llm_config_cache:
708
+ self._llm_config_cache[key] = self.get_llm_config_from_handle(**kwargs)
709
+ return self._llm_config_cache[key]
710
+
711
+ def get_cached_embedding_config(self, **kwargs):
712
+ key = make_key(**kwargs)
713
+ if key not in self._embedding_config_cache:
714
+ self._embedding_config_cache[key] = self.get_embedding_config_from_handle(**kwargs)
715
+ return self._embedding_config_cache[key]
716
+
742
717
  def create_agent(
743
718
  self,
744
719
  request: CreateAgent,
@@ -749,7 +724,7 @@ class SyncServer(Server):
749
724
  if request.llm_config is None:
750
725
  if request.model is None:
751
726
  raise ValueError("Must specify either model or llm_config in request")
752
- request.llm_config = self.get_llm_config_from_handle(
727
+ request.llm_config = self.get_cached_llm_config(
753
728
  handle=request.model,
754
729
  context_window_limit=request.context_window_limit,
755
730
  max_tokens=request.max_tokens,
@@ -760,17 +735,21 @@ class SyncServer(Server):
760
735
  if request.embedding_config is None:
761
736
  if request.embedding is None:
762
737
  raise ValueError("Must specify either embedding or embedding_config in request")
763
- request.embedding_config = self.get_embedding_config_from_handle(
764
- handle=request.embedding, embedding_chunk_size=request.embedding_chunk_size or constants.DEFAULT_EMBEDDING_CHUNK_SIZE
738
+ request.embedding_config = self.get_cached_embedding_config(
739
+ handle=request.embedding,
740
+ embedding_chunk_size=request.embedding_chunk_size or constants.DEFAULT_EMBEDDING_CHUNK_SIZE,
765
741
  )
766
742
 
767
- """Create a new agent using a config"""
768
- # Invoke manager
769
- return self.agent_manager.create_agent(
743
+ main_agent = self.agent_manager.create_agent(
770
744
  agent_create=request,
771
745
  actor=actor,
772
746
  )
773
747
 
748
+ if request.enable_sleeptime:
749
+ main_agent = self.create_sleeptime_agent(main_agent=main_agent, actor=actor)
750
+
751
+ return main_agent
752
+
774
753
  def update_agent(
775
754
  self,
776
755
  agent_id: str,
@@ -783,13 +762,54 @@ class SyncServer(Server):
783
762
  if request.embedding is not None:
784
763
  request.embedding_config = self.get_embedding_config_from_handle(handle=request.embedding)
785
764
 
786
- # Invoke manager
765
+ if request.enable_sleeptime:
766
+ agent = self.agent_manager.get_agent_by_id(agent_id=agent_id, actor=actor)
767
+ if agent.multi_agent_group is None:
768
+ self.create_sleeptime_agent(main_agent=agent, actor=actor)
769
+
787
770
  return self.agent_manager.update_agent(
788
771
  agent_id=agent_id,
789
772
  agent_update=request,
790
773
  actor=actor,
791
774
  )
792
775
 
776
+ def create_sleeptime_agent(self, main_agent: AgentState, actor: User) -> AgentState:
777
+ request = CreateAgent(
778
+ name=main_agent.name,
779
+ agent_type=AgentType.sleeptime_agent,
780
+ block_ids=[block.id for block in main_agent.memory.blocks],
781
+ memory_blocks=[
782
+ CreateBlock(
783
+ label="memory_persona",
784
+ value=(
785
+ "I am an expert conversation memory manager. "
786
+ "I manage the memory blocks such that they "
787
+ "contain everything that is important about "
788
+ "the conversation."
789
+ ),
790
+ ),
791
+ ],
792
+ llm_config=main_agent.llm_config,
793
+ embedding_config=main_agent.embedding_config,
794
+ project_id=main_agent.project_id,
795
+ )
796
+ sleeptime_agent = self.agent_manager.create_agent(
797
+ agent_create=request,
798
+ actor=actor,
799
+ )
800
+ self.group_manager.create_group(
801
+ group=GroupCreate(
802
+ description="",
803
+ agent_ids=[sleeptime_agent.id],
804
+ manager_config=SleeptimeManager(
805
+ manager_agent_id=main_agent.id,
806
+ sleeptime_agent_frequency=5,
807
+ ),
808
+ ),
809
+ actor=actor,
810
+ )
811
+ return self.agent_manager.get_agent_by_id(agent_id=main_agent.id, actor=actor)
812
+
793
813
  # convert name->id
794
814
 
795
815
  # TODO: These can be moved to agent_manager
@@ -892,6 +912,7 @@ class SyncServer(Server):
892
912
  use_assistant_message=use_assistant_message,
893
913
  assistant_message_tool_name=assistant_message_tool_name,
894
914
  assistant_message_tool_kwarg=assistant_message_tool_kwarg,
915
+ reverse=reverse,
895
916
  )
896
917
 
897
918
  if reverse:
@@ -1102,6 +1123,8 @@ class SyncServer(Server):
1102
1123
  raise ValueError(f"LLM model {model_name} is not supported by {provider_name}")
1103
1124
  except ValueError as e:
1104
1125
  llm_configs = [config for config in self.get_local_llm_configs() if config.handle == handle]
1126
+ if not llm_configs:
1127
+ llm_configs = [config for config in self.get_local_llm_configs() if config.model == model_name]
1105
1128
  if not llm_configs:
1106
1129
  raise e
1107
1130
 
@@ -1133,20 +1156,25 @@ class SyncServer(Server):
1133
1156
  def get_embedding_config_from_handle(
1134
1157
  self, handle: str, embedding_chunk_size: int = constants.DEFAULT_EMBEDDING_CHUNK_SIZE
1135
1158
  ) -> EmbeddingConfig:
1136
- provider_name, model_name = handle.split("/", 1)
1137
- provider = self.get_provider_from_name(provider_name)
1159
+ try:
1160
+ provider_name, model_name = handle.split("/", 1)
1161
+ provider = self.get_provider_from_name(provider_name)
1162
+
1163
+ embedding_configs = [config for config in provider.list_embedding_models() if config.handle == handle]
1164
+ if not embedding_configs:
1165
+ raise ValueError(f"Embedding model {model_name} is not supported by {provider_name}")
1166
+ except ValueError as e:
1167
+ # search local configs
1168
+ embedding_configs = [config for config in self.get_local_embedding_configs() if config.handle == handle]
1169
+ if not embedding_configs:
1170
+ raise e
1138
1171
 
1139
- embedding_configs = [config for config in provider.list_embedding_models() if config.handle == handle]
1140
1172
  if len(embedding_configs) == 1:
1141
1173
  embedding_config = embedding_configs[0]
1174
+ elif len(embedding_configs) > 1:
1175
+ raise ValueError(f"Multiple embedding models with name {model_name} supported by {provider_name}")
1142
1176
  else:
1143
- embedding_configs = [config for config in provider.list_embedding_models() if config.embedding_model == model_name]
1144
- if not embedding_configs:
1145
- raise ValueError(f"Embedding model {model_name} is not supported by {provider_name}")
1146
- elif len(embedding_configs) > 1:
1147
- raise ValueError(f"Multiple embedding models with name {model_name} supported by {provider_name}")
1148
- else:
1149
- embedding_config = embedding_configs[0]
1177
+ embedding_config = embedding_configs[0]
1150
1178
 
1151
1179
  if embedding_chunk_size:
1152
1180
  embedding_config.embedding_chunk_size = embedding_chunk_size
@@ -1183,6 +1211,25 @@ class SyncServer(Server):
1183
1211
  warnings.warn(f"Error reading LLM configs directory: {e}")
1184
1212
  return llm_models
1185
1213
 
1214
+ def get_local_embedding_configs(self):
1215
+ embedding_models = []
1216
+ try:
1217
+ embedding_configs_dir = os.path.expanduser("~/.letta/embedding_configs")
1218
+ if os.path.exists(embedding_configs_dir):
1219
+ for filename in os.listdir(embedding_configs_dir):
1220
+ if filename.endswith(".json"):
1221
+ filepath = os.path.join(embedding_configs_dir, filename)
1222
+ try:
1223
+ with open(filepath, "r") as f:
1224
+ config_data = json.load(f)
1225
+ embedding_config = EmbeddingConfig(**config_data)
1226
+ embedding_models.append(embedding_config)
1227
+ except (json.JSONDecodeError, ValueError) as e:
1228
+ warnings.warn(f"Error parsing embedding config file {filename}: {e}")
1229
+ except Exception as e:
1230
+ warnings.warn(f"Error reading embedding configs directory: {e}")
1231
+ return embedding_models
1232
+
1186
1233
  def add_llm_model(self, request: LLMConfig) -> LLMConfig:
1187
1234
  """Add a new LLM model"""
1188
1235
 
@@ -1599,7 +1646,9 @@ class SyncServer(Server):
1599
1646
  raise ValueError("stream_steps must be 'true' if stream_tokens is 'true'")
1600
1647
 
1601
1648
  group = self.group_manager.retrieve_group(group_id=group_id, actor=actor)
1602
- letta_multi_agent = self.load_multi_agent(group=group, actor=actor)
1649
+ agent_state_id = group.manager_agent_id or (group.agent_ids[0] if len(group.agent_ids) > 0 else None)
1650
+ agent_state = self.agent_manager.get_agent_by_id(agent_id=agent_state_id, actor=actor) if agent_state_id else None
1651
+ letta_multi_agent = load_multi_agent(group=group, agent_state=agent_state, actor=actor)
1603
1652
 
1604
1653
  llm_config = letta_multi_agent.agent_state.llm_config
1605
1654
  supports_token_streaming = ["openai", "anthropic", "deepseek"]
@@ -4,13 +4,22 @@ from typing import Dict, List, Optional
4
4
  import numpy as np
5
5
  from sqlalchemy import Select, and_, func, literal, or_, select, union_all
6
6
 
7
- from letta.constants import BASE_MEMORY_TOOLS, BASE_TOOLS, DATA_SOURCE_ATTACH_ALERT, MAX_EMBEDDING_DIM, MULTI_AGENT_TOOLS
7
+ from letta.constants import (
8
+ BASE_MEMORY_TOOLS,
9
+ BASE_SLEEPTIME_CHAT_TOOLS,
10
+ BASE_SLEEPTIME_TOOLS,
11
+ BASE_TOOLS,
12
+ DATA_SOURCE_ATTACH_ALERT,
13
+ MAX_EMBEDDING_DIM,
14
+ MULTI_AGENT_TOOLS,
15
+ )
8
16
  from letta.embeddings import embedding_model
9
17
  from letta.helpers.datetime_helpers import get_utc_time
10
18
  from letta.log import get_logger
11
19
  from letta.orm import Agent as AgentModel
12
20
  from letta.orm import AgentPassage, AgentsTags
13
21
  from letta.orm import Block as BlockModel
22
+ from letta.orm import Group as GroupModel
14
23
  from letta.orm import Identity as IdentityModel
15
24
  from letta.orm import Source as SourceModel
16
25
  from letta.orm import SourcePassage, SourcesAgents
@@ -25,6 +34,7 @@ from letta.schemas.agent import AgentType, CreateAgent, UpdateAgent
25
34
  from letta.schemas.block import Block as PydanticBlock
26
35
  from letta.schemas.block import BlockUpdate
27
36
  from letta.schemas.embedding_config import EmbeddingConfig
37
+ from letta.schemas.group import ManagerType
28
38
  from letta.schemas.llm_config import LLMConfig
29
39
  from letta.schemas.memory import Memory
30
40
  from letta.schemas.message import Message as PydanticMessage
@@ -32,6 +42,7 @@ from letta.schemas.message import MessageCreate
32
42
  from letta.schemas.passage import Passage as PydanticPassage
33
43
  from letta.schemas.source import Source as PydanticSource
34
44
  from letta.schemas.tool import Tool as PydanticTool
45
+ from letta.schemas.tool_rule import ChildToolRule as PydanticChildToolRule
35
46
  from letta.schemas.tool_rule import ContinueToolRule as PydanticContinueToolRule
36
47
  from letta.schemas.tool_rule import TerminalToolRule as PydanticTerminalToolRule
37
48
  from letta.schemas.tool_rule import ToolRule as PydanticToolRule
@@ -88,7 +99,11 @@ class AgentManager:
88
99
  agent_create: CreateAgent,
89
100
  actor: PydanticUser,
90
101
  ) -> PydanticAgentState:
91
- system = derive_system_message(agent_type=agent_create.agent_type, system=agent_create.system)
102
+ system = derive_system_message(
103
+ agent_type=agent_create.agent_type,
104
+ enable_sleeptime=agent_create.enable_sleeptime,
105
+ system=agent_create.system,
106
+ )
92
107
 
93
108
  if not agent_create.llm_config or not agent_create.embedding_config:
94
109
  raise ValueError("llm_config and embedding_config are required")
@@ -104,7 +119,13 @@ class AgentManager:
104
119
  # create passed in `tools`
105
120
  tool_names = []
106
121
  if agent_create.include_base_tools:
107
- tool_names.extend(BASE_TOOLS + BASE_MEMORY_TOOLS)
122
+ if agent_create.agent_type == AgentType.sleeptime_agent:
123
+ tool_names.extend(BASE_SLEEPTIME_TOOLS)
124
+ else:
125
+ if agent_create.enable_sleeptime:
126
+ tool_names.extend(BASE_SLEEPTIME_CHAT_TOOLS)
127
+ else:
128
+ tool_names.extend(BASE_TOOLS + BASE_MEMORY_TOOLS)
108
129
  if agent_create.include_multi_agent_tools:
109
130
  tool_names.extend(MULTI_AGENT_TOOLS)
110
131
  if agent_create.tools:
@@ -121,10 +142,14 @@ class AgentManager:
121
142
 
122
143
  # apply default tool rules
123
144
  for tool_name in tool_names:
124
- if tool_name == "send_message" or tool_name == "send_message_to_agent_async":
145
+ if tool_name == "send_message" or tool_name == "send_message_to_agent_async" or tool_name == "finish_rethinking_memory":
125
146
  tool_rules.append(PydanticTerminalToolRule(tool_name=tool_name))
126
147
  elif tool_name in BASE_TOOLS:
127
148
  tool_rules.append(PydanticContinueToolRule(tool_name=tool_name))
149
+
150
+ if agent_create.agent_type == AgentType.sleeptime_agent:
151
+ tool_rules.append(PydanticChildToolRule(tool_name="view_core_memory_with_line_numbers", children=["core_memory_insert"]))
152
+
128
153
  else:
129
154
  tool_rules = agent_create.tool_rules
130
155
  # Check tool rules are valid
@@ -159,6 +184,7 @@ class AgentManager:
159
184
  template_id=agent_create.template_id,
160
185
  base_template_id=agent_create.base_template_id,
161
186
  message_buffer_autoclear=agent_create.message_buffer_autoclear,
187
+ enable_sleeptime=agent_create.enable_sleeptime,
162
188
  )
163
189
 
164
190
  # If there are provided environment variables, add them in
@@ -223,6 +249,7 @@ class AgentManager:
223
249
  template_id: Optional[str] = None,
224
250
  base_template_id: Optional[str] = None,
225
251
  message_buffer_autoclear: bool = False,
252
+ enable_sleeptime: Optional[bool] = None,
226
253
  ) -> PydanticAgentState:
227
254
  """Create a new agent."""
228
255
  with self.session_maker() as session:
@@ -241,6 +268,7 @@ class AgentManager:
241
268
  "template_id": template_id,
242
269
  "base_template_id": base_template_id,
243
270
  "message_buffer_autoclear": message_buffer_autoclear,
271
+ "enable_sleeptime": enable_sleeptime,
244
272
  }
245
273
 
246
274
  # Create the new agent using SqlalchemyBase.create
@@ -269,6 +297,12 @@ class AgentManager:
269
297
  )
270
298
 
271
299
  # Rebuild the system prompt if it's different
300
+ if agent_update.enable_sleeptime and agent_update.system is None:
301
+ agent_update.system = derive_system_message(
302
+ agent_type=agent_state.agent_type,
303
+ enable_sleeptime=agent_update.enable_sleeptime,
304
+ system=agent_update.system,
305
+ )
272
306
  if agent_update.system and agent_update.system != agent_state.system:
273
307
  agent_state = self.rebuild_system_prompt(agent_id=agent_state.id, actor=actor, force=True, update_timestamp=False)
274
308
 
@@ -305,6 +339,7 @@ class AgentManager:
305
339
  "template_id",
306
340
  "base_template_id",
307
341
  "message_buffer_autoclear",
342
+ "enable_sleeptime",
308
343
  }
309
344
  for field in scalar_fields:
310
345
  value = getattr(agent_update, field, None)
@@ -461,9 +496,33 @@ class AgentManager:
461
496
  """
462
497
  with self.session_maker() as session:
463
498
  # Retrieve the agent
499
+ logger.debug(f"Hard deleting Agent with ID: {agent_id} with actor={actor}")
464
500
  agent = AgentModel.read(db_session=session, identifier=agent_id, actor=actor)
465
- # TODO check if it is managing a group
466
- agent.hard_delete(session)
501
+ agents_to_delete = [agent]
502
+ sleeptime_group_to_delete = None
503
+
504
+ # Delete sleeptime agent and group
505
+ if agent.multi_agent_group:
506
+ participant_agent_ids = agent.multi_agent_group.agent_ids
507
+ if agent.multi_agent_group.manager_type == ManagerType.sleeptime and len(participant_agent_ids) == 1:
508
+ sleeptime_agent = AgentModel.read(db_session=session, identifier=participant_agent_ids[0], actor=actor)
509
+ if sleeptime_agent.agent_type == AgentType.sleeptime_agent:
510
+ sleeptime_agent_group = GroupModel.read(db_session=session, identifier=agent.multi_agent_group.id, actor=actor)
511
+ sleeptime_group_to_delete = sleeptime_agent_group
512
+ agents_to_delete.append(sleeptime_agent)
513
+ try:
514
+ if sleeptime_group_to_delete is not None:
515
+ session.delete(sleeptime_group_to_delete)
516
+ session.commit()
517
+ for agent in agents_to_delete:
518
+ session.delete(agent)
519
+ session.commit()
520
+ except Exception as e:
521
+ session.rollback()
522
+ logger.exception(f"Failed to hard delete Agent with ID {agent_id}")
523
+ raise ValueError(f"Failed to hard delete Agent with ID {agent_id}: {e}")
524
+ else:
525
+ logger.debug(f"Agent with ID {agent_id} successfully hard deleted")
467
526
 
468
527
  @enforce_types
469
528
  def serialize(self, agent_id: str, actor: PydanticUser) -> AgentSchema:
@@ -482,6 +541,7 @@ class AgentManager:
482
541
  append_copy_suffix: bool = True,
483
542
  override_existing_tools: bool = True,
484
543
  project_id: Optional[str] = None,
544
+ strip_messages: Optional[bool] = False,
485
545
  ) -> PydanticAgentState:
486
546
  serialized_agent = serialized_agent.model_dump()
487
547
  tool_data_list = serialized_agent.pop("tools", [])
@@ -493,6 +553,10 @@ class AgentManager:
493
553
  agent.name += "_copy"
494
554
  if project_id:
495
555
  agent.project_id = project_id
556
+
557
+ if strip_messages:
558
+ # we want to strip all but the first (system) message
559
+ agent.message_ids = [agent.message_ids[0]]
496
560
  agent = agent.create(session, actor=actor)
497
561
  pydantic_agent = agent.to_pydantic()
498
562