letta-nightly 0.7.30.dev20250603104343__py3-none-any.whl → 0.8.0.dev20250604104349__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (136) hide show
  1. letta/__init__.py +7 -1
  2. letta/agent.py +14 -7
  3. letta/agents/base_agent.py +1 -0
  4. letta/agents/ephemeral_summary_agent.py +104 -0
  5. letta/agents/helpers.py +35 -3
  6. letta/agents/letta_agent.py +492 -176
  7. letta/agents/letta_agent_batch.py +22 -16
  8. letta/agents/prompts/summary_system_prompt.txt +62 -0
  9. letta/agents/voice_agent.py +22 -7
  10. letta/agents/voice_sleeptime_agent.py +13 -8
  11. letta/constants.py +33 -1
  12. letta/data_sources/connectors.py +52 -36
  13. letta/errors.py +4 -0
  14. letta/functions/ast_parsers.py +13 -30
  15. letta/functions/function_sets/base.py +3 -1
  16. letta/functions/functions.py +2 -0
  17. letta/functions/mcp_client/base_client.py +151 -97
  18. letta/functions/mcp_client/sse_client.py +49 -31
  19. letta/functions/mcp_client/stdio_client.py +107 -106
  20. letta/functions/schema_generator.py +22 -22
  21. letta/groups/helpers.py +3 -4
  22. letta/groups/sleeptime_multi_agent.py +4 -4
  23. letta/groups/sleeptime_multi_agent_v2.py +22 -0
  24. letta/helpers/composio_helpers.py +16 -0
  25. letta/helpers/converters.py +20 -0
  26. letta/helpers/datetime_helpers.py +1 -6
  27. letta/helpers/tool_rule_solver.py +2 -1
  28. letta/interfaces/anthropic_streaming_interface.py +17 -2
  29. letta/interfaces/openai_chat_completions_streaming_interface.py +1 -0
  30. letta/interfaces/openai_streaming_interface.py +18 -2
  31. letta/llm_api/anthropic_client.py +24 -3
  32. letta/llm_api/google_ai_client.py +0 -15
  33. letta/llm_api/google_vertex_client.py +6 -5
  34. letta/llm_api/llm_client_base.py +15 -0
  35. letta/llm_api/openai.py +2 -2
  36. letta/llm_api/openai_client.py +60 -8
  37. letta/orm/__init__.py +2 -0
  38. letta/orm/agent.py +45 -43
  39. letta/orm/base.py +0 -2
  40. letta/orm/block.py +1 -0
  41. letta/orm/custom_columns.py +13 -0
  42. letta/orm/enums.py +5 -0
  43. letta/orm/file.py +3 -1
  44. letta/orm/files_agents.py +68 -0
  45. letta/orm/mcp_server.py +48 -0
  46. letta/orm/message.py +1 -0
  47. letta/orm/organization.py +11 -2
  48. letta/orm/passage.py +25 -10
  49. letta/orm/sandbox_config.py +5 -2
  50. letta/orm/sqlalchemy_base.py +171 -110
  51. letta/prompts/system/memgpt_base.txt +6 -1
  52. letta/prompts/system/memgpt_v2_chat.txt +57 -0
  53. letta/prompts/system/sleeptime.txt +2 -0
  54. letta/prompts/system/sleeptime_v2.txt +28 -0
  55. letta/schemas/agent.py +87 -20
  56. letta/schemas/block.py +7 -1
  57. letta/schemas/file.py +57 -0
  58. letta/schemas/mcp.py +74 -0
  59. letta/schemas/memory.py +5 -2
  60. letta/schemas/message.py +9 -0
  61. letta/schemas/openai/openai.py +0 -6
  62. letta/schemas/providers.py +33 -4
  63. letta/schemas/tool.py +26 -21
  64. letta/schemas/tool_execution_result.py +5 -0
  65. letta/server/db.py +23 -8
  66. letta/server/rest_api/app.py +73 -56
  67. letta/server/rest_api/interface.py +4 -4
  68. letta/server/rest_api/routers/v1/agents.py +132 -47
  69. letta/server/rest_api/routers/v1/blocks.py +3 -2
  70. letta/server/rest_api/routers/v1/embeddings.py +3 -3
  71. letta/server/rest_api/routers/v1/groups.py +3 -3
  72. letta/server/rest_api/routers/v1/jobs.py +14 -17
  73. letta/server/rest_api/routers/v1/organizations.py +10 -10
  74. letta/server/rest_api/routers/v1/providers.py +12 -10
  75. letta/server/rest_api/routers/v1/runs.py +3 -3
  76. letta/server/rest_api/routers/v1/sandbox_configs.py +12 -12
  77. letta/server/rest_api/routers/v1/sources.py +108 -43
  78. letta/server/rest_api/routers/v1/steps.py +8 -6
  79. letta/server/rest_api/routers/v1/tools.py +134 -95
  80. letta/server/rest_api/utils.py +12 -1
  81. letta/server/server.py +272 -73
  82. letta/services/agent_manager.py +246 -313
  83. letta/services/block_manager.py +30 -9
  84. letta/services/context_window_calculator/__init__.py +0 -0
  85. letta/services/context_window_calculator/context_window_calculator.py +150 -0
  86. letta/services/context_window_calculator/token_counter.py +82 -0
  87. letta/services/file_processor/__init__.py +0 -0
  88. letta/services/file_processor/chunker/__init__.py +0 -0
  89. letta/services/file_processor/chunker/llama_index_chunker.py +29 -0
  90. letta/services/file_processor/embedder/__init__.py +0 -0
  91. letta/services/file_processor/embedder/openai_embedder.py +84 -0
  92. letta/services/file_processor/file_processor.py +123 -0
  93. letta/services/file_processor/parser/__init__.py +0 -0
  94. letta/services/file_processor/parser/base_parser.py +9 -0
  95. letta/services/file_processor/parser/mistral_parser.py +54 -0
  96. letta/services/file_processor/types.py +0 -0
  97. letta/services/files_agents_manager.py +184 -0
  98. letta/services/group_manager.py +118 -0
  99. letta/services/helpers/agent_manager_helper.py +76 -21
  100. letta/services/helpers/tool_execution_helper.py +3 -0
  101. letta/services/helpers/tool_parser_helper.py +100 -0
  102. letta/services/identity_manager.py +44 -42
  103. letta/services/job_manager.py +21 -10
  104. letta/services/mcp/base_client.py +5 -2
  105. letta/services/mcp/sse_client.py +3 -5
  106. letta/services/mcp/stdio_client.py +3 -5
  107. letta/services/mcp_manager.py +281 -0
  108. letta/services/message_manager.py +40 -26
  109. letta/services/organization_manager.py +55 -19
  110. letta/services/passage_manager.py +211 -13
  111. letta/services/provider_manager.py +48 -2
  112. letta/services/sandbox_config_manager.py +105 -0
  113. letta/services/source_manager.py +4 -5
  114. letta/services/step_manager.py +9 -6
  115. letta/services/summarizer/summarizer.py +50 -23
  116. letta/services/telemetry_manager.py +7 -0
  117. letta/services/tool_executor/tool_execution_manager.py +11 -52
  118. letta/services/tool_executor/tool_execution_sandbox.py +4 -34
  119. letta/services/tool_executor/tool_executor.py +107 -105
  120. letta/services/tool_manager.py +56 -17
  121. letta/services/tool_sandbox/base.py +39 -92
  122. letta/services/tool_sandbox/e2b_sandbox.py +16 -11
  123. letta/services/tool_sandbox/local_sandbox.py +51 -23
  124. letta/services/user_manager.py +36 -3
  125. letta/settings.py +10 -3
  126. letta/templates/__init__.py +0 -0
  127. letta/templates/sandbox_code_file.py.j2 +47 -0
  128. letta/templates/template_helper.py +16 -0
  129. letta/tracing.py +30 -1
  130. letta/types/__init__.py +7 -0
  131. letta/utils.py +25 -1
  132. {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/METADATA +7 -2
  133. {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/RECORD +136 -110
  134. {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/LICENSE +0 -0
  135. {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/WHEEL +0 -0
  136. {letta_nightly-0.7.30.dev20250603104343.dist-info → letta_nightly-0.8.0.dev20250604104349.dist-info}/entry_points.txt +0 -0
@@ -6,9 +6,14 @@ from letta.schemas.agent import AgentState
6
6
 
7
7
 
8
8
  class ToolExecutionResult(BaseModel):
9
+
9
10
  status: Literal["success", "error"] = Field(..., description="The status of the tool execution and return object")
10
11
  func_return: Optional[Any] = Field(None, description="The function return object")
11
12
  agent_state: Optional[AgentState] = Field(None, description="The agent state")
12
13
  stdout: Optional[List[str]] = Field(None, description="Captured stdout (prints, logs) from function invocation")
13
14
  stderr: Optional[List[str]] = Field(None, description="Captured stderr from the function invocation")
14
15
  sandbox_config_fingerprint: Optional[str] = Field(None, description="The fingerprint of the config for the sandbox")
16
+
17
+ @property
18
+ def success_flag(self) -> bool:
19
+ return self.status == "success"
letta/server/db.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import threading
3
+ import uuid
3
4
  from contextlib import asynccontextmanager, contextmanager
4
5
  from typing import Any, AsyncGenerator, Generator
5
6
 
@@ -118,13 +119,11 @@ class DatabaseRegistry:
118
119
  else:
119
120
  async_pg_uri = f"postgresql+asyncpg://{pg_uri.split('://', 1)[1]}" if "://" in pg_uri else pg_uri
120
121
  async_pg_uri = async_pg_uri.replace("sslmode=", "ssl=")
121
-
122
122
  async_engine = create_async_engine(async_pg_uri, **self._build_sqlalchemy_engine_args(is_async=True))
123
- self._initialized["async"] = True
124
123
  else:
125
- self.logger.warning("Async SQLite is currently not supported. Please use PostgreSQL for async database operations.")
126
- # TODO (cliandy): unclear around async sqlite support in sqlalchemy, we will not currently support this
127
- self._initialized["async"] = True
124
+ # create sqlite async engine
125
+ self._initialized["async"] = False
126
+ # TODO: remove self.config
128
127
  engine_path = "sqlite+aiosqlite:///" + os.path.join(self.config.recall_storage_path, "sqlite.db")
129
128
  self.logger.info("Creating sqlite engine " + engine_path)
130
129
  async_engine = create_async_engine(engine_path, **self._build_sqlalchemy_engine_args(is_async=True))
@@ -132,7 +131,7 @@ class DatabaseRegistry:
132
131
  # Create async session factory
133
132
  self._async_engines["default"] = async_engine
134
133
  self._async_session_factories["default"] = async_sessionmaker(
135
- autocommit=False, autoflush=False, bind=self._async_engines["default"], class_=AsyncSession
134
+ close_resets_only=False, autocommit=False, autoflush=False, bind=self._async_engines["default"], class_=AsyncSession
136
135
  )
137
136
  self._initialized["async"] = True
138
137
 
@@ -165,8 +164,24 @@ class DatabaseRegistry:
165
164
  }
166
165
  )
167
166
  if not is_async:
168
- base_args["pool_use_lifo"] = settings.pool_use_lifo
169
-
167
+ base_args.update(
168
+ {
169
+ "pool_use_lifo": settings.pool_use_lifo,
170
+ }
171
+ )
172
+
173
+ elif is_async:
174
+ # For asyncpg, statement_cache_size should be in connect_args
175
+ base_args.update(
176
+ {
177
+ "connect_args": {
178
+ "timeout": settings.pg_pool_timeout,
179
+ "prepared_statement_name_func": lambda: f"__asyncpg_{uuid.uuid4()}__",
180
+ "statement_cache_size": 0,
181
+ "prepared_statement_cache_size": 0,
182
+ },
183
+ }
184
+ )
170
185
  return base_args
171
186
 
172
187
  def _wrap_sqlite_engine(self, engine: Engine) -> None:
@@ -1,5 +1,4 @@
1
- import asyncio
2
- import concurrent.futures
1
+ import importlib.util
3
2
  import json
4
3
  import logging
5
4
  import os
@@ -17,7 +16,6 @@ from letta.__init__ import __version__
17
16
  from letta.agents.exceptions import IncompatibleAgentType
18
17
  from letta.constants import ADMIN_PREFIX, API_PREFIX, OPENAI_API_PREFIX
19
18
  from letta.errors import BedrockPermissionError, LettaAgentNotFoundError, LettaUserNotFoundError
20
- from letta.jobs.scheduler import shutdown_scheduler_and_release_lock, start_scheduler_with_leader_election
21
19
  from letta.log import get_logger
22
20
  from letta.orm.errors import DatabaseTimeoutError, ForeignKeyConstraintViolationError, NoResultFound, UniqueConstraintViolationError
23
21
  from letta.schemas.letta_message import create_letta_message_union_schema
@@ -100,7 +98,7 @@ class CheckPasswordMiddleware(BaseHTTPMiddleware):
100
98
  async def dispatch(self, request, call_next):
101
99
 
102
100
  # Exclude health check endpoint from password protection
103
- if request.url.path == "/v1/health/" or request.url.path == "/latest/health/":
101
+ if request.url.path in {"/v1/health", "/v1/health/", "/latest/health/"}:
104
102
  return await call_next(request)
105
103
 
106
104
  if (
@@ -142,34 +140,6 @@ def create_application() -> "FastAPI":
142
140
  debug=debug_mode, # if True, the stack trace will be printed in the response
143
141
  )
144
142
 
145
- @app.on_event("startup")
146
- async def configure_executor():
147
- print(f"INFO: Configured event loop executor with {settings.event_loop_threadpool_max_workers} workers.")
148
- loop = asyncio.get_running_loop()
149
- executor = concurrent.futures.ThreadPoolExecutor(max_workers=settings.event_loop_threadpool_max_workers)
150
- loop.set_default_executor(executor)
151
-
152
- @app.on_event("startup")
153
- async def on_startup():
154
- global server
155
-
156
- await start_scheduler_with_leader_election(server)
157
-
158
- @app.on_event("shutdown")
159
- def shutdown_mcp_clients():
160
- global server
161
- import threading
162
-
163
- def cleanup_clients():
164
- if hasattr(server, "mcp_clients"):
165
- for client in server.mcp_clients.values():
166
- client.cleanup()
167
- server.mcp_clients.clear()
168
-
169
- t = threading.Thread(target=cleanup_clients)
170
- t.start()
171
- t.join()
172
-
173
143
  @app.exception_handler(IncompatibleAgentType)
174
144
  async def handle_incompatible_agent_type(request: Request, exc: IncompatibleAgentType):
175
145
  return JSONResponse(
@@ -320,12 +290,6 @@ def create_application() -> "FastAPI":
320
290
  # Generate OpenAPI schema after all routes are mounted
321
291
  generate_openapi_schema(app)
322
292
 
323
- @app.on_event("shutdown")
324
- async def on_shutdown():
325
- global server
326
- # server = None
327
- await shutdown_scheduler_and_release_lock()
328
-
329
293
  return app
330
294
 
331
295
 
@@ -352,19 +316,53 @@ def start_server(
352
316
  # Add the handler to the logger
353
317
  server_logger.addHandler(stream_handler)
354
318
 
319
+ # Experimental UV Loop Support
320
+ try:
321
+ if importlib.util.find_spec("uvloop") is not None and settings.use_uvloop:
322
+ print("Running server on uvloop...")
323
+ import asyncio
324
+
325
+ import uvloop
326
+
327
+ asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
328
+ except:
329
+ pass
330
+
355
331
  if (os.getenv("LOCAL_HTTPS") == "true") or "--localhttps" in sys.argv:
356
332
  print(f"▶ Server running at: https://{host or 'localhost'}:{port or REST_DEFAULT_PORT}")
357
333
  print(f"▶ View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
358
- uvicorn.run(
359
- "letta.server.rest_api.app:app",
360
- host=host or "localhost",
361
- port=port or REST_DEFAULT_PORT,
362
- workers=settings.uvicorn_workers,
363
- reload=reload or settings.uvicorn_reload,
364
- timeout_keep_alive=settings.uvicorn_timeout_keep_alive,
365
- ssl_keyfile="certs/localhost-key.pem",
366
- ssl_certfile="certs/localhost.pem",
367
- )
334
+ if importlib.util.find_spec("granian") is not None and settings.use_uvloop:
335
+ from granian import Granian
336
+
337
+ # Experimental Granian engine
338
+ Granian(
339
+ target="letta.server.rest_api.app:app",
340
+ # factory=True,
341
+ interface="asgi",
342
+ address=host or "localhost",
343
+ port=port or REST_DEFAULT_PORT,
344
+ workers=settings.uvicorn_workers,
345
+ # threads=
346
+ reload=reload or settings.uvicorn_reload,
347
+ reload_ignore_patterns=["openapi_letta.json"],
348
+ reload_ignore_worker_failure=True,
349
+ reload_tick=100,
350
+ # log_level="info"
351
+ ssl_keyfile="certs/localhost-key.pem",
352
+ ssl_cert="certs/localhost.pem",
353
+ ).serve()
354
+ else:
355
+ uvicorn.run(
356
+ "letta.server.rest_api.app:app",
357
+ host=host or "localhost",
358
+ port=port or REST_DEFAULT_PORT,
359
+ workers=settings.uvicorn_workers,
360
+ reload=reload or settings.uvicorn_reload,
361
+ timeout_keep_alive=settings.uvicorn_timeout_keep_alive,
362
+ ssl_keyfile="certs/localhost-key.pem",
363
+ ssl_certfile="certs/localhost.pem",
364
+ )
365
+
368
366
  else:
369
367
  if is_windows:
370
368
  # Windows doesn't those the fancy unicode characters
@@ -374,11 +372,30 @@ def start_server(
374
372
  print(f"▶ Server running at: http://{host or 'localhost'}:{port or REST_DEFAULT_PORT}")
375
373
  print(f"▶ View using ADE at: https://app.letta.com/development-servers/local/dashboard\n")
376
374
 
377
- uvicorn.run(
378
- "letta.server.rest_api.app:app",
379
- host=host or "localhost",
380
- port=port or REST_DEFAULT_PORT,
381
- workers=settings.uvicorn_workers,
382
- reload=reload or settings.uvicorn_reload,
383
- timeout_keep_alive=settings.uvicorn_timeout_keep_alive,
384
- )
375
+ if importlib.util.find_spec("granian") is not None and settings.use_granian:
376
+ # Experimental Granian engine
377
+ from granian import Granian
378
+
379
+ Granian(
380
+ target="letta.server.rest_api.app:app",
381
+ # factory=True,
382
+ interface="asgi",
383
+ address=host or "localhost",
384
+ port=port or REST_DEFAULT_PORT,
385
+ workers=settings.uvicorn_workers,
386
+ # threads=
387
+ reload=reload or settings.uvicorn_reload,
388
+ reload_ignore_patterns=["openapi_letta.json"],
389
+ reload_ignore_worker_failure=True,
390
+ reload_tick=100,
391
+ # log_level="info"
392
+ ).serve()
393
+ else:
394
+ uvicorn.run(
395
+ "letta.server.rest_api.app:app",
396
+ host=host or "localhost",
397
+ port=port or REST_DEFAULT_PORT,
398
+ workers=settings.uvicorn_workers,
399
+ reload=reload or settings.uvicorn_reload,
400
+ timeout_keep_alive=settings.uvicorn_timeout_keep_alive,
401
+ )
@@ -1338,8 +1338,8 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
1338
1338
  tool_return=msg,
1339
1339
  status=msg_obj.tool_returns[0].status if msg_obj.tool_returns else "success",
1340
1340
  tool_call_id=msg_obj.tool_call_id,
1341
- stdout=msg_obj.tool_returns[0].stdout if msg_obj.tool_returns else None,
1342
- stderr=msg_obj.tool_returns[0].stderr if msg_obj.tool_returns else None,
1341
+ stdout=msg_obj.tool_returns[0].stdout if msg_obj.tool_returns else [],
1342
+ stderr=msg_obj.tool_returns[0].stderr if msg_obj.tool_returns else [],
1343
1343
  name=msg_obj.name,
1344
1344
  otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
1345
1345
  )
@@ -1354,8 +1354,8 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
1354
1354
  tool_return=msg,
1355
1355
  status=msg_obj.tool_returns[0].status if msg_obj.tool_returns else "error",
1356
1356
  tool_call_id=msg_obj.tool_call_id,
1357
- stdout=msg_obj.tool_returns[0].stdout if msg_obj.tool_returns else None,
1358
- stderr=msg_obj.tool_returns[0].stderr if msg_obj.tool_returns else None,
1357
+ stdout=msg_obj.tool_returns[0].stdout if msg_obj.tool_returns else [],
1358
+ stderr=msg_obj.tool_returns[0].stderr if msg_obj.tool_returns else [],
1359
1359
  name=msg_obj.name,
1360
1360
  otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
1361
1361
  )
@@ -12,12 +12,12 @@ from sqlalchemy.exc import IntegrityError, OperationalError
12
12
  from starlette.responses import Response, StreamingResponse
13
13
 
14
14
  from letta.agents.letta_agent import LettaAgent
15
- from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
15
+ from letta.constants import CORE_MEMORY_SOURCE_CHAR_LIMIT, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
16
16
  from letta.groups.sleeptime_multi_agent_v2 import SleeptimeMultiAgentV2
17
17
  from letta.helpers.datetime_helpers import get_utc_timestamp_ns
18
18
  from letta.log import get_logger
19
19
  from letta.orm.errors import NoResultFound
20
- from letta.schemas.agent import AgentState, CreateAgent, UpdateAgent
20
+ from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
21
21
  from letta.schemas.block import Block, BlockUpdate
22
22
  from letta.schemas.group import Group
23
23
  from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig
@@ -36,6 +36,7 @@ from letta.server.rest_api.utils import get_letta_server
36
36
  from letta.server.server import SyncServer
37
37
  from letta.services.telemetry_manager import NoopTelemetryManager
38
38
  from letta.settings import settings
39
+ from letta.utils import safe_create_task
39
40
 
40
41
  # These can be forward refs, but because Fastapi needs them at runtime the must be imported normally
41
42
 
@@ -127,7 +128,7 @@ class IndentedORJSONResponse(Response):
127
128
 
128
129
 
129
130
  @router.get("/{agent_id}/export", response_class=IndentedORJSONResponse, operation_id="export_agent_serialized")
130
- async def export_agent_serialized(
131
+ def export_agent_serialized(
131
132
  agent_id: str,
132
133
  server: "SyncServer" = Depends(get_letta_server),
133
134
  actor_id: Optional[str] = Header(None, alias="user_id"),
@@ -138,7 +139,7 @@ async def export_agent_serialized(
138
139
  """
139
140
  Export the serialized JSON representation of an agent, formatted with indentation.
140
141
  """
141
- actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
142
+ actor = server.user_manager.get_user_or_default(user_id=actor_id)
142
143
 
143
144
  try:
144
145
  agent = server.agent_manager.serialize(agent_id=agent_id, actor=actor)
@@ -270,7 +271,7 @@ def list_agent_tools(
270
271
 
271
272
 
272
273
  @router.patch("/{agent_id}/tools/attach/{tool_id}", response_model=AgentState, operation_id="attach_tool")
273
- def attach_tool(
274
+ async def attach_tool(
274
275
  agent_id: str,
275
276
  tool_id: str,
276
277
  server: "SyncServer" = Depends(get_letta_server),
@@ -279,12 +280,12 @@ def attach_tool(
279
280
  """
280
281
  Attach a tool to an agent.
281
282
  """
282
- actor = server.user_manager.get_user_or_default(user_id=actor_id)
283
- return server.agent_manager.attach_tool(agent_id=agent_id, tool_id=tool_id, actor=actor)
283
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
284
+ return await server.agent_manager.attach_tool_async(agent_id=agent_id, tool_id=tool_id, actor=actor)
284
285
 
285
286
 
286
287
  @router.patch("/{agent_id}/tools/detach/{tool_id}", response_model=AgentState, operation_id="detach_tool")
287
- def detach_tool(
288
+ async def detach_tool(
288
289
  agent_id: str,
289
290
  tool_id: str,
290
291
  server: "SyncServer" = Depends(get_letta_server),
@@ -293,31 +294,49 @@ def detach_tool(
293
294
  """
294
295
  Detach a tool from an agent.
295
296
  """
296
- actor = server.user_manager.get_user_or_default(user_id=actor_id)
297
- return server.agent_manager.detach_tool(agent_id=agent_id, tool_id=tool_id, actor=actor)
297
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
298
+ return await server.agent_manager.detach_tool_async(agent_id=agent_id, tool_id=tool_id, actor=actor)
298
299
 
299
300
 
300
301
  @router.patch("/{agent_id}/sources/attach/{source_id}", response_model=AgentState, operation_id="attach_source_to_agent")
301
302
  async def attach_source(
302
303
  agent_id: str,
303
304
  source_id: str,
304
- background_tasks: BackgroundTasks,
305
305
  server: "SyncServer" = Depends(get_letta_server),
306
306
  actor_id: Optional[str] = Header(None, alias="user_id"),
307
307
  ):
308
308
  """
309
309
  Attach a source to an agent.
310
310
  """
311
- actor = server.user_manager.get_user_or_default(user_id=actor_id)
312
- agent = server.agent_manager.attach_source(agent_id=agent_id, source_id=source_id, actor=actor)
313
- if agent.enable_sleeptime:
314
- source = await server.source_manager.get_source_by_id_async(source_id=source_id)
315
- background_tasks.add_task(server.sleeptime_document_ingest, agent, source, actor)
316
- return agent
311
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
312
+ agent_state = await server.agent_manager.attach_source_async(agent_id=agent_id, source_id=source_id, actor=actor)
313
+
314
+ files = await server.source_manager.list_files(source_id, actor)
315
+ texts = []
316
+ file_ids = []
317
+ for f in files:
318
+ passages = await server.passage_manager.list_passages_by_file_id_async(file_id=f.id, actor=actor)
319
+ passage_text = ""
320
+ for p in passages:
321
+ if len(passage_text) <= CORE_MEMORY_SOURCE_CHAR_LIMIT:
322
+ passage_text += p.text
323
+
324
+ texts.append(passage_text)
325
+ file_ids.append(f.id)
326
+
327
+ await server.insert_files_into_context_window(agent_state=agent_state, texts=texts, file_ids=file_ids, actor=actor)
328
+
329
+ if agent_state.enable_sleeptime:
330
+ source = await server.source_manager.get_source_by_id(source_id=source_id)
331
+ safe_create_task(
332
+ server.sleeptime_document_ingest_async(agent_state, source, actor), logger=logger, label="sleeptime_document_ingest_async"
333
+ )
334
+
335
+ return agent_state
317
336
 
318
337
 
319
338
  @router.patch("/{agent_id}/sources/detach/{source_id}", response_model=AgentState, operation_id="detach_source_from_agent")
320
- def detach_source(
339
+ async def detach_source(
321
340
  agent_id: str,
322
341
  source_id: str,
323
342
  server: "SyncServer" = Depends(get_letta_server),
@@ -326,16 +345,20 @@ def detach_source(
326
345
  """
327
346
  Detach a source from an agent.
328
347
  """
329
- actor = server.user_manager.get_user_or_default(user_id=actor_id)
330
- agent = server.agent_manager.detach_source(agent_id=agent_id, source_id=source_id, actor=actor)
331
- if agent.enable_sleeptime:
348
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
349
+ agent_state = await server.agent_manager.detach_source_async(agent_id=agent_id, source_id=source_id, actor=actor)
350
+ files = await server.source_manager.list_files(source_id, actor)
351
+ file_ids = [f.id for f in files]
352
+ await server.remove_files_from_context_window(agent_state=agent_state, file_ids=file_ids, actor=actor)
353
+
354
+ if agent_state.enable_sleeptime:
332
355
  try:
333
- source = server.source_manager.get_source_by_id(source_id=source_id)
334
- block = server.agent_manager.get_block_with_label(agent_id=agent.id, block_label=source.name, actor=actor)
335
- server.block_manager.delete_block(block.id, actor)
356
+ source = await server.source_manager.get_source_by_id(source_id=source_id)
357
+ block = await server.agent_manager.get_block_with_label_async(agent_id=agent_state.id, block_label=source.name, actor=actor)
358
+ await server.block_manager.delete_block_async(block.id, actor)
336
359
  except:
337
360
  pass
338
- return agent
361
+ return agent_state
339
362
 
340
363
 
341
364
  @router.get("/{agent_id}", response_model=AgentState, operation_id="retrieve_agent")
@@ -517,18 +540,18 @@ async def list_passages(
517
540
 
518
541
 
519
542
  @router.post("/{agent_id}/archival-memory", response_model=List[Passage], operation_id="create_passage")
520
- def create_passage(
543
+ async def create_passage(
521
544
  agent_id: str,
522
545
  request: CreateArchivalMemory = Body(...),
523
546
  server: "SyncServer" = Depends(get_letta_server),
524
- actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
547
+ actor_id: Optional[str] = Header(None, alias="user_id"),
525
548
  ):
526
549
  """
527
550
  Insert a memory into an agent's archival memory store.
528
551
  """
529
- actor = server.user_manager.get_user_or_default(user_id=actor_id)
552
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
530
553
 
531
- return server.insert_archival_memory(agent_id=agent_id, memory_contents=request.text, actor=actor)
554
+ return await server.insert_archival_memory_async(agent_id=agent_id, memory_contents=request.text, actor=actor)
532
555
 
533
556
 
534
557
  @router.patch("/{agent_id}/archival-memory/{memory_id}", response_model=List[Passage], operation_id="modify_passage")
@@ -549,7 +572,7 @@ def modify_passage(
549
572
  # TODO(ethan): query or path parameter for memory_id?
550
573
  # @router.delete("/{agent_id}/archival")
551
574
  @router.delete("/{agent_id}/archival-memory/{memory_id}", response_model=None, operation_id="delete_passage")
552
- def delete_passage(
575
+ async def delete_passage(
553
576
  agent_id: str,
554
577
  memory_id: str,
555
578
  # memory_id: str = Query(..., description="Unique ID of the memory to be deleted."),
@@ -559,9 +582,9 @@ def delete_passage(
559
582
  """
560
583
  Delete a memory from an agent's archival memory store.
561
584
  """
562
- actor = server.user_manager.get_user_or_default(user_id=actor_id)
585
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
563
586
 
564
- server.delete_archival_memory(memory_id=memory_id, actor=actor)
587
+ await server.delete_archival_memory_async(memory_id=memory_id, actor=actor)
565
588
  return JSONResponse(status_code=status.HTTP_200_OK, content={"message": f"Memory id={memory_id} successfully deleted"})
566
589
 
567
590
 
@@ -636,15 +659,17 @@ async def send_message(
636
659
  This endpoint accepts a message from a user and processes it through the agent.
637
660
  """
638
661
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
662
+ request_start_timestamp_ns = get_utc_timestamp_ns()
663
+ user_eligible = True
639
664
  # TODO: This is redundant, remove soon
640
- agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor)
641
- agent_eligible = agent.enable_sleeptime or not agent.multi_agent_group
665
+ agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
666
+ agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
642
667
  experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
643
668
  feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
644
669
  model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]
645
670
 
646
- if agent_eligible and feature_enabled and model_compatible:
647
- if agent.enable_sleeptime:
671
+ if user_eligible and agent_eligible and feature_enabled and model_compatible:
672
+ if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
648
673
  experimental_agent = SleeptimeMultiAgentV2(
649
674
  agent_id=agent_id,
650
675
  message_manager=server.message_manager,
@@ -668,7 +693,12 @@ async def send_message(
668
693
  telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
669
694
  )
670
695
 
671
- result = await experimental_agent.step(request.messages, max_steps=10, use_assistant_message=request.use_assistant_message)
696
+ result = await experimental_agent.step(
697
+ request.messages,
698
+ max_steps=10,
699
+ use_assistant_message=request.use_assistant_message,
700
+ request_start_timestamp_ns=request_start_timestamp_ns,
701
+ )
672
702
  else:
673
703
  result = await server.send_message_to_agent(
674
704
  agent_id=agent_id,
@@ -691,7 +721,9 @@ async def send_message(
691
721
  responses={
692
722
  200: {
693
723
  "description": "Successful response",
694
- "content": {"text/event-stream": {}},
724
+ "content": {
725
+ "text/event-stream": {"description": "Server-Sent Events stream"},
726
+ },
695
727
  }
696
728
  },
697
729
  )
@@ -709,17 +741,19 @@ async def send_message_streaming(
709
741
  """
710
742
  request_start_timestamp_ns = get_utc_timestamp_ns()
711
743
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
744
+ user_eligible = actor.organization_id not in ["org-4a3af5dd-4c6a-48cb-ac13-3f73ecaaa4bf", "org-4ab3f6e8-9a44-4bee-aeb6-c681cbbc7bf6"]
712
745
  # TODO: This is redundant, remove soon
713
- agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor)
714
- agent_eligible = agent.enable_sleeptime or not agent.multi_agent_group
746
+ agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
747
+ agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
715
748
  experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
716
749
  feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
717
750
  model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]
718
751
  model_compatible_token_streaming = agent.llm_config.model_endpoint_type in ["anthropic", "openai"]
719
- not_letta_endpoint = not ("letta" in agent.llm_config.model_endpoint)
752
+ not_letta_endpoint = not ("inference.letta.com" in agent.llm_config.model_endpoint)
753
+ request_start_timestamp_ns = get_utc_timestamp_ns()
720
754
 
721
- if agent_eligible and feature_enabled and model_compatible:
722
- if agent.enable_sleeptime:
755
+ if user_eligible and agent_eligible and feature_enabled and model_compatible:
756
+ if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
723
757
  experimental_agent = SleeptimeMultiAgentV2(
724
758
  agent_id=agent_id,
725
759
  message_manager=server.message_manager,
@@ -759,7 +793,10 @@ async def send_message_streaming(
759
793
  else:
760
794
  result = StreamingResponseWithStatusCode(
761
795
  experimental_agent.step_stream_no_tokens(
762
- request.messages, max_steps=10, use_assistant_message=request.use_assistant_message
796
+ request.messages,
797
+ max_steps=10,
798
+ use_assistant_message=request.use_assistant_message,
799
+ request_start_timestamp_ns=request_start_timestamp_ns,
763
800
  ),
764
801
  media_type="text/event-stream",
765
802
  )
@@ -792,6 +829,7 @@ async def process_message_background(
792
829
  ) -> None:
793
830
  """Background task to process the message and update job status."""
794
831
  try:
832
+ request_start_timestamp_ns = get_utc_timestamp_ns()
795
833
  result = await server.send_message_to_agent(
796
834
  agent_id=agent_id,
797
835
  actor=actor,
@@ -802,6 +840,7 @@ async def process_message_background(
802
840
  assistant_message_tool_name=assistant_message_tool_name,
803
841
  assistant_message_tool_kwarg=assistant_message_tool_kwarg,
804
842
  metadata={"job_id": job_id}, # Pass job_id through metadata
843
+ request_start_timestamp_ns=request_start_timestamp_ns,
805
844
  )
806
845
 
807
846
  # Update job status to completed
@@ -874,15 +913,17 @@ async def send_message_async(
874
913
 
875
914
 
876
915
  @router.patch("/{agent_id}/reset-messages", response_model=AgentState, operation_id="reset_messages")
877
- def reset_messages(
916
+ async def reset_messages(
878
917
  agent_id: str,
879
918
  add_default_initial_messages: bool = Query(default=False, description="If true, adds the default initial messages after resetting."),
880
919
  server: "SyncServer" = Depends(get_letta_server),
881
920
  actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
882
921
  ):
883
922
  """Resets the messages for an agent"""
884
- actor = server.user_manager.get_user_or_default(user_id=actor_id)
885
- return server.agent_manager.reset_messages(agent_id=agent_id, actor=actor, add_default_initial_messages=add_default_initial_messages)
923
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
924
+ return await server.agent_manager.reset_messages_async(
925
+ agent_id=agent_id, actor=actor, add_default_initial_messages=add_default_initial_messages
926
+ )
886
927
 
887
928
 
888
929
  @router.get("/{agent_id}/groups", response_model=List[Group], operation_id="list_agent_groups")
@@ -896,3 +937,47 @@ async def list_agent_groups(
896
937
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
897
938
  print("in list agents with manager_type", manager_type)
898
939
  return server.agent_manager.list_groups(agent_id=agent_id, manager_type=manager_type, actor=actor)
940
+
941
+
942
+ @router.post("/{agent_id}/summarize", response_model=AgentState, operation_id="summarize_agent_conversation")
943
+ async def summarize_agent_conversation(
944
+ agent_id: str,
945
+ request_obj: Request, # FastAPI Request
946
+ max_message_length: int = Query(..., description="Maximum number of messages to retain after summarization."),
947
+ server: SyncServer = Depends(get_letta_server),
948
+ actor_id: Optional[str] = Header(None, alias="user_id"),
949
+ ):
950
+ """
951
+ Summarize an agent's conversation history to a target message length.
952
+
953
+ This endpoint summarizes the current message history for a given agent,
954
+ truncating and compressing it down to the specified `max_message_length`.
955
+ """
956
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
957
+
958
+ # user_eligible = actor.organization_id not in ["org-4a3af5dd-4c6a-48cb-ac13-3f73ecaaa4bf", "org-4ab3f6e8-9a44-4bee-aeb6-c681cbbc7bf6"]
959
+ # TODO: This is redundant, remove soon
960
+ agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
961
+ agent_eligible = agent.enable_sleeptime or agent.agent_type == AgentType.sleeptime_agent or not agent.multi_agent_group
962
+ experimental_header = request_obj.headers.get("X-EXPERIMENTAL") or "false"
963
+ feature_enabled = settings.use_experimental or experimental_header.lower() == "true"
964
+ model_compatible = agent.llm_config.model_endpoint_type in ["anthropic", "openai", "together", "google_ai", "google_vertex"]
965
+
966
+ if agent_eligible and feature_enabled and model_compatible:
967
+ agent = LettaAgent(
968
+ agent_id=agent_id,
969
+ message_manager=server.message_manager,
970
+ agent_manager=server.agent_manager,
971
+ block_manager=server.block_manager,
972
+ passage_manager=server.passage_manager,
973
+ actor=actor,
974
+ step_manager=server.step_manager,
975
+ telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
976
+ message_buffer_min=max_message_length,
977
+ )
978
+ return await agent.summarize_conversation_history()
979
+
980
+ raise HTTPException(
981
+ status_code=status.HTTP_403_FORBIDDEN,
982
+ detail="Summarization is not currently supported for this agent configuration. Please contact Letta support.",
983
+ )
@@ -39,14 +39,15 @@ async def list_blocks(
39
39
 
40
40
 
41
41
  @router.get("/count", response_model=int, operation_id="count_blocks")
42
- def count_blocks(
42
+ async def count_blocks(
43
43
  server: SyncServer = Depends(get_letta_server),
44
44
  actor_id: Optional[str] = Header(None, alias="user_id"),
45
45
  ):
46
46
  """
47
47
  Count all blocks created by a user.
48
48
  """
49
- return server.block_manager.size(actor=server.user_manager.get_user_or_default(user_id=actor_id))
49
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
50
+ return await server.block_manager.size_async(actor=actor)
50
51
 
51
52
 
52
53
  @router.post("/", response_model=Block, operation_id="create_block")