letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +113 -0
  3. letta/adapters/letta_llm_stream_adapter.py +171 -0
  4. letta/agents/agent_loop.py +23 -0
  5. letta/agents/base_agent.py +4 -1
  6. letta/agents/base_agent_v2.py +68 -0
  7. letta/agents/helpers.py +3 -5
  8. letta/agents/letta_agent.py +23 -12
  9. letta/agents/letta_agent_v2.py +1221 -0
  10. letta/agents/voice_agent.py +2 -1
  11. letta/constants.py +1 -1
  12. letta/errors.py +12 -0
  13. letta/functions/function_sets/base.py +53 -12
  14. letta/functions/helpers.py +3 -2
  15. letta/functions/schema_generator.py +1 -1
  16. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  17. letta/groups/sleeptime_multi_agent_v3.py +233 -0
  18. letta/helpers/tool_rule_solver.py +4 -0
  19. letta/helpers/tpuf_client.py +607 -34
  20. letta/interfaces/anthropic_streaming_interface.py +74 -30
  21. letta/interfaces/openai_streaming_interface.py +80 -37
  22. letta/llm_api/google_vertex_client.py +1 -1
  23. letta/llm_api/openai_client.py +45 -4
  24. letta/orm/agent.py +4 -1
  25. letta/orm/block.py +2 -0
  26. letta/orm/blocks_agents.py +1 -0
  27. letta/orm/group.py +1 -0
  28. letta/orm/source.py +8 -1
  29. letta/orm/sources_agents.py +2 -1
  30. letta/orm/step_metrics.py +10 -0
  31. letta/orm/tools_agents.py +5 -2
  32. letta/schemas/block.py +4 -0
  33. letta/schemas/enums.py +1 -0
  34. letta/schemas/group.py +8 -0
  35. letta/schemas/letta_message.py +1 -1
  36. letta/schemas/letta_request.py +2 -2
  37. letta/schemas/mcp.py +9 -1
  38. letta/schemas/message.py +42 -2
  39. letta/schemas/providers/ollama.py +1 -1
  40. letta/schemas/providers.py +1 -2
  41. letta/schemas/source.py +6 -0
  42. letta/schemas/step_metrics.py +2 -0
  43. letta/server/rest_api/interface.py +34 -2
  44. letta/server/rest_api/json_parser.py +2 -0
  45. letta/server/rest_api/redis_stream_manager.py +2 -1
  46. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  47. letta/server/rest_api/routers/v1/__init__.py +2 -0
  48. letta/server/rest_api/routers/v1/agents.py +132 -170
  49. letta/server/rest_api/routers/v1/blocks.py +6 -0
  50. letta/server/rest_api/routers/v1/folders.py +25 -7
  51. letta/server/rest_api/routers/v1/groups.py +6 -0
  52. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  53. letta/server/rest_api/routers/v1/messages.py +14 -19
  54. letta/server/rest_api/routers/v1/runs.py +43 -28
  55. letta/server/rest_api/routers/v1/sources.py +25 -7
  56. letta/server/rest_api/routers/v1/tools.py +42 -0
  57. letta/server/rest_api/streaming_response.py +11 -2
  58. letta/server/server.py +9 -6
  59. letta/services/agent_manager.py +39 -59
  60. letta/services/agent_serialization_manager.py +26 -11
  61. letta/services/archive_manager.py +60 -9
  62. letta/services/block_manager.py +5 -0
  63. letta/services/file_processor/embedder/base_embedder.py +5 -0
  64. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  65. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  66. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  67. letta/services/file_processor/file_processor.py +9 -7
  68. letta/services/group_manager.py +74 -11
  69. letta/services/mcp_manager.py +134 -28
  70. letta/services/message_manager.py +229 -125
  71. letta/services/passage_manager.py +2 -1
  72. letta/services/source_manager.py +23 -1
  73. letta/services/summarizer/summarizer.py +4 -1
  74. letta/services/tool_executor/core_tool_executor.py +2 -120
  75. letta/services/tool_executor/files_tool_executor.py +133 -8
  76. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  77. letta/services/tool_sandbox/local_sandbox.py +2 -2
  78. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  79. letta/settings.py +6 -0
  80. letta/streaming_utils.py +29 -4
  81. letta/utils.py +106 -4
  82. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
  83. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
  84. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
  85. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
  86. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
@@ -12,10 +12,18 @@ from pydantic import BaseModel, Field
12
12
  from sqlalchemy.exc import IntegrityError, OperationalError
13
13
  from starlette.responses import Response, StreamingResponse
14
14
 
15
+ from letta.agents.agent_loop import AgentLoop
15
16
  from letta.agents.letta_agent import LettaAgent
17
+ from letta.agents.letta_agent_v2 import LettaAgentV2
16
18
  from letta.constants import AGENT_ID_PATTERN, DEFAULT_MAX_STEPS, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, REDIS_RUN_ID_PREFIX
17
19
  from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
18
- from letta.errors import AgentExportIdMappingError, AgentExportProcessingError, AgentFileImportError, AgentNotFoundForExportError
20
+ from letta.errors import (
21
+ AgentExportIdMappingError,
22
+ AgentExportProcessingError,
23
+ AgentFileImportError,
24
+ AgentNotFoundForExportError,
25
+ PendingApprovalError,
26
+ )
19
27
  from letta.groups.sleeptime_multi_agent_v2 import SleeptimeMultiAgentV2
20
28
  from letta.helpers.datetime_helpers import get_utc_timestamp_ns
21
29
  from letta.log import get_logger
@@ -39,7 +47,7 @@ from letta.schemas.memory import (
39
47
  CreateArchivalMemory,
40
48
  Memory,
41
49
  )
42
- from letta.schemas.message import MessageCreate
50
+ from letta.schemas.message import MessageCreate, MessageSearchRequest, MessageSearchResult
43
51
  from letta.schemas.passage import Passage
44
52
  from letta.schemas.run import Run
45
53
  from letta.schemas.source import Source
@@ -52,7 +60,7 @@ from letta.server.server import SyncServer
52
60
  from letta.services.summarizer.enums import SummarizationMode
53
61
  from letta.services.telemetry_manager import NoopTelemetryManager
54
62
  from letta.settings import settings
55
- from letta.utils import safe_create_task, truncate_file_visible_content
63
+ from letta.utils import safe_create_shielded_task, safe_create_task, truncate_file_visible_content
56
64
 
57
65
  # These can be forward refs, but because FastAPI needs them at runtime they must be imported normally
58
66
 
@@ -1013,7 +1021,7 @@ async def search_archival_memory(
1013
1021
  end_datetime = end_datetime.isoformat() if end_datetime else None
1014
1022
 
1015
1023
  # Use the shared agent manager method
1016
- formatted_results, count = await server.agent_manager.search_agent_archival_memory_async(
1024
+ formatted_results = await server.agent_manager.search_agent_archival_memory_async(
1017
1025
  agent_id=agent_id,
1018
1026
  actor=actor,
1019
1027
  query=query,
@@ -1027,7 +1035,7 @@ async def search_archival_memory(
1027
1035
  # Convert to proper response schema
1028
1036
  search_results = [ArchivalMemorySearchResult(**result) for result in formatted_results]
1029
1037
 
1030
- return ArchivalMemorySearchResponse(results=search_results, count=count)
1038
+ return ArchivalMemorySearchResponse(results=search_results, count=len(formatted_results))
1031
1039
 
1032
1040
  except NoResultFound as e:
1033
1041
  raise HTTPException(status_code=404, detail=f"Agent with id={agent_id} not found for user_id={actor.id}.")
@@ -1138,7 +1146,9 @@ async def send_message(
1138
1146
 
1139
1147
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
1140
1148
  # TODO: This is redundant, remove soon
1141
- agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
1149
+ agent = await server.agent_manager.get_agent_by_id_async(
1150
+ agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
1151
+ )
1142
1152
  agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
1143
1153
  model_compatible = agent.llm_config.model_endpoint_type in [
1144
1154
  "anthropic",
@@ -1184,42 +1194,11 @@ async def send_message(
1184
1194
 
1185
1195
  try:
1186
1196
  if agent_eligible and model_compatible:
1187
- if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
1188
- agent_loop = SleeptimeMultiAgentV2(
1189
- agent_id=agent_id,
1190
- message_manager=server.message_manager,
1191
- agent_manager=server.agent_manager,
1192
- block_manager=server.block_manager,
1193
- passage_manager=server.passage_manager,
1194
- group_manager=server.group_manager,
1195
- job_manager=server.job_manager,
1196
- actor=actor,
1197
- group=agent.multi_agent_group,
1198
- current_run_id=run.id if run else None,
1199
- )
1200
- else:
1201
- agent_loop = LettaAgent(
1202
- agent_id=agent_id,
1203
- message_manager=server.message_manager,
1204
- agent_manager=server.agent_manager,
1205
- block_manager=server.block_manager,
1206
- job_manager=server.job_manager,
1207
- passage_manager=server.passage_manager,
1208
- actor=actor,
1209
- step_manager=server.step_manager,
1210
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1211
- current_run_id=run.id if run else None,
1212
- # summarizer settings to be added here
1213
- summarizer_mode=(
1214
- SummarizationMode.STATIC_MESSAGE_BUFFER
1215
- if agent.agent_type == AgentType.voice_convo_agent
1216
- else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
1217
- ),
1218
- )
1219
-
1197
+ agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
1220
1198
  result = await agent_loop.step(
1221
1199
  request.messages,
1222
1200
  max_steps=request.max_steps,
1201
+ run_id=run.id if run else None,
1223
1202
  use_assistant_message=request.use_assistant_message,
1224
1203
  request_start_timestamp_ns=request_start_timestamp_ns,
1225
1204
  include_return_message_types=request.include_return_message_types,
@@ -1239,6 +1218,12 @@ async def send_message(
1239
1218
  )
1240
1219
  job_status = result.stop_reason.stop_reason.run_status
1241
1220
  return result
1221
+ except PendingApprovalError as e:
1222
+ job_update_metadata = {"error": str(e)}
1223
+ job_status = JobStatus.failed
1224
+ raise HTTPException(
1225
+ status_code=409, detail={"code": "PENDING_APPROVAL", "message": str(e), "pending_request_id": e.pending_request_id}
1226
+ )
1242
1227
  except Exception as e:
1243
1228
  job_update_metadata = {"error": str(e)}
1244
1229
  job_status = JobStatus.failed
@@ -1287,7 +1272,9 @@ async def send_message_streaming(
1287
1272
 
1288
1273
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
1289
1274
  # TODO: This is redundant, remove soon
1290
- agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
1275
+ agent = await server.agent_manager.get_agent_by_id_async(
1276
+ agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
1277
+ )
1291
1278
  agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
1292
1279
  model_compatible = agent.llm_config.model_endpoint_type in [
1293
1280
  "anthropic",
@@ -1332,57 +1319,16 @@ async def send_message_streaming(
1332
1319
 
1333
1320
  try:
1334
1321
  if agent_eligible and model_compatible:
1335
- if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
1336
- agent_loop = SleeptimeMultiAgentV2(
1337
- agent_id=agent_id,
1338
- message_manager=server.message_manager,
1339
- agent_manager=server.agent_manager,
1340
- block_manager=server.block_manager,
1341
- passage_manager=server.passage_manager,
1342
- group_manager=server.group_manager,
1343
- job_manager=server.job_manager,
1344
- actor=actor,
1345
- step_manager=server.step_manager,
1346
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1347
- group=agent.multi_agent_group,
1348
- current_run_id=run.id if run else None,
1349
- )
1350
- else:
1351
- agent_loop = LettaAgent(
1352
- agent_id=agent_id,
1353
- message_manager=server.message_manager,
1354
- agent_manager=server.agent_manager,
1355
- block_manager=server.block_manager,
1356
- job_manager=server.job_manager,
1357
- passage_manager=server.passage_manager,
1358
- actor=actor,
1359
- step_manager=server.step_manager,
1360
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1361
- current_run_id=run.id if run else None,
1362
- # summarizer settings to be added here
1363
- summarizer_mode=(
1364
- SummarizationMode.STATIC_MESSAGE_BUFFER
1365
- if agent.agent_type == AgentType.voice_convo_agent
1366
- else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
1367
- ),
1368
- )
1369
-
1370
- if request.stream_tokens and model_compatible_token_streaming:
1371
- raw_stream = agent_loop.step_stream(
1372
- input_messages=request.messages,
1373
- max_steps=request.max_steps,
1374
- use_assistant_message=request.use_assistant_message,
1375
- request_start_timestamp_ns=request_start_timestamp_ns,
1376
- include_return_message_types=request.include_return_message_types,
1377
- )
1378
- else:
1379
- raw_stream = agent_loop.step_stream_no_tokens(
1380
- request.messages,
1381
- max_steps=request.max_steps,
1382
- use_assistant_message=request.use_assistant_message,
1383
- request_start_timestamp_ns=request_start_timestamp_ns,
1384
- include_return_message_types=request.include_return_message_types,
1385
- )
1322
+ agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
1323
+ raw_stream = agent_loop.stream(
1324
+ input_messages=request.messages,
1325
+ max_steps=request.max_steps,
1326
+ stream_tokens=request.stream_tokens and model_compatible_token_streaming,
1327
+ run_id=run.id if run else None,
1328
+ use_assistant_message=request.use_assistant_message,
1329
+ request_start_timestamp_ns=request_start_timestamp_ns,
1330
+ include_return_message_types=request.include_return_message_types,
1331
+ )
1386
1332
 
1387
1333
  from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
1388
1334
 
@@ -1397,12 +1343,13 @@ async def send_message_streaming(
1397
1343
  ),
1398
1344
  )
1399
1345
 
1400
- asyncio.create_task(
1346
+ safe_create_task(
1401
1347
  create_background_stream_processor(
1402
1348
  stream_generator=raw_stream,
1403
1349
  redis_client=redis_client,
1404
1350
  run_id=run.id,
1405
- )
1351
+ ),
1352
+ label=f"background_stream_processor_{run.id}",
1406
1353
  )
1407
1354
 
1408
1355
  raw_stream = redis_sse_stream_generator(
@@ -1437,6 +1384,13 @@ async def send_message_streaming(
1437
1384
  if settings.track_agent_run:
1438
1385
  job_status = JobStatus.running
1439
1386
  return result
1387
+ except PendingApprovalError as e:
1388
+ if settings.track_agent_run:
1389
+ job_update_metadata = {"error": str(e)}
1390
+ job_status = JobStatus.failed
1391
+ raise HTTPException(
1392
+ status_code=409, detail={"code": "PENDING_APPROVAL", "message": str(e), "pending_request_id": e.pending_request_id}
1393
+ )
1440
1394
  except Exception as e:
1441
1395
  if settings.track_agent_run:
1442
1396
  job_update_metadata = {"error": str(e)}
@@ -1498,6 +1452,42 @@ async def cancel_agent_run(
1498
1452
  return results
1499
1453
 
1500
1454
 
1455
+ @router.post("/messages/search", response_model=List[MessageSearchResult], operation_id="search_messages")
1456
+ async def search_messages(
1457
+ request: MessageSearchRequest = Body(...),
1458
+ server: SyncServer = Depends(get_letta_server),
1459
+ actor_id: str | None = Header(None, alias="user_id"),
1460
+ ):
1461
+ """
1462
+ Search messages across the entire organization with optional project and template filtering. Returns messages with FTS/vector ranks and total RRF score.
1463
+
1464
+ This is a cloud-only feature.
1465
+ """
1466
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
1467
+
1468
+ # get embedding config from the default agent if needed
1469
+ # check if any agents exist in the org
1470
+ agent_count = await server.agent_manager.size_async(actor=actor)
1471
+ if agent_count == 0:
1472
+ raise HTTPException(status_code=400, detail="No agents found in organization to derive embedding configuration from")
1473
+
1474
+ try:
1475
+ results = await server.message_manager.search_messages_org_async(
1476
+ actor=actor,
1477
+ query_text=request.query,
1478
+ search_mode=request.search_mode,
1479
+ roles=request.roles,
1480
+ project_id=request.project_id,
1481
+ template_id=request.template_id,
1482
+ limit=request.limit,
1483
+ start_date=request.start_date,
1484
+ end_date=request.end_date,
1485
+ )
1486
+ return results
1487
+ except ValueError as e:
1488
+ raise HTTPException(status_code=400, detail=str(e))
1489
+
1490
+
1501
1491
  async def _process_message_background(
1502
1492
  run_id: str,
1503
1493
  server: SyncServer,
@@ -1513,7 +1503,9 @@ async def _process_message_background(
1513
1503
  """Background task to process the message and update job status."""
1514
1504
  request_start_timestamp_ns = get_utc_timestamp_ns()
1515
1505
  try:
1516
- agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
1506
+ agent = await server.agent_manager.get_agent_by_id_async(
1507
+ agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
1508
+ )
1517
1509
  agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
1518
1510
  model_compatible = agent.llm_config.model_endpoint_type in [
1519
1511
  "anthropic",
@@ -1529,37 +1521,7 @@ async def _process_message_background(
1529
1521
  "deepseek",
1530
1522
  ]
1531
1523
  if agent_eligible and model_compatible:
1532
- if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
1533
- agent_loop = SleeptimeMultiAgentV2(
1534
- agent_id=agent_id,
1535
- message_manager=server.message_manager,
1536
- agent_manager=server.agent_manager,
1537
- block_manager=server.block_manager,
1538
- passage_manager=server.passage_manager,
1539
- group_manager=server.group_manager,
1540
- job_manager=server.job_manager,
1541
- actor=actor,
1542
- group=agent.multi_agent_group,
1543
- )
1544
- else:
1545
- agent_loop = LettaAgent(
1546
- agent_id=agent_id,
1547
- message_manager=server.message_manager,
1548
- agent_manager=server.agent_manager,
1549
- block_manager=server.block_manager,
1550
- job_manager=server.job_manager,
1551
- passage_manager=server.passage_manager,
1552
- actor=actor,
1553
- step_manager=server.step_manager,
1554
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1555
- # summarizer settings to be added here
1556
- summarizer_mode=(
1557
- SummarizationMode.STATIC_MESSAGE_BUFFER
1558
- if agent.agent_type == AgentType.voice_convo_agent
1559
- else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
1560
- ),
1561
- )
1562
-
1524
+ agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
1563
1525
  result = await agent_loop.step(
1564
1526
  messages,
1565
1527
  max_steps=max_steps,
@@ -1590,6 +1552,14 @@ async def _process_message_background(
1590
1552
  )
1591
1553
  await server.job_manager.update_job_by_id_async(job_id=run_id, job_update=job_update, actor=actor)
1592
1554
 
1555
+ except PendingApprovalError as e:
1556
+ # Update job status to failed with specific error info
1557
+ job_update = JobUpdate(
1558
+ status=JobStatus.failed,
1559
+ completed_at=datetime.now(timezone.utc),
1560
+ metadata={"error": str(e), "error_code": "PENDING_APPROVAL", "pending_request_id": e.pending_request_id},
1561
+ )
1562
+ await server.job_manager.update_job_by_id_async(job_id=run_id, job_update=job_update, actor=actor)
1593
1563
  except Exception as e:
1594
1564
  # Update job status to failed
1595
1565
  job_update = JobUpdate(
@@ -1639,8 +1609,8 @@ async def send_message_async(
1639
1609
  )
1640
1610
  run = await server.job_manager.create_job_async(pydantic_job=run, actor=actor)
1641
1611
 
1642
- # Create asyncio task for background processing
1643
- asyncio.create_task(
1612
+ # Create asyncio task for background processing (shielded to prevent cancellation)
1613
+ task = safe_create_shielded_task(
1644
1614
  _process_message_background(
1645
1615
  run_id=run.id,
1646
1616
  server=server,
@@ -1652,9 +1622,34 @@ async def send_message_async(
1652
1622
  assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
1653
1623
  max_steps=request.max_steps,
1654
1624
  include_return_message_types=request.include_return_message_types,
1655
- )
1625
+ ),
1626
+ label=f"process_message_background_{run.id}",
1656
1627
  )
1657
1628
 
1629
+ def handle_task_completion(t):
1630
+ try:
1631
+ t.result()
1632
+ except asyncio.CancelledError:
1633
+ # Note: With shielded tasks, cancellation attempts don't actually stop the task
1634
+ logger.info(f"Cancellation attempted on shielded background task for run {run.id}, but task continues running")
1635
+ # Don't mark as failed since the shielded task is still running
1636
+ except Exception as e:
1637
+ logger.error(f"Unhandled exception in background task for run {run.id}: {e}")
1638
+ safe_create_task(
1639
+ server.job_manager.update_job_by_id_async(
1640
+ job_id=run.id,
1641
+ job_update=JobUpdate(
1642
+ status=JobStatus.failed,
1643
+ completed_at=datetime.now(timezone.utc),
1644
+ metadata={"error": str(e)},
1645
+ ),
1646
+ actor=actor,
1647
+ ),
1648
+ label=f"update_failed_job_{run.id}",
1649
+ )
1650
+
1651
+ task.add_done_callback(handle_task_completion)
1652
+
1658
1653
  return run
1659
1654
 
1660
1655
 
@@ -1721,38 +1716,10 @@ async def preview_raw_payload(
1721
1716
  ]
1722
1717
 
1723
1718
  if agent_eligible and model_compatible:
1724
- if agent.enable_sleeptime:
1725
- # TODO: @caren need to support this for sleeptime
1726
- raise HTTPException(
1727
- status_code=status.HTTP_400_BAD_REQUEST,
1728
- detail="Payload inspection is not supported for agents with sleeptime enabled.",
1729
- )
1730
- else:
1731
- agent_loop = LettaAgent(
1732
- agent_id=agent_id,
1733
- message_manager=server.message_manager,
1734
- agent_manager=server.agent_manager,
1735
- block_manager=server.block_manager,
1736
- job_manager=server.job_manager,
1737
- passage_manager=server.passage_manager,
1738
- actor=actor,
1739
- step_manager=server.step_manager,
1740
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1741
- summarizer_mode=(
1742
- SummarizationMode.STATIC_MESSAGE_BUFFER
1743
- if agent.agent_type == AgentType.voice_convo_agent
1744
- else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
1745
- ),
1746
- )
1747
-
1748
- # TODO: Support step_streaming
1749
- return await agent_loop.step(
1719
+ agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
1720
+ return await agent_loop.build_request(
1750
1721
  input_messages=request.messages,
1751
- use_assistant_message=request.use_assistant_message,
1752
- include_return_message_types=request.include_return_message_types,
1753
- dry_run=True,
1754
1722
  )
1755
-
1756
1723
  else:
1757
1724
  raise HTTPException(
1758
1725
  status_code=status.HTTP_403_FORBIDDEN,
@@ -1793,19 +1760,14 @@ async def summarize_agent_conversation(
1793
1760
  ]
1794
1761
 
1795
1762
  if agent_eligible and model_compatible:
1796
- agent = LettaAgent(
1797
- agent_id=agent_id,
1798
- message_manager=server.message_manager,
1799
- agent_manager=server.agent_manager,
1800
- block_manager=server.block_manager,
1801
- job_manager=server.job_manager,
1802
- passage_manager=server.passage_manager,
1803
- actor=actor,
1804
- step_manager=server.step_manager,
1805
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1806
- message_buffer_min=max_message_length,
1763
+ agent_loop = LettaAgentV2(agent_state=agent, actor=actor)
1764
+ in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
1765
+ await agent_loop.summarize_conversation_history(
1766
+ in_context_messages=in_context_messages,
1767
+ new_letta_messages=[],
1768
+ total_tokens=None,
1769
+ force=True,
1807
1770
  )
1808
- await agent.summarize_conversation_history()
1809
1771
  # Summarization completed, return 204 No Content
1810
1772
  else:
1811
1773
  raise HTTPException(
@@ -68,6 +68,11 @@ async def list_blocks(
68
68
  "If provided, returns blocks that have exactly this number of connected agents."
69
69
  ),
70
70
  ),
71
+ show_hidden_blocks: bool | None = Query(
72
+ False,
73
+ include_in_schema=False,
74
+ description="If set to True, include blocks marked as hidden in the results.",
75
+ ),
71
76
  server: SyncServer = Depends(get_letta_server),
72
77
  actor_id: Optional[str] = Header(None, alias="user_id"), # Extract user_id from header, default to None if not present
73
78
  ):
@@ -89,6 +94,7 @@ async def list_blocks(
89
94
  connected_to_agents_count_eq=connected_to_agents_count_eq,
90
95
  limit=limit,
91
96
  after=after,
97
+ show_hidden_blocks=show_hidden_blocks,
92
98
  )
93
99
 
94
100
 
@@ -15,6 +15,7 @@ from letta.helpers.pinecone_utils import (
15
15
  delete_source_records_from_pinecone_index,
16
16
  should_use_pinecone,
17
17
  )
18
+ from letta.helpers.tpuf_client import should_use_tpuf
18
19
  from letta.log import get_logger
19
20
  from letta.otel.tracing import trace_method
20
21
  from letta.schemas.agent import AgentState
@@ -191,7 +192,13 @@ async def delete_folder(
191
192
  files = await server.file_manager.list_files(folder_id, actor)
192
193
  file_ids = [f.id for f in files]
193
194
 
194
- if should_use_pinecone():
195
+ if should_use_tpuf():
196
+ logger.info(f"Deleting folder {folder_id} from Turbopuffer")
197
+ from letta.helpers.tpuf_client import TurbopufferClient
198
+
199
+ tpuf_client = TurbopufferClient()
200
+ await tpuf_client.delete_source_passages(source_id=folder_id, organization_id=actor.organization_id)
201
+ elif should_use_pinecone():
195
202
  logger.info(f"Deleting folder {folder_id} from pinecone index")
196
203
  await delete_source_records_from_pinecone_index(source_id=folder_id, actor=actor)
197
204
 
@@ -320,7 +327,7 @@ async def upload_file_to_folder(
320
327
  logger=logger,
321
328
  label="file_processor.process",
322
329
  )
323
- safe_create_task(sleeptime_document_ingest_async(server, folder_id, actor), logger=logger, label="sleeptime_document_ingest_async")
330
+ safe_create_task(sleeptime_document_ingest_async(server, folder_id, actor), label="sleeptime_document_ingest_async")
324
331
 
325
332
  return file_metadata
326
333
 
@@ -450,11 +457,17 @@ async def delete_file_from_folder(
450
457
 
451
458
  await server.remove_file_from_context_windows(source_id=folder_id, file_id=deleted_file.id, actor=actor)
452
459
 
453
- if should_use_pinecone():
460
+ if should_use_tpuf():
461
+ logger.info(f"Deleting file {file_id} from Turbopuffer")
462
+ from letta.helpers.tpuf_client import TurbopufferClient
463
+
464
+ tpuf_client = TurbopufferClient()
465
+ await tpuf_client.delete_file_passages(source_id=folder_id, file_id=file_id, organization_id=actor.organization_id)
466
+ elif should_use_pinecone():
454
467
  logger.info(f"Deleting file {file_id} from pinecone index")
455
468
  await delete_file_records_from_pinecone_index(file_id=file_id, actor=actor)
456
469
 
457
- asyncio.create_task(sleeptime_document_ingest_async(server, folder_id, actor, clear_history=True))
470
+ safe_create_task(sleeptime_document_ingest_async(server, folder_id, actor, clear_history=True), label="document_ingest_after_delete")
458
471
  if deleted_file is None:
459
472
  raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")
460
473
 
@@ -496,10 +509,15 @@ async def load_file_to_source_cloud(
496
509
  else:
497
510
  file_parser = MarkitdownFileParser()
498
511
 
499
- using_pinecone = should_use_pinecone()
500
- if using_pinecone:
512
+ # determine which embedder to use - turbopuffer takes precedence
513
+ if should_use_tpuf():
514
+ from letta.services.file_processor.embedder.turbopuffer_embedder import TurbopufferEmbedder
515
+
516
+ embedder = TurbopufferEmbedder(embedding_config=embedding_config)
517
+ elif should_use_pinecone():
501
518
  embedder = PineconeEmbedder(embedding_config=embedding_config)
502
519
  else:
503
520
  embedder = OpenAIEmbedder(embedding_config=embedding_config)
504
- file_processor = FileProcessor(file_parser=file_parser, embedder=embedder, actor=actor, using_pinecone=using_pinecone)
521
+
522
+ file_processor = FileProcessor(file_parser=file_parser, embedder=embedder, actor=actor)
505
523
  await file_processor.process(agent_states=agent_states, source_id=source_id, content=content, file_metadata=file_metadata)
@@ -25,6 +25,11 @@ async def list_groups(
25
25
  after: Optional[str] = Query(None, description="Cursor for pagination"),
26
26
  limit: Optional[int] = Query(None, description="Limit for pagination"),
27
27
  project_id: Optional[str] = Query(None, description="Search groups by project id"),
28
+ show_hidden_groups: bool | None = Query(
29
+ False,
30
+ include_in_schema=False,
31
+ description="If set to True, include groups marked as hidden in the results.",
32
+ ),
28
33
  ):
29
34
  """
30
35
  Fetch all multi-agent groups matching query.
@@ -37,6 +42,7 @@ async def list_groups(
37
42
  before=before,
38
43
  after=after,
39
44
  limit=limit,
45
+ show_hidden_groups=show_hidden_groups,
40
46
  )
41
47
 
42
48