letta-nightly 0.11.7.dev20250909104137__py3-none-any.whl → 0.11.7.dev20250911104039__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86)
  1. letta/adapters/letta_llm_adapter.py +81 -0
  2. letta/adapters/letta_llm_request_adapter.py +113 -0
  3. letta/adapters/letta_llm_stream_adapter.py +171 -0
  4. letta/agents/agent_loop.py +23 -0
  5. letta/agents/base_agent.py +4 -1
  6. letta/agents/base_agent_v2.py +68 -0
  7. letta/agents/helpers.py +3 -5
  8. letta/agents/letta_agent.py +23 -12
  9. letta/agents/letta_agent_v2.py +1221 -0
  10. letta/agents/voice_agent.py +2 -1
  11. letta/constants.py +1 -1
  12. letta/errors.py +12 -0
  13. letta/functions/function_sets/base.py +53 -12
  14. letta/functions/helpers.py +3 -2
  15. letta/functions/schema_generator.py +1 -1
  16. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  17. letta/groups/sleeptime_multi_agent_v3.py +233 -0
  18. letta/helpers/tool_rule_solver.py +4 -0
  19. letta/helpers/tpuf_client.py +607 -34
  20. letta/interfaces/anthropic_streaming_interface.py +74 -30
  21. letta/interfaces/openai_streaming_interface.py +80 -37
  22. letta/llm_api/google_vertex_client.py +1 -1
  23. letta/llm_api/openai_client.py +45 -4
  24. letta/orm/agent.py +4 -1
  25. letta/orm/block.py +2 -0
  26. letta/orm/blocks_agents.py +1 -0
  27. letta/orm/group.py +1 -0
  28. letta/orm/source.py +8 -1
  29. letta/orm/sources_agents.py +2 -1
  30. letta/orm/step_metrics.py +10 -0
  31. letta/orm/tools_agents.py +5 -2
  32. letta/schemas/block.py +4 -0
  33. letta/schemas/enums.py +1 -0
  34. letta/schemas/group.py +8 -0
  35. letta/schemas/letta_message.py +1 -1
  36. letta/schemas/letta_request.py +2 -2
  37. letta/schemas/mcp.py +9 -1
  38. letta/schemas/message.py +42 -2
  39. letta/schemas/providers/ollama.py +1 -1
  40. letta/schemas/providers.py +1 -2
  41. letta/schemas/source.py +6 -0
  42. letta/schemas/step_metrics.py +2 -0
  43. letta/server/rest_api/interface.py +34 -2
  44. letta/server/rest_api/json_parser.py +2 -0
  45. letta/server/rest_api/redis_stream_manager.py +2 -1
  46. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  47. letta/server/rest_api/routers/v1/__init__.py +2 -0
  48. letta/server/rest_api/routers/v1/agents.py +132 -170
  49. letta/server/rest_api/routers/v1/blocks.py +6 -0
  50. letta/server/rest_api/routers/v1/folders.py +25 -7
  51. letta/server/rest_api/routers/v1/groups.py +6 -0
  52. letta/server/rest_api/routers/v1/internal_templates.py +218 -12
  53. letta/server/rest_api/routers/v1/messages.py +14 -19
  54. letta/server/rest_api/routers/v1/runs.py +43 -28
  55. letta/server/rest_api/routers/v1/sources.py +25 -7
  56. letta/server/rest_api/routers/v1/tools.py +42 -0
  57. letta/server/rest_api/streaming_response.py +11 -2
  58. letta/server/server.py +9 -6
  59. letta/services/agent_manager.py +39 -59
  60. letta/services/agent_serialization_manager.py +26 -11
  61. letta/services/archive_manager.py +60 -9
  62. letta/services/block_manager.py +5 -0
  63. letta/services/file_processor/embedder/base_embedder.py +5 -0
  64. letta/services/file_processor/embedder/openai_embedder.py +4 -0
  65. letta/services/file_processor/embedder/pinecone_embedder.py +5 -1
  66. letta/services/file_processor/embedder/turbopuffer_embedder.py +71 -0
  67. letta/services/file_processor/file_processor.py +9 -7
  68. letta/services/group_manager.py +74 -11
  69. letta/services/mcp_manager.py +134 -28
  70. letta/services/message_manager.py +229 -125
  71. letta/services/passage_manager.py +2 -1
  72. letta/services/source_manager.py +23 -1
  73. letta/services/summarizer/summarizer.py +4 -1
  74. letta/services/tool_executor/core_tool_executor.py +2 -120
  75. letta/services/tool_executor/files_tool_executor.py +133 -8
  76. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  77. letta/services/tool_sandbox/local_sandbox.py +2 -2
  78. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  79. letta/settings.py +6 -0
  80. letta/streaming_utils.py +29 -4
  81. letta/utils.py +106 -4
  82. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/METADATA +2 -2
  83. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/RECORD +86 -78
  84. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/WHEEL +0 -0
  85. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/entry_points.txt +0 -0
  86. {letta_nightly-0.11.7.dev20250909104137.dist-info → letta_nightly-0.11.7.dev20250911104039.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,7 @@
1
- from typing import Optional
1
+ from typing import List, Optional
2
2
 
3
- from fastapi import APIRouter, Body, Depends, Header, HTTPException
3
+ from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
4
+ from pydantic import BaseModel
4
5
 
5
6
  from letta.schemas.agent import AgentState, InternalTemplateAgentCreate
6
7
  from letta.schemas.block import Block, InternalTemplateBlockCreate
@@ -16,9 +17,6 @@ async def create_group(
16
17
  group: InternalTemplateGroupCreate = Body(...),
17
18
  server: "SyncServer" = Depends(get_letta_server),
18
19
  actor_id: Optional[str] = Header(None, alias="user_id"),
19
- x_project: Optional[str] = Header(
20
- None, alias="X-Project", description="The project slug to associate with the group (cloud only)."
21
- ), # Only handled by next js middleware
22
20
  ):
23
21
  """
24
22
  Create a new multi-agent group with the specified configuration.
@@ -35,9 +33,6 @@ async def create_agent(
35
33
  agent: InternalTemplateAgentCreate = Body(...),
36
34
  server: "SyncServer" = Depends(get_letta_server),
37
35
  actor_id: Optional[str] = Header(None, alias="user_id"),
38
- x_project: Optional[str] = Header(
39
- None, alias="X-Project", description="The project slug to associate with the agent (cloud only)."
40
- ), # Only handled by next js middleware
41
36
  ):
42
37
  """
43
38
  Create a new agent with template-related fields.
@@ -54,15 +49,226 @@ async def create_block(
54
49
  block: InternalTemplateBlockCreate = Body(...),
55
50
  server: "SyncServer" = Depends(get_letta_server),
56
51
  actor_id: Optional[str] = Header(None, alias="user_id"),
57
- x_project: Optional[str] = Header(
58
- None, alias="X-Project", description="The project slug to associate with the block (cloud only)."
59
- ), # Only handled by next js middleware
60
52
  ):
61
53
  """
62
54
  Create a new block with template-related fields.
63
55
  """
64
56
  try:
65
57
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
66
- return await server.block_manager.create_or_update_block_async(block, actor=actor)
58
+ block_obj = Block(**block.model_dump())
59
+ return await server.block_manager.create_or_update_block_async(block_obj, actor=actor)
60
+ except Exception as e:
61
+ raise HTTPException(status_code=500, detail=str(e))
62
+
63
+
64
+ class DeploymentEntity(BaseModel):
65
+ """A deployment entity."""
66
+
67
+ id: str
68
+ type: str
69
+ name: Optional[str] = None
70
+ description: Optional[str] = None
71
+
72
+
73
+ class ListDeploymentEntitiesResponse(BaseModel):
74
+ """Response model for listing deployment entities."""
75
+
76
+ entities: List[DeploymentEntity] = []
77
+ total_count: int
78
+ deployment_id: str
79
+ message: str
80
+
81
+
82
+ class DeleteDeploymentResponse(BaseModel):
83
+ """Response model for delete deployment operation."""
84
+
85
+ deleted_blocks: List[str] = []
86
+ deleted_agents: List[str] = []
87
+ deleted_groups: List[str] = []
88
+ message: str
89
+
90
+
91
+ @router.get("/deployment/{deployment_id}", response_model=ListDeploymentEntitiesResponse, operation_id="list_deployment_entities")
92
+ async def list_deployment_entities(
93
+ deployment_id: str,
94
+ server: "SyncServer" = Depends(get_letta_server),
95
+ actor_id: Optional[str] = Header(None, alias="user_id"),
96
+ entity_types: Optional[List[str]] = Query(None, description="Filter by entity types (block, agent, group)"),
97
+ ):
98
+ """
99
+ List all entities (blocks, agents, groups) with the specified deployment_id.
100
+ Optionally filter by entity types.
101
+ """
102
+ try:
103
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
104
+
105
+ entities = []
106
+
107
+ # Parse entity_types filter - support both array and comma-separated string
108
+ allowed_types = {"block", "agent", "group"}
109
+ if entity_types is None:
110
+ # If no filter specified, include all types
111
+ types_to_include = allowed_types
112
+ else:
113
+ # Handle comma-separated strings in a single item
114
+ if len(entity_types) == 1 and "," in entity_types[0]:
115
+ entity_types = [t.strip() for t in entity_types[0].split(",")]
116
+
117
+ # Validate and filter types
118
+ types_to_include = {t.lower() for t in entity_types if t.lower() in allowed_types}
119
+ if not types_to_include:
120
+ types_to_include = allowed_types # Default to all if invalid types provided
121
+
122
+ # Query blocks if requested
123
+ if "block" in types_to_include:
124
+ from sqlalchemy import select
125
+
126
+ from letta.orm.block import Block as BlockModel
127
+ from letta.server.db import db_registry
128
+
129
+ async with db_registry.async_session() as session:
130
+ block_query = select(BlockModel).where(
131
+ BlockModel.deployment_id == deployment_id, BlockModel.organization_id == actor.organization_id
132
+ )
133
+ result = await session.execute(block_query)
134
+ blocks = result.scalars().all()
135
+
136
+ for block in blocks:
137
+ entities.append(
138
+ DeploymentEntity(
139
+ id=block.id,
140
+ type="block",
141
+ name=getattr(block, "template_name", None) or getattr(block, "label", None),
142
+ description=block.description,
143
+ )
144
+ )
145
+
146
+ # Query agents if requested
147
+ if "agent" in types_to_include:
148
+ from letta.orm.agent import Agent as AgentModel
149
+
150
+ async with db_registry.async_session() as session:
151
+ agent_query = select(AgentModel).where(
152
+ AgentModel.deployment_id == deployment_id, AgentModel.organization_id == actor.organization_id
153
+ )
154
+ result = await session.execute(agent_query)
155
+ agents = result.scalars().all()
156
+
157
+ for agent in agents:
158
+ entities.append(DeploymentEntity(id=agent.id, type="agent", name=agent.name, description=agent.description))
159
+
160
+ # Query groups if requested
161
+ if "group" in types_to_include:
162
+ from letta.orm.group import Group as GroupModel
163
+
164
+ async with db_registry.async_session() as session:
165
+ group_query = select(GroupModel).where(
166
+ GroupModel.deployment_id == deployment_id, GroupModel.organization_id == actor.organization_id
167
+ )
168
+ result = await session.execute(group_query)
169
+ groups = result.scalars().all()
170
+
171
+ for group in groups:
172
+ entities.append(
173
+ DeploymentEntity(
174
+ id=group.id,
175
+ type="group",
176
+ name=None, # Groups don't have a name field
177
+ description=group.description,
178
+ )
179
+ )
180
+
181
+ message = f"Found {len(entities)} entities for deployment {deployment_id}"
182
+ if entity_types:
183
+ message += f" (filtered by types: {', '.join(types_to_include)})"
184
+
185
+ return ListDeploymentEntitiesResponse(entities=entities, total_count=len(entities), deployment_id=deployment_id, message=message)
186
+ except Exception as e:
187
+ raise HTTPException(status_code=500, detail=str(e))
188
+
189
+
190
+ @router.delete("/deployment/{deployment_id}", response_model=DeleteDeploymentResponse, operation_id="delete_deployment")
191
+ async def delete_deployment(
192
+ deployment_id: str,
193
+ server: "SyncServer" = Depends(get_letta_server),
194
+ actor_id: Optional[str] = Header(None, alias="user_id"),
195
+ ):
196
+ """
197
+ Delete all entities (blocks, agents, groups) with the specified deployment_id.
198
+ Deletion order: blocks -> agents -> groups to maintain referential integrity.
199
+ """
200
+ try:
201
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
202
+
203
+ deleted_blocks = []
204
+ deleted_agents = []
205
+ deleted_groups = []
206
+
207
+ # First delete blocks
208
+ from sqlalchemy import select
209
+
210
+ from letta.orm.block import Block as BlockModel
211
+ from letta.server.db import db_registry
212
+
213
+ async with db_registry.async_session() as session:
214
+ # Get all blocks with the deployment_id
215
+ block_query = select(BlockModel).where(
216
+ BlockModel.deployment_id == deployment_id, BlockModel.organization_id == actor.organization_id
217
+ )
218
+ result = await session.execute(block_query)
219
+ blocks = result.scalars().all()
220
+
221
+ for block in blocks:
222
+ try:
223
+ await server.block_manager.delete_block_async(block.id, actor)
224
+ deleted_blocks.append(block.id)
225
+ except Exception as e:
226
+ # Continue deleting other blocks even if one fails
227
+ print(f"Failed to delete block {block.id}: {e}")
228
+
229
+ # Then delete agents
230
+ from letta.orm.agent import Agent as AgentModel
231
+
232
+ async with db_registry.async_session() as session:
233
+ # Get all agents with the deployment_id
234
+ agent_query = select(AgentModel).where(
235
+ AgentModel.deployment_id == deployment_id, AgentModel.organization_id == actor.organization_id
236
+ )
237
+ result = await session.execute(agent_query)
238
+ agents = result.scalars().all()
239
+
240
+ for agent in agents:
241
+ try:
242
+ await server.agent_manager.delete_agent_async(agent.id, actor)
243
+ deleted_agents.append(agent.id)
244
+ except Exception as e:
245
+ # Continue deleting other agents even if one fails
246
+ print(f"Failed to delete agent {agent.id}: {e}")
247
+
248
+ # Finally delete groups
249
+ from letta.orm.group import Group as GroupModel
250
+
251
+ async with db_registry.async_session() as session:
252
+ # Get all groups with the deployment_id
253
+ group_query = select(GroupModel).where(
254
+ GroupModel.deployment_id == deployment_id, GroupModel.organization_id == actor.organization_id
255
+ )
256
+ result = await session.execute(group_query)
257
+ groups = result.scalars().all()
258
+
259
+ for group in groups:
260
+ try:
261
+ await server.group_manager.delete_group_async(group.id, actor)
262
+ deleted_groups.append(group.id)
263
+ except Exception as e:
264
+ # Continue deleting other groups even if one fails
265
+ print(f"Failed to delete group {group.id}: {e}")
266
+
267
+ total_deleted = len(deleted_blocks) + len(deleted_agents) + len(deleted_groups)
268
+ message = f"Successfully deleted {total_deleted} entities from deployment {deployment_id}"
269
+
270
+ return DeleteDeploymentResponse(
271
+ deleted_blocks=deleted_blocks, deleted_agents=deleted_agents, deleted_groups=deleted_groups, message=message
272
+ )
67
273
  except Exception as e:
68
274
  raise HTTPException(status_code=500, detail=str(e))
@@ -1,4 +1,4 @@
1
- from typing import List, Optional
1
+ from typing import List, Literal, Optional
2
2
 
3
3
  from fastapi import APIRouter, Body, Depends, Header, Query
4
4
  from fastapi.exceptions import HTTPException
@@ -25,9 +25,9 @@ logger = get_logger(__name__)
25
25
  @router.post(
26
26
  "/batches",
27
27
  response_model=BatchJob,
28
- operation_id="create_messages_batch",
28
+ operation_id="create_batch_run",
29
29
  )
30
- async def create_messages_batch(
30
+ async def create_batch_run(
31
31
  request: Request,
32
32
  payload: CreateBatch = Body(..., description="Messages and config for all agents"),
33
33
  server: SyncServer = Depends(get_letta_server),
@@ -127,25 +127,21 @@ async def list_batch_runs(
127
127
  )
128
128
  async def list_batch_messages(
129
129
  batch_id: str,
130
- limit: int = Query(100, description="Maximum number of messages to return"),
131
- cursor: Optional[str] = Query(
132
- None, description="Message ID to use as pagination cursor (get messages before/after this ID) depending on sort_descending."
130
+ before: Optional[str] = Query(
131
+ None, description="Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order"
132
+ ),
133
+ after: Optional[str] = Query(
134
+ None, description="Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order"
135
+ ),
136
+ limit: Optional[int] = Query(100, description="Maximum number of messages to return"),
137
+ order: Literal["asc", "desc"] = Query(
138
+ "desc", description="Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first"
133
139
  ),
134
140
  agent_id: Optional[str] = Query(None, description="Filter messages by agent ID"),
135
- sort_descending: bool = Query(True, description="Sort messages by creation time (true=newest first)"),
136
141
  actor_id: Optional[str] = Header(None, alias="user_id"),
137
142
  server: SyncServer = Depends(get_letta_server),
138
143
  ):
139
- """
140
- Get messages for a specific batch job.
141
-
142
- Returns messages associated with the batch in chronological order.
143
-
144
- Pagination:
145
- - For the first page, omit the cursor parameter
146
- - For subsequent pages, use the ID of the last message from the previous response as the cursor
147
- - Results will include messages before/after the cursor based on sort_descending
148
- """
144
+ """Get response messages for a specific batch job."""
149
145
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
150
146
 
151
147
  # First, verify the batch job exists and the user has access to it
@@ -156,9 +152,8 @@ async def list_batch_messages(
156
152
  raise HTTPException(status_code=404, detail="Batch not found")
157
153
 
158
154
  # Get messages directly using our efficient method
159
- # We'll need to update the underlying implementation to use message_id as cursor
160
155
  messages = await server.batch_manager.get_messages_for_letta_batch_async(
161
- letta_batch_job_id=batch_id, limit=limit, actor=actor, agent_id=agent_id, sort_descending=sort_descending, cursor=cursor
156
+ letta_batch_job_id=batch_id, limit=limit, actor=actor, agent_id=agent_id, ascending=(order == "asc"), before=before, after=after
162
157
  )
163
158
 
164
159
  return LettaBatchMessages(messages=messages)
@@ -1,5 +1,5 @@
1
1
  from datetime import timedelta
2
- from typing import Annotated, List, Optional
2
+ from typing import Annotated, List, Literal, Optional
3
3
 
4
4
  from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
5
5
  from pydantic import Field
@@ -14,7 +14,11 @@ from letta.schemas.openai.chat_completion_response import UsageStatistics
14
14
  from letta.schemas.run import Run
15
15
  from letta.schemas.step import Step
16
16
  from letta.server.rest_api.redis_stream_manager import redis_sse_stream_generator
17
- from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
17
+ from letta.server.rest_api.streaming_response import (
18
+ StreamingResponseWithStatusCode,
19
+ add_keepalive_to_stream,
20
+ cancellation_aware_stream_wrapper,
21
+ )
18
22
  from letta.server.rest_api.utils import get_letta_server
19
23
  from letta.server.server import SyncServer
20
24
  from letta.settings import settings
@@ -115,33 +119,18 @@ async def list_run_messages(
115
119
  run_id: str,
116
120
  server: "SyncServer" = Depends(get_letta_server),
117
121
  actor_id: Optional[str] = Header(None, alias="user_id"),
118
- before: Optional[str] = Query(None, description="Cursor for pagination"),
119
- after: Optional[str] = Query(None, description="Cursor for pagination"),
122
+ before: Optional[str] = Query(
123
+ None, description="Message ID cursor for pagination. Returns messages that come before this message ID in the specified sort order"
124
+ ),
125
+ after: Optional[str] = Query(
126
+ None, description="Message ID cursor for pagination. Returns messages that come after this message ID in the specified sort order"
127
+ ),
120
128
  limit: Optional[int] = Query(100, description="Maximum number of messages to return"),
121
- order: str = Query(
122
- "asc", description="Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order."
129
+ order: Literal["asc", "desc"] = Query(
130
+ "asc", description="Sort order for messages by creation time. 'asc' for oldest first, 'desc' for newest first"
123
131
  ),
124
- role: Optional[MessageRole] = Query(None, description="Filter by role"),
125
132
  ):
126
- """
127
- Get messages associated with a run with filtering options.
128
-
129
- Args:
130
- run_id: ID of the run
131
- before: A cursor for use in pagination. `before` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, starting with obj_foo, your subsequent call can include before=obj_foo in order to fetch the previous page of the list.
132
- after: A cursor for use in pagination. `after` is an object ID that defines your place in the list. For instance, if you make a list request and receive 100 objects, ending with obj_foo, your subsequent call can include after=obj_foo in order to fetch the next page of the list.
133
- limit: Maximum number of messages to return
134
- order: Sort order by the created_at timestamp of the objects. asc for ascending order and desc for descending order.
135
- role: Filter by role (user/assistant/system/tool)
136
- return_message_object: Whether to return Message objects or LettaMessage objects
137
- user_id: ID of the user making the request
138
-
139
- Returns:
140
- A list of messages associated with the run. Default is List[LettaMessage].
141
- """
142
- if order not in ["asc", "desc"]:
143
- raise HTTPException(status_code=400, detail="Order must be 'asc' or 'desc'")
144
-
133
+ """Get response messages associated with a run."""
145
134
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
146
135
 
147
136
  try:
@@ -152,7 +141,6 @@ async def list_run_messages(
152
141
  before=before,
153
142
  after=after,
154
143
  ascending=(order == "asc"),
155
- role=role,
156
144
  )
157
145
  return messages
158
146
  except NoResultFound as e:
@@ -251,7 +239,26 @@ async def delete_run(
251
239
  200: {
252
240
  "description": "Successful response",
253
241
  "content": {
254
- "text/event-stream": {"description": "Server-Sent Events stream"},
242
+ # Align streaming schema with agents.create_stream so SDKs accept approval messages
243
+ "text/event-stream": {
244
+ "description": "Server-Sent Events stream",
245
+ "schema": {
246
+ "oneOf": [
247
+ {"$ref": "#/components/schemas/SystemMessage"},
248
+ {"$ref": "#/components/schemas/UserMessage"},
249
+ {"$ref": "#/components/schemas/ReasoningMessage"},
250
+ {"$ref": "#/components/schemas/HiddenReasoningMessage"},
251
+ {"$ref": "#/components/schemas/ToolCallMessage"},
252
+ {"$ref": "#/components/schemas/ToolReturnMessage"},
253
+ {"$ref": "#/components/schemas/AssistantMessage"},
254
+ {"$ref": "#/components/schemas/ApprovalRequestMessage"},
255
+ {"$ref": "#/components/schemas/ApprovalResponseMessage"},
256
+ {"$ref": "#/components/schemas/LettaPing"},
257
+ {"$ref": "#/components/schemas/LettaStopReason"},
258
+ {"$ref": "#/components/schemas/LettaUsageStatistics"},
259
+ ]
260
+ },
261
+ },
255
262
  },
256
263
  }
257
264
  },
@@ -296,6 +303,14 @@ async def retrieve_stream(
296
303
  batch_size=request.batch_size,
297
304
  )
298
305
 
306
+ if settings.enable_cancellation_aware_streaming:
307
+ stream = cancellation_aware_stream_wrapper(
308
+ stream_generator=stream,
309
+ job_manager=server.job_manager,
310
+ job_id=run_id,
311
+ actor=actor,
312
+ )
313
+
299
314
  if request.include_pings and settings.enable_keepalive:
300
315
  stream = add_keepalive_to_stream(stream, keepalive_interval=settings.keepalive_interval)
301
316
 
@@ -15,6 +15,7 @@ from letta.helpers.pinecone_utils import (
15
15
  delete_source_records_from_pinecone_index,
16
16
  should_use_pinecone,
17
17
  )
18
+ from letta.helpers.tpuf_client import should_use_tpuf
18
19
  from letta.log import get_logger
19
20
  from letta.otel.tracing import trace_method
20
21
  from letta.schemas.agent import AgentState
@@ -189,7 +190,13 @@ async def delete_source(
189
190
  files = await server.file_manager.list_files(source_id, actor)
190
191
  file_ids = [f.id for f in files]
191
192
 
192
- if should_use_pinecone():
193
+ if should_use_tpuf():
194
+ logger.info(f"Deleting source {source_id} from Turbopuffer")
195
+ from letta.helpers.tpuf_client import TurbopufferClient
196
+
197
+ tpuf_client = TurbopufferClient()
198
+ await tpuf_client.delete_source_passages(source_id=source_id, organization_id=actor.organization_id)
199
+ elif should_use_pinecone():
193
200
  logger.info(f"Deleting source {source_id} from pinecone index")
194
201
  await delete_source_records_from_pinecone_index(source_id=source_id, actor=actor)
195
202
 
@@ -318,7 +325,7 @@ async def upload_file_to_source(
318
325
  logger=logger,
319
326
  label="file_processor.process",
320
327
  )
321
- safe_create_task(sleeptime_document_ingest_async(server, source_id, actor), logger=logger, label="sleeptime_document_ingest_async")
328
+ safe_create_task(sleeptime_document_ingest_async(server, source_id, actor), label="sleeptime_document_ingest_async")
322
329
 
323
330
  return file_metadata
324
331
 
@@ -435,11 +442,17 @@ async def delete_file_from_source(
435
442
 
436
443
  await server.remove_file_from_context_windows(source_id=source_id, file_id=deleted_file.id, actor=actor)
437
444
 
438
- if should_use_pinecone():
445
+ if should_use_tpuf():
446
+ logger.info(f"Deleting file {file_id} from Turbopuffer")
447
+ from letta.helpers.tpuf_client import TurbopufferClient
448
+
449
+ tpuf_client = TurbopufferClient()
450
+ await tpuf_client.delete_file_passages(source_id=source_id, file_id=file_id, organization_id=actor.organization_id)
451
+ elif should_use_pinecone():
439
452
  logger.info(f"Deleting file {file_id} from pinecone index")
440
453
  await delete_file_records_from_pinecone_index(file_id=file_id, actor=actor)
441
454
 
442
- asyncio.create_task(sleeptime_document_ingest_async(server, source_id, actor, clear_history=True))
455
+ safe_create_task(sleeptime_document_ingest_async(server, source_id, actor, clear_history=True), label="document_ingest_after_delete")
443
456
  if deleted_file is None:
444
457
  raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")
445
458
 
@@ -481,10 +494,15 @@ async def load_file_to_source_cloud(
481
494
  else:
482
495
  file_parser = MarkitdownFileParser()
483
496
 
484
- using_pinecone = should_use_pinecone()
485
- if using_pinecone:
497
+ # determine which embedder to use - turbopuffer takes precedence
498
+ if should_use_tpuf():
499
+ from letta.services.file_processor.embedder.turbopuffer_embedder import TurbopufferEmbedder
500
+
501
+ embedder = TurbopufferEmbedder(embedding_config=embedding_config)
502
+ elif should_use_pinecone():
486
503
  embedder = PineconeEmbedder(embedding_config=embedding_config)
487
504
  else:
488
505
  embedder = OpenAIEmbedder(embedding_config=embedding_config)
489
- file_processor = FileProcessor(file_parser=file_parser, embedder=embedder, actor=actor, using_pinecone=using_pinecone)
506
+
507
+ file_processor = FileProcessor(file_parser=file_parser, embedder=embedder, actor=actor)
490
508
  await file_processor.process(agent_states=agent_states, source_id=source_id, content=content, file_metadata=file_metadata)
@@ -587,6 +587,48 @@ async def list_mcp_tools_by_server(
587
587
  return mcp_tools
588
588
 
589
589
 
590
+ @router.post("/mcp/servers/{mcp_server_name}/resync", operation_id="resync_mcp_server_tools")
591
+ async def resync_mcp_server_tools(
592
+ mcp_server_name: str,
593
+ server: SyncServer = Depends(get_letta_server),
594
+ actor_id: Optional[str] = Header(None, alias="user_id"),
595
+ agent_id: Optional[str] = None,
596
+ ):
597
+ """
598
+ Resync tools for an MCP server by:
599
+ 1. Fetching current tools from the MCP server
600
+ 2. Deleting tools that no longer exist on the server
601
+ 3. Updating schemas for existing tools
602
+ 4. Adding new tools from the server
603
+
604
+ Returns a summary of changes made.
605
+ """
606
+ actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
607
+
608
+ try:
609
+ result = await server.mcp_manager.resync_mcp_server_tools(mcp_server_name=mcp_server_name, actor=actor, agent_id=agent_id)
610
+ return result
611
+ except ValueError as e:
612
+ raise HTTPException(
613
+ status_code=404,
614
+ detail={
615
+ "code": "MCPServerNotFoundError",
616
+ "message": str(e),
617
+ "mcp_server_name": mcp_server_name,
618
+ },
619
+ )
620
+ except Exception as e:
621
+ logger.error(f"Unexpected error refreshing MCP server tools: {e}")
622
+ raise HTTPException(
623
+ status_code=404,
624
+ detail={
625
+ "code": "MCPRefreshError",
626
+ "message": f"Failed to refresh MCP server tools: {str(e)}",
627
+ "mcp_server_name": mcp_server_name,
628
+ },
629
+ )
630
+
631
+
590
632
  @router.post("/mcp/servers/{mcp_server_name}/{mcp_tool_name}", response_model=Tool, operation_id="add_mcp_tool")
591
633
  async def add_mcp_tool(
592
634
  mcp_server_name: str,
@@ -7,10 +7,11 @@ import json
7
7
  from collections.abc import AsyncIterator
8
8
 
9
9
  import anyio
10
+ from fastapi import HTTPException
10
11
  from fastapi.responses import StreamingResponse
11
12
  from starlette.types import Send
12
13
 
13
- from letta.errors import LettaUnexpectedStreamCancellationError
14
+ from letta.errors import LettaUnexpectedStreamCancellationError, PendingApprovalError
14
15
  from letta.log import get_logger
15
16
  from letta.schemas.enums import JobStatus
16
17
  from letta.schemas.letta_ping import LettaPing
@@ -18,6 +19,7 @@ from letta.schemas.user import User
18
19
  from letta.server.rest_api.utils import capture_sentry_exception
19
20
  from letta.services.job_manager import JobManager
20
21
  from letta.settings import settings
22
+ from letta.utils import safe_create_task
21
23
 
22
24
  logger = get_logger(__name__)
23
25
 
@@ -63,7 +65,7 @@ async def add_keepalive_to_stream(
63
65
  await queue.put(("end", None))
64
66
 
65
67
  # Start the stream reader task
66
- reader_task = asyncio.create_task(stream_reader())
68
+ reader_task = safe_create_task(stream_reader(), label="stream_reader")
67
69
 
68
70
  try:
69
71
  while True:
@@ -189,6 +191,13 @@ class StreamingResponseWithStatusCode(StreamingResponse):
189
191
  except anyio.ClosedResourceError:
190
192
  logger.info("Client disconnected, but shielded task should continue")
191
193
  self._client_connected = False
194
+ except PendingApprovalError as e:
195
+ # This is an expected error, don't log as error
196
+ logger.info(f"Pending approval conflict in stream response: {e}")
197
+ # Re-raise as HTTPException for proper client handling
198
+ raise HTTPException(
199
+ status_code=409, detail={"code": "PENDING_APPROVAL", "message": str(e), "pending_request_id": e.pending_request_id}
200
+ )
192
201
  except Exception as e:
193
202
  logger.error(f"Error in protected stream response: {e}")
194
203
  raise
letta/server/server.py CHANGED
@@ -109,7 +109,7 @@ from letta.services.tool_manager import ToolManager
109
109
  from letta.services.user_manager import UserManager
110
110
  from letta.settings import DatabaseChoice, model_settings, settings, tool_settings
111
111
  from letta.streaming_interface import AgentChunkStreamingInterface
112
- from letta.utils import get_friendly_error_msg, get_persona_text, make_key
112
+ from letta.utils import get_friendly_error_msg, get_persona_text, make_key, safe_create_task
113
113
 
114
114
  config = LettaConfig.load()
115
115
  logger = get_logger(__name__)
@@ -1125,7 +1125,8 @@ class SyncServer(Server):
1125
1125
  ascending=ascending,
1126
1126
  limit=limit,
1127
1127
  )
1128
- return records
1128
+ # Extract just the passages (SQL path returns empty metadata)
1129
+ return [passage for passage, _, _ in records]
1129
1130
 
1130
1131
  async def insert_archival_memory_async(
1131
1132
  self, agent_id: str, memory_contents: str, actor: User, tags: Optional[List[str]], created_at: Optional[datetime]
@@ -2247,7 +2248,7 @@ class SyncServer(Server):
2247
2248
 
2248
2249
  # Offload the synchronous message_func to a separate thread
2249
2250
  streaming_interface.stream_start()
2250
- task = asyncio.create_task(
2251
+ task = safe_create_task(
2251
2252
  asyncio.to_thread(
2252
2253
  self.send_messages,
2253
2254
  actor=actor,
@@ -2255,7 +2256,8 @@ class SyncServer(Server):
2255
2256
  input_messages=input_messages,
2256
2257
  interface=streaming_interface,
2257
2258
  metadata=metadata,
2258
- )
2259
+ ),
2260
+ label="send_messages_thread",
2259
2261
  )
2260
2262
 
2261
2263
  if stream_steps:
@@ -2362,13 +2364,14 @@ class SyncServer(Server):
2362
2364
  streaming_interface.metadata = metadata
2363
2365
 
2364
2366
  streaming_interface.stream_start()
2365
- task = asyncio.create_task(
2367
+ task = safe_create_task(
2366
2368
  asyncio.to_thread(
2367
2369
  letta_multi_agent.step,
2368
2370
  input_messages=input_messages,
2369
2371
  chaining=self.chaining,
2370
2372
  max_chaining_steps=self.max_chaining_steps,
2371
- )
2373
+ ),
2374
+ label="multi_agent_step_thread",
2372
2375
  )
2373
2376
 
2374
2377
  if stream_steps: