letta-nightly 0.11.7.dev20250910104051__py3-none-any.whl → 0.11.7.dev20250912104045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45):
  1. letta/adapters/letta_llm_request_adapter.py +4 -2
  2. letta/adapters/letta_llm_stream_adapter.py +4 -2
  3. letta/agents/agent_loop.py +23 -0
  4. letta/agents/letta_agent_v2.py +34 -12
  5. letta/functions/helpers.py +3 -2
  6. letta/groups/sleeptime_multi_agent_v2.py +4 -2
  7. letta/groups/sleeptime_multi_agent_v3.py +4 -2
  8. letta/helpers/tpuf_client.py +41 -9
  9. letta/interfaces/anthropic_streaming_interface.py +10 -6
  10. letta/interfaces/openai_streaming_interface.py +9 -74
  11. letta/llm_api/google_vertex_client.py +6 -1
  12. letta/llm_api/openai_client.py +9 -8
  13. letta/orm/agent.py +4 -1
  14. letta/orm/block.py +1 -0
  15. letta/orm/blocks_agents.py +1 -0
  16. letta/orm/job.py +5 -1
  17. letta/orm/organization.py +2 -0
  18. letta/orm/sources_agents.py +2 -1
  19. letta/orm/tools_agents.py +5 -2
  20. letta/schemas/message.py +19 -2
  21. letta/server/rest_api/interface.py +34 -2
  22. letta/server/rest_api/json_parser.py +2 -0
  23. letta/server/rest_api/redis_stream_manager.py +17 -3
  24. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
  25. letta/server/rest_api/routers/v1/agents.py +49 -180
  26. letta/server/rest_api/routers/v1/folders.py +2 -2
  27. letta/server/rest_api/routers/v1/sources.py +2 -2
  28. letta/server/rest_api/routers/v1/tools.py +23 -39
  29. letta/server/rest_api/streaming_response.py +2 -1
  30. letta/server/server.py +7 -5
  31. letta/services/agent_serialization_manager.py +4 -3
  32. letta/services/job_manager.py +5 -2
  33. letta/services/mcp_manager.py +66 -5
  34. letta/services/summarizer/summarizer.py +2 -1
  35. letta/services/tool_executor/files_tool_executor.py +2 -2
  36. letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
  37. letta/services/tool_sandbox/local_sandbox.py +2 -2
  38. letta/services/tool_sandbox/modal_version_manager.py +2 -1
  39. letta/streaming_utils.py +29 -4
  40. letta/utils.py +72 -3
  41. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/METADATA +3 -3
  42. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/RECORD +45 -44
  43. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/WHEEL +0 -0
  44. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/entry_points.txt +0 -0
  45. {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,4 @@
1
- from sqlalchemy import ForeignKey, String
1
+ from sqlalchemy import ForeignKey, Index, String
2
2
  from sqlalchemy.orm import Mapped, mapped_column
3
3
 
4
4
  from letta.orm.base import Base
@@ -8,6 +8,7 @@ class SourcesAgents(Base):
8
8
  """Agents can have zero to many sources"""
9
9
 
10
10
  __tablename__ = "sources_agents"
11
+ __table_args__ = (Index("ix_sources_agents_source_id", "source_id"),)
11
12
 
12
13
  agent_id: Mapped[String] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True)
13
14
  source_id: Mapped[String] = mapped_column(String, ForeignKey("sources.id", ondelete="CASCADE"), primary_key=True)
letta/orm/tools_agents.py CHANGED
@@ -1,4 +1,4 @@
1
- from sqlalchemy import ForeignKey, String, UniqueConstraint
1
+ from sqlalchemy import ForeignKey, Index, String, UniqueConstraint
2
2
  from sqlalchemy.orm import Mapped, mapped_column
3
3
 
4
4
  from letta.orm import Base
@@ -8,7 +8,10 @@ class ToolsAgents(Base):
8
8
  """Agents can have one or many tools associated with them."""
9
9
 
10
10
  __tablename__ = "tools_agents"
11
- __table_args__ = (UniqueConstraint("agent_id", "tool_id", name="unique_agent_tool"),)
11
+ __table_args__ = (
12
+ UniqueConstraint("agent_id", "tool_id", name="unique_agent_tool"),
13
+ Index("ix_tools_agents_tool_id", "tool_id"),
14
+ )
12
15
 
13
16
  # Each agent must have unique tool names
14
17
  agent_id: Mapped[str] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True)
letta/schemas/message.py CHANGED
@@ -1027,10 +1027,13 @@ class Message(BaseMessage):
1027
1027
  result = [m for m in result if m is not None]
1028
1028
  return result
1029
1029
 
1030
- def to_google_ai_dict(self, put_inner_thoughts_in_kwargs: bool = True) -> dict:
1030
+ def to_google_dict(self, put_inner_thoughts_in_kwargs: bool = True) -> dict | None:
1031
1031
  """
1032
1032
  Go from Message class to Google AI REST message object
1033
1033
  """
1034
+ if self.role == "approval" and self.tool_calls is None:
1035
+ return None
1036
+
1034
1037
  # type Content: https://ai.google.dev/api/rest/v1/Content / https://ai.google.dev/api/rest/v1beta/Content
1035
1038
  # parts[]: Part
1036
1039
  # role: str ('user' or 'model')
@@ -1076,7 +1079,7 @@ class Message(BaseMessage):
1076
1079
  "parts": content_parts,
1077
1080
  }
1078
1081
 
1079
- elif self.role == "assistant":
1082
+ elif self.role == "assistant" or self.role == "approval":
1080
1083
  assert self.tool_calls is not None or text_content is not None
1081
1084
  google_ai_message = {
1082
1085
  "role": "model", # NOTE: different
@@ -1164,6 +1167,20 @@ class Message(BaseMessage):
1164
1167
 
1165
1168
  return google_ai_message
1166
1169
 
1170
+ @staticmethod
1171
+ def to_google_dicts_from_list(
1172
+ messages: List[Message],
1173
+ put_inner_thoughts_in_kwargs: bool = True,
1174
+ ):
1175
+ result = [
1176
+ m.to_google_dict(
1177
+ put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
1178
+ )
1179
+ for m in messages
1180
+ ]
1181
+ result = [m for m in result if m is not None]
1182
+ return result
1183
+
1167
1184
  @staticmethod
1168
1185
  def generate_otid_from_id(message_id: str, index: int) -> str:
1169
1186
  """
@@ -295,6 +295,25 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
295
295
  self.optimistic_json_parser = OptimisticJSONParser()
296
296
  self.current_json_parse_result = {}
297
297
 
298
+ # NOTE (fix): OpenAI deltas may split a key and its value across chunks
299
+ # (e.g. '"request_heartbeat"' in one chunk, ': true' in the next). The
300
+ # old behavior passed through each fragment verbatim, which could emit
301
+ # a bare key (or a key+opening quote) without its value, producing
302
+ # invalid JSON slices and the "missing end-quote" symptom downstream.
303
+ #
304
+ # To make streamed arguments robust, we add a JSON-aware incremental
305
+ # reader that only releases safe updates for the "main" JSON portion of
306
+ # the tool_call arguments. This prevents partial-key emissions while
307
+ # preserving incremental streaming for consumers.
308
+ #
309
+ # We still stream 'name' fragments as-is (safe), but 'arguments' are
310
+ # parsed incrementally and emitted only when a boundary is safe.
311
+ self._raw_args_reader = JSONInnerThoughtsExtractor(
312
+ inner_thoughts_key=inner_thoughts_kwarg,
313
+ wait_for_first_key=False,
314
+ )
315
+ self._raw_args_tool_call_id = None
316
+
298
317
  # Store metadata passed from server
299
318
  self.metadata = {}
300
319
 
@@ -654,11 +673,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
654
673
  tool_call_delta = {}
655
674
  if tool_call.id:
656
675
  tool_call_delta["id"] = tool_call.id
676
+ # Reset raw args reader per tool_call id
677
+ if self._raw_args_tool_call_id != tool_call.id:
678
+ self._raw_args_tool_call_id = tool_call.id
679
+ self._raw_args_reader = JSONInnerThoughtsExtractor(
680
+ inner_thoughts_key=self.inner_thoughts_kwarg,
681
+ wait_for_first_key=False,
682
+ )
657
683
  if tool_call.function:
658
- if tool_call.function.arguments:
659
- tool_call_delta["arguments"] = tool_call.function.arguments
684
+ # Stream name fragments as-is (names are short and harmless to emit)
660
685
  if tool_call.function.name:
661
686
  tool_call_delta["name"] = tool_call.function.name
687
+ # For arguments, incrementally parse to avoid emitting partial keys
688
+ if tool_call.function.arguments:
689
+ self.current_function_arguments += tool_call.function.arguments
690
+ updates_main_json, _ = self._raw_args_reader.process_fragment(tool_call.function.arguments)
691
+ # Only emit argument updates when a safe boundary is reached
692
+ if updates_main_json:
693
+ tool_call_delta["arguments"] = updates_main_json
662
694
 
663
695
  # We might end up with a no-op, in which case we should omit
664
696
  if (
@@ -63,6 +63,8 @@ class OptimisticJSONParser(JSONParser):
63
63
  '"': self._parse_string,
64
64
  "t": self._parse_true,
65
65
  "f": self._parse_false,
66
+ "T": self._parse_true,
67
+ "F": self._parse_false,
66
68
  "n": self._parse_null,
67
69
  }
68
70
  # Register number parser for digits and signs
@@ -8,6 +8,10 @@ from typing import AsyncIterator, Dict, List, Optional
8
8
 
9
9
  from letta.data_sources.redis_client import AsyncRedisClient
10
10
  from letta.log import get_logger
11
+ from letta.schemas.enums import JobStatus
12
+ from letta.schemas.user import User
13
+ from letta.services.job_manager import JobManager
14
+ from letta.utils import safe_create_task
11
15
 
12
16
  logger = get_logger(__name__)
13
17
 
@@ -62,7 +66,7 @@ class RedisSSEStreamWriter:
62
66
  """Start the background flush task."""
63
67
  if not self._running:
64
68
  self._running = True
65
- self._flush_task = asyncio.create_task(self._periodic_flush())
69
+ self._flush_task = safe_create_task(self._periodic_flush(), label="redis_periodic_flush")
66
70
 
67
71
  async def stop(self):
68
72
  """Stop the background flush task and flush remaining data."""
@@ -132,9 +136,9 @@ class RedisSSEStreamWriter:
132
136
 
133
137
  async with client.pipeline(transaction=False) as pipe:
134
138
  for chunk in chunks:
135
- pipe.xadd(stream_key, chunk, maxlen=self.max_stream_length, approximate=True)
139
+ await pipe.xadd(stream_key, chunk, maxlen=self.max_stream_length, approximate=True)
136
140
 
137
- pipe.expire(stream_key, self.stream_ttl)
141
+ await pipe.expire(stream_key, self.stream_ttl)
138
142
 
139
143
  await pipe.execute()
140
144
 
@@ -190,6 +194,8 @@ async def create_background_stream_processor(
190
194
  redis_client: AsyncRedisClient,
191
195
  run_id: str,
192
196
  writer: Optional[RedisSSEStreamWriter] = None,
197
+ job_manager: Optional[JobManager] = None,
198
+ actor: Optional[User] = None,
193
199
  ) -> None:
194
200
  """
195
201
  Process a stream in the background and store chunks to Redis.
@@ -202,6 +208,8 @@ async def create_background_stream_processor(
202
208
  redis_client: Redis client instance
203
209
  run_id: The run ID to store chunks under
204
210
  writer: Optional pre-configured writer (creates new if not provided)
211
+ job_manager: Optional job manager for updating job status
212
+ actor: Optional actor for job status updates
205
213
  """
206
214
  if writer is None:
207
215
  writer = RedisSSEStreamWriter(redis_client)
@@ -226,6 +234,12 @@ async def create_background_stream_processor(
226
234
  logger.error(f"Error processing stream for run {run_id}: {e}")
227
235
  # Write error chunk
228
236
  # error_chunk = {"error": {"message": str(e)}}
237
+ # Mark run_id terminal state
238
+ if job_manager and actor:
239
+ await job_manager.safe_update_job_status_async(
240
+ job_id=run_id, new_status=JobStatus.failed, actor=actor, metadata={"error": str(e)}
241
+ )
242
+
229
243
  error_chunk = {"error": str(e), "code": "INTERNAL_SERVER_ERROR"}
230
244
  await writer.write_chunk(run_id=run_id, data=f"event: error\ndata: {json.dumps(error_chunk)}\n\n", is_complete=True)
231
245
  finally:
@@ -14,6 +14,7 @@ from letta.server.rest_api.chat_completions_interface import ChatCompletionsStre
14
14
 
15
15
  # TODO this belongs in a controller!
16
16
  from letta.server.rest_api.utils import get_letta_server, get_user_message_from_chat_completions_request, sse_async_generator
17
+ from letta.utils import safe_create_task
17
18
 
18
19
  if TYPE_CHECKING:
19
20
  from letta.server.server import SyncServer
@@ -98,7 +99,7 @@ async def send_message_to_agent_chat_completions(
98
99
 
99
100
  # Offload the synchronous message_func to a separate thread
100
101
  streaming_interface.stream_start()
101
- asyncio.create_task(
102
+ safe_create_task(
102
103
  asyncio.to_thread(
103
104
  server.send_messages,
104
105
  actor=actor,
@@ -106,7 +107,8 @@ async def send_message_to_agent_chat_completions(
106
107
  input_messages=messages,
107
108
  interface=streaming_interface,
108
109
  put_inner_thoughts_first=False,
109
- )
110
+ ),
111
+ label="openai_send_messages",
110
112
  )
111
113
 
112
114
  # return a stream
@@ -12,7 +12,9 @@ from pydantic import BaseModel, Field
12
12
  from sqlalchemy.exc import IntegrityError, OperationalError
13
13
  from starlette.responses import Response, StreamingResponse
14
14
 
15
+ from letta.agents.agent_loop import AgentLoop
15
16
  from letta.agents.letta_agent import LettaAgent
17
+ from letta.agents.letta_agent_v2 import LettaAgentV2
16
18
  from letta.constants import AGENT_ID_PATTERN, DEFAULT_MAX_STEPS, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, REDIS_RUN_ID_PREFIX
17
19
  from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
18
20
  from letta.errors import (
@@ -58,7 +60,7 @@ from letta.server.server import SyncServer
58
60
  from letta.services.summarizer.enums import SummarizationMode
59
61
  from letta.services.telemetry_manager import NoopTelemetryManager
60
62
  from letta.settings import settings
61
- from letta.utils import safe_create_task, truncate_file_visible_content
63
+ from letta.utils import safe_create_shielded_task, safe_create_task, truncate_file_visible_content
62
64
 
63
65
  # These can be forward refs, but because Fastapi needs them at runtime the must be imported normally
64
66
 
@@ -1144,7 +1146,9 @@ async def send_message(
1144
1146
 
1145
1147
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
1146
1148
  # TODO: This is redundant, remove soon
1147
- agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
1149
+ agent = await server.agent_manager.get_agent_by_id_async(
1150
+ agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
1151
+ )
1148
1152
  agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
1149
1153
  model_compatible = agent.llm_config.model_endpoint_type in [
1150
1154
  "anthropic",
@@ -1190,42 +1194,11 @@ async def send_message(
1190
1194
 
1191
1195
  try:
1192
1196
  if agent_eligible and model_compatible:
1193
- if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
1194
- agent_loop = SleeptimeMultiAgentV2(
1195
- agent_id=agent_id,
1196
- message_manager=server.message_manager,
1197
- agent_manager=server.agent_manager,
1198
- block_manager=server.block_manager,
1199
- passage_manager=server.passage_manager,
1200
- group_manager=server.group_manager,
1201
- job_manager=server.job_manager,
1202
- actor=actor,
1203
- group=agent.multi_agent_group,
1204
- current_run_id=run.id if run else None,
1205
- )
1206
- else:
1207
- agent_loop = LettaAgent(
1208
- agent_id=agent_id,
1209
- message_manager=server.message_manager,
1210
- agent_manager=server.agent_manager,
1211
- block_manager=server.block_manager,
1212
- job_manager=server.job_manager,
1213
- passage_manager=server.passage_manager,
1214
- actor=actor,
1215
- step_manager=server.step_manager,
1216
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1217
- current_run_id=run.id if run else None,
1218
- # summarizer settings to be added here
1219
- summarizer_mode=(
1220
- SummarizationMode.STATIC_MESSAGE_BUFFER
1221
- if agent.agent_type == AgentType.voice_convo_agent
1222
- else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
1223
- ),
1224
- )
1225
-
1197
+ agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
1226
1198
  result = await agent_loop.step(
1227
1199
  request.messages,
1228
1200
  max_steps=request.max_steps,
1201
+ run_id=run.id if run else None,
1229
1202
  use_assistant_message=request.use_assistant_message,
1230
1203
  request_start_timestamp_ns=request_start_timestamp_ns,
1231
1204
  include_return_message_types=request.include_return_message_types,
@@ -1299,7 +1272,9 @@ async def send_message_streaming(
1299
1272
 
1300
1273
  actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
1301
1274
  # TODO: This is redundant, remove soon
1302
- agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
1275
+ agent = await server.agent_manager.get_agent_by_id_async(
1276
+ agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
1277
+ )
1303
1278
  agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
1304
1279
  model_compatible = agent.llm_config.model_endpoint_type in [
1305
1280
  "anthropic",
@@ -1344,57 +1319,16 @@ async def send_message_streaming(
1344
1319
 
1345
1320
  try:
1346
1321
  if agent_eligible and model_compatible:
1347
- if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
1348
- agent_loop = SleeptimeMultiAgentV2(
1349
- agent_id=agent_id,
1350
- message_manager=server.message_manager,
1351
- agent_manager=server.agent_manager,
1352
- block_manager=server.block_manager,
1353
- passage_manager=server.passage_manager,
1354
- group_manager=server.group_manager,
1355
- job_manager=server.job_manager,
1356
- actor=actor,
1357
- step_manager=server.step_manager,
1358
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1359
- group=agent.multi_agent_group,
1360
- current_run_id=run.id if run else None,
1361
- )
1362
- else:
1363
- agent_loop = LettaAgent(
1364
- agent_id=agent_id,
1365
- message_manager=server.message_manager,
1366
- agent_manager=server.agent_manager,
1367
- block_manager=server.block_manager,
1368
- job_manager=server.job_manager,
1369
- passage_manager=server.passage_manager,
1370
- actor=actor,
1371
- step_manager=server.step_manager,
1372
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1373
- current_run_id=run.id if run else None,
1374
- # summarizer settings to be added here
1375
- summarizer_mode=(
1376
- SummarizationMode.STATIC_MESSAGE_BUFFER
1377
- if agent.agent_type == AgentType.voice_convo_agent
1378
- else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
1379
- ),
1380
- )
1381
-
1382
- if request.stream_tokens and model_compatible_token_streaming:
1383
- raw_stream = agent_loop.step_stream(
1384
- input_messages=request.messages,
1385
- max_steps=request.max_steps,
1386
- use_assistant_message=request.use_assistant_message,
1387
- request_start_timestamp_ns=request_start_timestamp_ns,
1388
- include_return_message_types=request.include_return_message_types,
1389
- )
1390
- else:
1391
- raw_stream = agent_loop.step_stream_no_tokens(
1392
- request.messages,
1393
- max_steps=request.max_steps,
1394
- use_assistant_message=request.use_assistant_message,
1395
- request_start_timestamp_ns=request_start_timestamp_ns,
1396
- include_return_message_types=request.include_return_message_types,
1397
- )
1322
+ agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
1323
+ raw_stream = agent_loop.stream(
1324
+ input_messages=request.messages,
1325
+ max_steps=request.max_steps,
1326
+ stream_tokens=request.stream_tokens and model_compatible_token_streaming,
1327
+ run_id=run.id if run else None,
1328
+ use_assistant_message=request.use_assistant_message,
1329
+ request_start_timestamp_ns=request_start_timestamp_ns,
1330
+ include_return_message_types=request.include_return_message_types,
1331
+ )
1398
1332
 
1399
1333
  from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
1400
1334
 
@@ -1409,12 +1343,15 @@ async def send_message_streaming(
1409
1343
  ),
1410
1344
  )
1411
1345
 
1412
- asyncio.create_task(
1346
+ safe_create_task(
1413
1347
  create_background_stream_processor(
1414
1348
  stream_generator=raw_stream,
1415
1349
  redis_client=redis_client,
1416
1350
  run_id=run.id,
1417
- )
1351
+ job_manager=server.job_manager,
1352
+ actor=actor,
1353
+ ),
1354
+ label=f"background_stream_processor_{run.id}",
1418
1355
  )
1419
1356
 
1420
1357
  raw_stream = redis_sse_stream_generator(
@@ -1568,7 +1505,9 @@ async def _process_message_background(
1568
1505
  """Background task to process the message and update job status."""
1569
1506
  request_start_timestamp_ns = get_utc_timestamp_ns()
1570
1507
  try:
1571
- agent = await server.agent_manager.get_agent_by_id_async(agent_id, actor, include_relationships=["multi_agent_group"])
1508
+ agent = await server.agent_manager.get_agent_by_id_async(
1509
+ agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
1510
+ )
1572
1511
  agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
1573
1512
  model_compatible = agent.llm_config.model_endpoint_type in [
1574
1513
  "anthropic",
@@ -1584,37 +1523,7 @@ async def _process_message_background(
1584
1523
  "deepseek",
1585
1524
  ]
1586
1525
  if agent_eligible and model_compatible:
1587
- if agent.enable_sleeptime and agent.agent_type != AgentType.voice_convo_agent:
1588
- agent_loop = SleeptimeMultiAgentV2(
1589
- agent_id=agent_id,
1590
- message_manager=server.message_manager,
1591
- agent_manager=server.agent_manager,
1592
- block_manager=server.block_manager,
1593
- passage_manager=server.passage_manager,
1594
- group_manager=server.group_manager,
1595
- job_manager=server.job_manager,
1596
- actor=actor,
1597
- group=agent.multi_agent_group,
1598
- )
1599
- else:
1600
- agent_loop = LettaAgent(
1601
- agent_id=agent_id,
1602
- message_manager=server.message_manager,
1603
- agent_manager=server.agent_manager,
1604
- block_manager=server.block_manager,
1605
- job_manager=server.job_manager,
1606
- passage_manager=server.passage_manager,
1607
- actor=actor,
1608
- step_manager=server.step_manager,
1609
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1610
- # summarizer settings to be added here
1611
- summarizer_mode=(
1612
- SummarizationMode.STATIC_MESSAGE_BUFFER
1613
- if agent.agent_type == AgentType.voice_convo_agent
1614
- else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
1615
- ),
1616
- )
1617
-
1526
+ agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
1618
1527
  result = await agent_loop.step(
1619
1528
  messages,
1620
1529
  max_steps=max_steps,
@@ -1702,8 +1611,8 @@ async def send_message_async(
1702
1611
  )
1703
1612
  run = await server.job_manager.create_job_async(pydantic_job=run, actor=actor)
1704
1613
 
1705
- # Create asyncio task for background processing
1706
- task = asyncio.create_task(
1614
+ # Create asyncio task for background processing (shielded to prevent cancellation)
1615
+ task = safe_create_shielded_task(
1707
1616
  _process_message_background(
1708
1617
  run_id=run.id,
1709
1618
  server=server,
@@ -1715,28 +1624,20 @@ async def send_message_async(
1715
1624
  assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
1716
1625
  max_steps=request.max_steps,
1717
1626
  include_return_message_types=request.include_return_message_types,
1718
- )
1627
+ ),
1628
+ label=f"process_message_background_{run.id}",
1719
1629
  )
1720
1630
 
1721
1631
  def handle_task_completion(t):
1722
1632
  try:
1723
1633
  t.result()
1724
1634
  except asyncio.CancelledError:
1725
- logger.error(f"Background task for run {run.id} was cancelled")
1726
- asyncio.create_task(
1727
- server.job_manager.update_job_by_id_async(
1728
- job_id=run.id,
1729
- job_update=JobUpdate(
1730
- status=JobStatus.failed,
1731
- completed_at=datetime.now(timezone.utc),
1732
- metadata={"error": "Task was cancelled"},
1733
- ),
1734
- actor=actor,
1735
- )
1736
- )
1635
+ # Note: With shielded tasks, cancellation attempts don't actually stop the task
1636
+ logger.info(f"Cancellation attempted on shielded background task for run {run.id}, but task continues running")
1637
+ # Don't mark as failed since the shielded task is still running
1737
1638
  except Exception as e:
1738
1639
  logger.error(f"Unhandled exception in background task for run {run.id}: {e}")
1739
- asyncio.create_task(
1640
+ safe_create_task(
1740
1641
  server.job_manager.update_job_by_id_async(
1741
1642
  job_id=run.id,
1742
1643
  job_update=JobUpdate(
@@ -1745,7 +1646,8 @@ async def send_message_async(
1745
1646
  metadata={"error": str(e)},
1746
1647
  ),
1747
1648
  actor=actor,
1748
- )
1649
+ ),
1650
+ label=f"update_failed_job_{run.id}",
1749
1651
  )
1750
1652
 
1751
1653
  task.add_done_callback(handle_task_completion)
@@ -1816,38 +1718,10 @@ async def preview_raw_payload(
1816
1718
  ]
1817
1719
 
1818
1720
  if agent_eligible and model_compatible:
1819
- if agent.enable_sleeptime:
1820
- # TODO: @caren need to support this for sleeptime
1821
- raise HTTPException(
1822
- status_code=status.HTTP_400_BAD_REQUEST,
1823
- detail="Payload inspection is not supported for agents with sleeptime enabled.",
1824
- )
1825
- else:
1826
- agent_loop = LettaAgent(
1827
- agent_id=agent_id,
1828
- message_manager=server.message_manager,
1829
- agent_manager=server.agent_manager,
1830
- block_manager=server.block_manager,
1831
- job_manager=server.job_manager,
1832
- passage_manager=server.passage_manager,
1833
- actor=actor,
1834
- step_manager=server.step_manager,
1835
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1836
- summarizer_mode=(
1837
- SummarizationMode.STATIC_MESSAGE_BUFFER
1838
- if agent.agent_type == AgentType.voice_convo_agent
1839
- else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
1840
- ),
1841
- )
1842
-
1843
- # TODO: Support step_streaming
1844
- return await agent_loop.step(
1721
+ agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
1722
+ return await agent_loop.build_request(
1845
1723
  input_messages=request.messages,
1846
- use_assistant_message=request.use_assistant_message,
1847
- include_return_message_types=request.include_return_message_types,
1848
- dry_run=True,
1849
1724
  )
1850
-
1851
1725
  else:
1852
1726
  raise HTTPException(
1853
1727
  status_code=status.HTTP_403_FORBIDDEN,
@@ -1888,19 +1762,14 @@ async def summarize_agent_conversation(
1888
1762
  ]
1889
1763
 
1890
1764
  if agent_eligible and model_compatible:
1891
- agent = LettaAgent(
1892
- agent_id=agent_id,
1893
- message_manager=server.message_manager,
1894
- agent_manager=server.agent_manager,
1895
- block_manager=server.block_manager,
1896
- job_manager=server.job_manager,
1897
- passage_manager=server.passage_manager,
1898
- actor=actor,
1899
- step_manager=server.step_manager,
1900
- telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
1901
- message_buffer_min=max_message_length,
1765
+ agent_loop = LettaAgentV2(agent_state=agent, actor=actor)
1766
+ in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
1767
+ await agent_loop.summarize_conversation_history(
1768
+ in_context_messages=in_context_messages,
1769
+ new_letta_messages=[],
1770
+ total_tokens=None,
1771
+ force=True,
1902
1772
  )
1903
- await agent.summarize_conversation_history()
1904
1773
  # Summarization completed, return 204 No Content
1905
1774
  else:
1906
1775
  raise HTTPException(
@@ -327,7 +327,7 @@ async def upload_file_to_folder(
327
327
  logger=logger,
328
328
  label="file_processor.process",
329
329
  )
330
- safe_create_task(sleeptime_document_ingest_async(server, folder_id, actor), logger=logger, label="sleeptime_document_ingest_async")
330
+ safe_create_task(sleeptime_document_ingest_async(server, folder_id, actor), label="sleeptime_document_ingest_async")
331
331
 
332
332
  return file_metadata
333
333
 
@@ -467,7 +467,7 @@ async def delete_file_from_folder(
467
467
  logger.info(f"Deleting file {file_id} from pinecone index")
468
468
  await delete_file_records_from_pinecone_index(file_id=file_id, actor=actor)
469
469
 
470
- asyncio.create_task(sleeptime_document_ingest_async(server, folder_id, actor, clear_history=True))
470
+ safe_create_task(sleeptime_document_ingest_async(server, folder_id, actor, clear_history=True), label="document_ingest_after_delete")
471
471
  if deleted_file is None:
472
472
  raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")
473
473
 
@@ -325,7 +325,7 @@ async def upload_file_to_source(
325
325
  logger=logger,
326
326
  label="file_processor.process",
327
327
  )
328
- safe_create_task(sleeptime_document_ingest_async(server, source_id, actor), logger=logger, label="sleeptime_document_ingest_async")
328
+ safe_create_task(sleeptime_document_ingest_async(server, source_id, actor), label="sleeptime_document_ingest_async")
329
329
 
330
330
  return file_metadata
331
331
 
@@ -452,7 +452,7 @@ async def delete_file_from_source(
452
452
  logger.info(f"Deleting file {file_id} from pinecone index")
453
453
  await delete_file_records_from_pinecone_index(file_id=file_id, actor=actor)
454
454
 
455
- asyncio.create_task(sleeptime_document_ingest_async(server, source_id, actor, clear_history=True))
455
+ safe_create_task(sleeptime_document_ingest_async(server, source_id, actor, clear_history=True), label="document_ingest_after_delete")
456
456
  if deleted_file is None:
457
457
  raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")
458
458