letta-nightly 0.11.7.dev20250910104051__py3-none-any.whl → 0.11.7.dev20250912104045__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
- letta/adapters/letta_llm_request_adapter.py +4 -2
- letta/adapters/letta_llm_stream_adapter.py +4 -2
- letta/agents/agent_loop.py +23 -0
- letta/agents/letta_agent_v2.py +34 -12
- letta/functions/helpers.py +3 -2
- letta/groups/sleeptime_multi_agent_v2.py +4 -2
- letta/groups/sleeptime_multi_agent_v3.py +4 -2
- letta/helpers/tpuf_client.py +41 -9
- letta/interfaces/anthropic_streaming_interface.py +10 -6
- letta/interfaces/openai_streaming_interface.py +9 -74
- letta/llm_api/google_vertex_client.py +6 -1
- letta/llm_api/openai_client.py +9 -8
- letta/orm/agent.py +4 -1
- letta/orm/block.py +1 -0
- letta/orm/blocks_agents.py +1 -0
- letta/orm/job.py +5 -1
- letta/orm/organization.py +2 -0
- letta/orm/sources_agents.py +2 -1
- letta/orm/tools_agents.py +5 -2
- letta/schemas/message.py +19 -2
- letta/server/rest_api/interface.py +34 -2
- letta/server/rest_api/json_parser.py +2 -0
- letta/server/rest_api/redis_stream_manager.py +17 -3
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
- letta/server/rest_api/routers/v1/agents.py +49 -180
- letta/server/rest_api/routers/v1/folders.py +2 -2
- letta/server/rest_api/routers/v1/sources.py +2 -2
- letta/server/rest_api/routers/v1/tools.py +23 -39
- letta/server/rest_api/streaming_response.py +2 -1
- letta/server/server.py +7 -5
- letta/services/agent_serialization_manager.py +4 -3
- letta/services/job_manager.py +5 -2
- letta/services/mcp_manager.py +66 -5
- letta/services/summarizer/summarizer.py +2 -1
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/multi_agent_tool_executor.py +17 -14
- letta/services/tool_sandbox/local_sandbox.py +2 -2
- letta/services/tool_sandbox/modal_version_manager.py +2 -1
- letta/streaming_utils.py +29 -4
- letta/utils.py +72 -3
- {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/METADATA +3 -3
- {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/RECORD +45 -44
- {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250910104051.dist-info → letta_nightly-0.11.7.dev20250912104045.dist-info}/licenses/LICENSE +0 -0
letta/orm/sources_agents.py
CHANGED
@@ -1,4 +1,4 @@
-from sqlalchemy import ForeignKey, String
+from sqlalchemy import ForeignKey, Index, String
 from sqlalchemy.orm import Mapped, mapped_column
 
 from letta.orm.base import Base
@@ -8,6 +8,7 @@ class SourcesAgents(Base):
     """Agents can have zero to many sources"""
 
     __tablename__ = "sources_agents"
+    __table_args__ = (Index("ix_sources_agents_source_id", "source_id"),)
 
     agent_id: Mapped[String] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True)
     source_id: Mapped[String] = mapped_column(String, ForeignKey("sources.id", ondelete="CASCADE"), primary_key=True)
letta/orm/tools_agents.py
CHANGED
@@ -1,4 +1,4 @@
-from sqlalchemy import ForeignKey, String, UniqueConstraint
+from sqlalchemy import ForeignKey, Index, String, UniqueConstraint
 from sqlalchemy.orm import Mapped, mapped_column
 
 from letta.orm import Base
@@ -8,7 +8,10 @@ class ToolsAgents(Base):
     """Agents can have one or many tools associated with them."""
 
     __tablename__ = "tools_agents"
-    __table_args__ = (
+    __table_args__ = (
+        UniqueConstraint("agent_id", "tool_id", name="unique_agent_tool"),
+        Index("ix_tools_agents_tool_id", "tool_id"),
+    )
 
     # Each agent must have unique tool names
     agent_id: Mapped[str] = mapped_column(String, ForeignKey("agents.id", ondelete="CASCADE"), primary_key=True)
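Both association tables gain a secondary index on the non-leading column of their composite primary key. Lookups by agent_id already use the primary-key index, but reverse lookups by source_id or tool_id alone previously had no covering index. A minimal sketch of the query shape these indexes serve, assuming a standard SQLAlchemy session (the helper function below is illustrative, not part of the diff):

# Sketch: reverse lookup that benefits from ix_tools_agents_tool_id.
# The composite PK (agent_id, tool_id) serves agent_id-first lookups;
# filtering by tool_id alone would otherwise scan the table.
from sqlalchemy import select
from letta.orm.tools_agents import ToolsAgents  # module path per the diff header

def agents_using_tool(session, tool_id: str) -> list[str]:
    stmt = select(ToolsAgents.agent_id).where(ToolsAgents.tool_id == tool_id)
    return list(session.scalars(stmt))

The same reasoning applies to ix_sources_agents_source_id for source-to-agent lookups.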
letta/schemas/message.py
CHANGED
@@ -1027,10 +1027,13 @@ class Message(BaseMessage):
         result = [m for m in result if m is not None]
         return result
 
-    def
+    def to_google_dict(self, put_inner_thoughts_in_kwargs: bool = True) -> dict | None:
         """
         Go from Message class to Google AI REST message object
         """
+        if self.role == "approval" and self.tool_calls is None:
+            return None
+
         # type Content: https://ai.google.dev/api/rest/v1/Content / https://ai.google.dev/api/rest/v1beta/Content
         # parts[]: Part
         # role: str ('user' or 'model')
@@ -1076,7 +1079,7 @@ class Message(BaseMessage):
                 "parts": content_parts,
             }
 
-        elif self.role == "assistant":
+        elif self.role == "assistant" or self.role == "approval":
             assert self.tool_calls is not None or text_content is not None
             google_ai_message = {
                 "role": "model",  # NOTE: different
@@ -1164,6 +1167,20 @@ class Message(BaseMessage):
 
         return google_ai_message
 
+    @staticmethod
+    def to_google_dicts_from_list(
+        messages: List[Message],
+        put_inner_thoughts_in_kwargs: bool = True,
+    ):
+        result = [
+            m.to_google_dict(
+                put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+            )
+            for m in messages
+        ]
+        result = [m for m in result if m is not None]
+        return result
+
     @staticmethod
     def generate_otid_from_id(message_id: str, index: int) -> str:
         """
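The new batch helper mirrors the None-filtering already visible in the context lines at 1027-1028: approval-role messages that carry no tool calls now convert to None and are dropped rather than reaching the Google AI payload. A usage sketch (variable names are illustrative):

# Sketch: batch conversion for a Google AI request body.
# Approval messages without tool_calls convert to None and are filtered out,
# so "contents" holds only valid Content dicts.
contents = Message.to_google_dicts_from_list(
    in_context_messages,  # List[Message], assumed already loaded
    put_inner_thoughts_in_kwargs=True,
)
request_body = {"contents": contents}  # shape per the Content docs linked above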
letta/server/rest_api/interface.py
CHANGED
@@ -295,6 +295,25 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         self.optimistic_json_parser = OptimisticJSONParser()
         self.current_json_parse_result = {}
 
+        # NOTE (fix): OpenAI deltas may split a key and its value across chunks
+        # (e.g. '"request_heartbeat"' in one chunk, ': true' in the next). The
+        # old behavior passed through each fragment verbatim, which could emit
+        # a bare key (or a key+opening quote) without its value, producing
+        # invalid JSON slices and the "missing end-quote" symptom downstream.
+        #
+        # To make streamed arguments robust, we add a JSON-aware incremental
+        # reader that only releases safe updates for the "main" JSON portion of
+        # the tool_call arguments. This prevents partial-key emissions while
+        # preserving incremental streaming for consumers.
+        #
+        # We still stream 'name' fragments as-is (safe), but 'arguments' are
+        # parsed incrementally and emitted only when a boundary is safe.
+        self._raw_args_reader = JSONInnerThoughtsExtractor(
+            inner_thoughts_key=inner_thoughts_kwarg,
+            wait_for_first_key=False,
+        )
+        self._raw_args_tool_call_id = None
+
         # Store metadata passed from server
         self.metadata = {}
 
@@ -654,11 +673,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 tool_call_delta = {}
                 if tool_call.id:
                     tool_call_delta["id"] = tool_call.id
+                    # Reset raw args reader per tool_call id
+                    if self._raw_args_tool_call_id != tool_call.id:
+                        self._raw_args_tool_call_id = tool_call.id
+                        self._raw_args_reader = JSONInnerThoughtsExtractor(
+                            inner_thoughts_key=self.inner_thoughts_kwarg,
+                            wait_for_first_key=False,
+                        )
                 if tool_call.function:
-
-                    tool_call_delta["arguments"] = tool_call.function.arguments
+                    # Stream name fragments as-is (names are short and harmless to emit)
                     if tool_call.function.name:
                         tool_call_delta["name"] = tool_call.function.name
+                    # For arguments, incrementally parse to avoid emitting partial keys
+                    if tool_call.function.arguments:
+                        self.current_function_arguments += tool_call.function.arguments
+                        updates_main_json, _ = self._raw_args_reader.process_fragment(tool_call.function.arguments)
+                        # Only emit argument updates when a safe boundary is reached
+                        if updates_main_json:
+                            tool_call_delta["arguments"] = updates_main_json
 
                 # We might end up with a no-op, in which case we should omit
                 if (
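To see what the buffered reader changes, consider a key split across two deltas. The sketch below assumes JSONInnerThoughtsExtractor is importable from letta.streaming_utils (also touched in this release, +29 -4) and that process_fragment returns a (main_json_update, inner_thoughts_update) pair, as the call site above suggests:

# Sketch: fragments as OpenAI might stream them, splitting a key and its value.
from letta.streaming_utils import JSONInnerThoughtsExtractor  # module path assumed

reader = JSONInnerThoughtsExtractor(inner_thoughts_key="inner_thoughts", wait_for_first_key=False)
emitted = []
for fragment in ['{"request_he', 'artbeat"', ': true}']:
    main_update, _ = reader.process_fragment(fragment)
    if main_update:  # released only at safe JSON boundaries
        emitted.append(main_update)
# No bare '"request_heartbeat"' fragment is emitted without its value; the
# concatenation of the emitted updates stays valid, incrementally-parseable JSON.
print("".join(emitted))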
letta/server/rest_api/redis_stream_manager.py
CHANGED
@@ -8,6 +8,10 @@ from typing import AsyncIterator, Dict, List, Optional
 
 from letta.data_sources.redis_client import AsyncRedisClient
 from letta.log import get_logger
+from letta.schemas.enums import JobStatus
+from letta.schemas.user import User
+from letta.services.job_manager import JobManager
+from letta.utils import safe_create_task
 
 logger = get_logger(__name__)
 
@@ -62,7 +66,7 @@ class RedisSSEStreamWriter:
         """Start the background flush task."""
         if not self._running:
             self._running = True
-            self._flush_task =
+            self._flush_task = safe_create_task(self._periodic_flush(), label="redis_periodic_flush")
 
     async def stop(self):
         """Stop the background flush task and flush remaining data."""
@@ -132,9 +136,9 @@ class RedisSSEStreamWriter:
 
         async with client.pipeline(transaction=False) as pipe:
             for chunk in chunks:
-                pipe.xadd(stream_key, chunk, maxlen=self.max_stream_length, approximate=True)
+                await pipe.xadd(stream_key, chunk, maxlen=self.max_stream_length, approximate=True)
 
-            pipe.expire(stream_key, self.stream_ttl)
+            await pipe.expire(stream_key, self.stream_ttl)
 
             await pipe.execute()
 
@@ -190,6 +194,8 @@ async def create_background_stream_processor(
     redis_client: AsyncRedisClient,
     run_id: str,
     writer: Optional[RedisSSEStreamWriter] = None,
+    job_manager: Optional[JobManager] = None,
+    actor: Optional[User] = None,
 ) -> None:
     """
     Process a stream in the background and store chunks to Redis.
@@ -202,6 +208,8 @@ async def create_background_stream_processor(
         redis_client: Redis client instance
         run_id: The run ID to store chunks under
         writer: Optional pre-configured writer (creates new if not provided)
+        job_manager: Optional job manager for updating job status
+        actor: Optional actor for job status updates
     """
     if writer is None:
         writer = RedisSSEStreamWriter(redis_client)
@@ -226,6 +234,12 @@ async def create_background_stream_processor(
         logger.error(f"Error processing stream for run {run_id}: {e}")
         # Write error chunk
         # error_chunk = {"error": {"message": str(e)}}
+        # Mark run_id terminal state
+        if job_manager and actor:
+            await job_manager.safe_update_job_status_async(
+                job_id=run_id, new_status=JobStatus.failed, actor=actor, metadata={"error": str(e)}
+            )
+
         error_chunk = {"error": str(e), "code": "INTERNAL_SERVER_ERROR"}
         await writer.write_chunk(run_id=run_id, data=f"event: error\ndata: {json.dumps(error_chunk)}\n\n", is_complete=True)
     finally:
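Several call sites in this release route background work through safe_create_task(coro, label=...). Its implementation lives in letta/utils.py (+72 -3), which is not shown in this diff; a plausible minimal shape, stated purely as an assumption, is a create_task wrapper that names the task and logs unhandled exceptions rather than letting them disappear:

# Hypothetical sketch only -- the real safe_create_task is in letta/utils.py
# and its body is not included in this diff. Assumed behavior: name the task
# after the label and surface unhandled exceptions via the logger.
import asyncio
import logging

logger = logging.getLogger(__name__)

def safe_create_task(coro, label: str = "task") -> asyncio.Task:
    def _log_result(task: asyncio.Task) -> None:
        try:
            task.result()
        except asyncio.CancelledError:
            pass  # cancellation is not an error
        except Exception:
            logger.exception(f"Unhandled exception in task {label}")

    task = asyncio.create_task(coro, name=label)
    task.add_done_callback(_log_result)
    return task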
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py
CHANGED
@@ -14,6 +14,7 @@ from letta.server.rest_api.chat_completions_interface import ChatCompletionsStre
 
 # TODO this belongs in a controller!
 from letta.server.rest_api.utils import get_letta_server, get_user_message_from_chat_completions_request, sse_async_generator
+from letta.utils import safe_create_task
 
 if TYPE_CHECKING:
     from letta.server.server import SyncServer
@@ -98,7 +99,7 @@ async def send_message_to_agent_chat_completions(
 
     # Offload the synchronous message_func to a separate thread
     streaming_interface.stream_start()
-
+    safe_create_task(
         asyncio.to_thread(
             server.send_messages,
             actor=actor,
@@ -106,7 +107,8 @@ async def send_message_to_agent_chat_completions(
             input_messages=messages,
             interface=streaming_interface,
             put_inner_thoughts_first=False,
-        )
+        ),
+        label="openai_send_messages",
     )
 
     # return a stream
letta/server/rest_api/routers/v1/agents.py
CHANGED
@@ -12,7 +12,9 @@ from pydantic import BaseModel, Field
 from sqlalchemy.exc import IntegrityError, OperationalError
 from starlette.responses import Response, StreamingResponse
 
+from letta.agents.agent_loop import AgentLoop
 from letta.agents.letta_agent import LettaAgent
+from letta.agents.letta_agent_v2 import LettaAgentV2
 from letta.constants import AGENT_ID_PATTERN, DEFAULT_MAX_STEPS, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, REDIS_RUN_ID_PREFIX
 from letta.data_sources.redis_client import NoopAsyncRedisClient, get_redis_client
 from letta.errors import (
@@ -58,7 +60,7 @@ from letta.server.server import SyncServer
 from letta.services.summarizer.enums import SummarizationMode
 from letta.services.telemetry_manager import NoopTelemetryManager
 from letta.settings import settings
-from letta.utils import safe_create_task, truncate_file_visible_content
+from letta.utils import safe_create_shielded_task, safe_create_task, truncate_file_visible_content
 
 # These can be forward refs, but because Fastapi needs them at runtime the must be imported normally
 
@@ -1144,7 +1146,9 @@ async def send_message(
 
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     # TODO: This is redundant, remove soon
-    agent = await server.agent_manager.get_agent_by_id_async(
+    agent = await server.agent_manager.get_agent_by_id_async(
+        agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
+    )
     agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
     model_compatible = agent.llm_config.model_endpoint_type in [
         "anthropic",
@@ -1190,42 +1194,11 @@ async def send_message(
 
     try:
         if agent_eligible and model_compatible:
-
-            agent_loop = SleeptimeMultiAgentV2(
-                agent_id=agent_id,
-                message_manager=server.message_manager,
-                agent_manager=server.agent_manager,
-                block_manager=server.block_manager,
-                passage_manager=server.passage_manager,
-                group_manager=server.group_manager,
-                job_manager=server.job_manager,
-                actor=actor,
-                group=agent.multi_agent_group,
-                current_run_id=run.id if run else None,
-            )
-        else:
-            agent_loop = LettaAgent(
-                agent_id=agent_id,
-                message_manager=server.message_manager,
-                agent_manager=server.agent_manager,
-                block_manager=server.block_manager,
-                job_manager=server.job_manager,
-                passage_manager=server.passage_manager,
-                actor=actor,
-                step_manager=server.step_manager,
-                telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
-                current_run_id=run.id if run else None,
-                # summarizer settings to be added here
-                summarizer_mode=(
-                    SummarizationMode.STATIC_MESSAGE_BUFFER
-                    if agent.agent_type == AgentType.voice_convo_agent
-                    else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
-                ),
-            )
-
+            agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
             result = await agent_loop.step(
                 request.messages,
                 max_steps=request.max_steps,
+                run_id=run.id if run else None,
                 use_assistant_message=request.use_assistant_message,
                 request_start_timestamp_ns=request_start_timestamp_ns,
                 include_return_message_types=request.include_return_message_types,
@@ -1299,7 +1272,9 @@ async def send_message_streaming(
 
     actor = await server.user_manager.get_actor_or_default_async(actor_id=actor_id)
     # TODO: This is redundant, remove soon
-    agent = await server.agent_manager.get_agent_by_id_async(
+    agent = await server.agent_manager.get_agent_by_id_async(
+        agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
+    )
     agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
     model_compatible = agent.llm_config.model_endpoint_type in [
         "anthropic",
@@ -1344,57 +1319,16 @@ async def send_message_streaming(
 
     try:
         if agent_eligible and model_compatible:
-
-            agent_loop = SleeptimeMultiAgentV2(
-                agent_id=agent_id,
-                message_manager=server.message_manager,
-                agent_manager=server.agent_manager,
-                block_manager=server.block_manager,
-                passage_manager=server.passage_manager,
-                group_manager=server.group_manager,
-                job_manager=server.job_manager,
-                actor=actor,
-                step_manager=server.step_manager,
-                telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
-                group=agent.multi_agent_group,
-                current_run_id=run.id if run else None,
-            )
-        else:
-            agent_loop = LettaAgent(
-                agent_id=agent_id,
-                message_manager=server.message_manager,
-                agent_manager=server.agent_manager,
-                block_manager=server.block_manager,
-                job_manager=server.job_manager,
-                passage_manager=server.passage_manager,
-                actor=actor,
-                step_manager=server.step_manager,
-                telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
-                current_run_id=run.id if run else None,
-                # summarizer settings to be added here
-                summarizer_mode=(
-                    SummarizationMode.STATIC_MESSAGE_BUFFER
-                    if agent.agent_type == AgentType.voice_convo_agent
-                    else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
-                ),
-            )
-
-            if request.stream_tokens and model_compatible_token_streaming:
-                raw_stream = agent_loop.step_stream(
-                    input_messages=request.messages,
-                    max_steps=request.max_steps,
-                    use_assistant_message=request.use_assistant_message,
-                    request_start_timestamp_ns=request_start_timestamp_ns,
-                    include_return_message_types=request.include_return_message_types,
-                )
-            else:
-                raw_stream = agent_loop.step_stream_no_tokens(
-                    request.messages,
-                    max_steps=request.max_steps,
-                    use_assistant_message=request.use_assistant_message,
-                    request_start_timestamp_ns=request_start_timestamp_ns,
-                    include_return_message_types=request.include_return_message_types,
-                )
+            agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
+            raw_stream = agent_loop.stream(
+                input_messages=request.messages,
+                max_steps=request.max_steps,
+                stream_tokens=request.stream_tokens and model_compatible_token_streaming,
+                run_id=run.id if run else None,
+                use_assistant_message=request.use_assistant_message,
+                request_start_timestamp_ns=request_start_timestamp_ns,
+                include_return_message_types=request.include_return_message_types,
+            )
 
     from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
 
@@ -1409,12 +1343,15 @@ async def send_message_streaming(
             ),
         )
 
-
+        safe_create_task(
             create_background_stream_processor(
                 stream_generator=raw_stream,
                 redis_client=redis_client,
                 run_id=run.id,
-
+                job_manager=server.job_manager,
+                actor=actor,
+            ),
+            label=f"background_stream_processor_{run.id}",
         )
 
         raw_stream = redis_sse_stream_generator(
@@ -1568,7 +1505,9 @@ async def _process_message_background(
     """Background task to process the message and update job status."""
    request_start_timestamp_ns = get_utc_timestamp_ns()
    try:
-        agent = await server.agent_manager.get_agent_by_id_async(
+        agent = await server.agent_manager.get_agent_by_id_async(
+            agent_id, actor, include_relationships=["memory", "multi_agent_group", "sources", "tool_exec_environment_variables", "tools"]
+        )
         agent_eligible = agent.multi_agent_group is None or agent.multi_agent_group.manager_type in ["sleeptime", "voice_sleeptime"]
         model_compatible = agent.llm_config.model_endpoint_type in [
             "anthropic",
@@ -1584,37 +1523,7 @@ async def _process_message_background(
             "deepseek",
         ]
         if agent_eligible and model_compatible:
-
-            agent_loop = SleeptimeMultiAgentV2(
-                agent_id=agent_id,
-                message_manager=server.message_manager,
-                agent_manager=server.agent_manager,
-                block_manager=server.block_manager,
-                passage_manager=server.passage_manager,
-                group_manager=server.group_manager,
-                job_manager=server.job_manager,
-                actor=actor,
-                group=agent.multi_agent_group,
-            )
-        else:
-            agent_loop = LettaAgent(
-                agent_id=agent_id,
-                message_manager=server.message_manager,
-                agent_manager=server.agent_manager,
-                block_manager=server.block_manager,
-                job_manager=server.job_manager,
-                passage_manager=server.passage_manager,
-                actor=actor,
-                step_manager=server.step_manager,
-                telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
-                # summarizer settings to be added here
-                summarizer_mode=(
-                    SummarizationMode.STATIC_MESSAGE_BUFFER
-                    if agent.agent_type == AgentType.voice_convo_agent
-                    else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
-                ),
-            )
-
+            agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
             result = await agent_loop.step(
                 messages,
                 max_steps=max_steps,
@@ -1702,8 +1611,8 @@ async def send_message_async(
     )
     run = await server.job_manager.create_job_async(pydantic_job=run, actor=actor)
 
-    # Create asyncio task for background processing
-    task =
+    # Create asyncio task for background processing (shielded to prevent cancellation)
+    task = safe_create_shielded_task(
         _process_message_background(
             run_id=run.id,
             server=server,
@@ -1715,28 +1624,20 @@ async def send_message_async(
             assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
             max_steps=request.max_steps,
             include_return_message_types=request.include_return_message_types,
-        )
+        ),
+        label=f"process_message_background_{run.id}",
     )
 
     def handle_task_completion(t):
         try:
             t.result()
         except asyncio.CancelledError:
-
-
-
-                job_id=run.id,
-                job_update=JobUpdate(
-                    status=JobStatus.failed,
-                    completed_at=datetime.now(timezone.utc),
-                    metadata={"error": "Task was cancelled"},
-                ),
-                actor=actor,
-            )
-            )
+            # Note: With shielded tasks, cancellation attempts don't actually stop the task
+            logger.info(f"Cancellation attempted on shielded background task for run {run.id}, but task continues running")
+            # Don't mark as failed since the shielded task is still running
         except Exception as e:
             logger.error(f"Unhandled exception in background task for run {run.id}: {e}")
-
+            safe_create_task(
                 server.job_manager.update_job_by_id_async(
                     job_id=run.id,
                     job_update=JobUpdate(
@@ -1745,7 +1646,8 @@ async def send_message_async(
                         metadata={"error": str(e)},
                     ),
                     actor=actor,
-            )
+                ),
+                label=f"update_failed_job_{run.id}",
             )
 
     task.add_done_callback(handle_task_completion)
@@ -1816,38 +1718,10 @@ async def preview_raw_payload(
     ]
 
     if agent_eligible and model_compatible:
-
-
-            raise HTTPException(
-                status_code=status.HTTP_400_BAD_REQUEST,
-                detail="Payload inspection is not supported for agents with sleeptime enabled.",
-            )
-        else:
-            agent_loop = LettaAgent(
-                agent_id=agent_id,
-                message_manager=server.message_manager,
-                agent_manager=server.agent_manager,
-                block_manager=server.block_manager,
-                job_manager=server.job_manager,
-                passage_manager=server.passage_manager,
-                actor=actor,
-                step_manager=server.step_manager,
-                telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
-                summarizer_mode=(
-                    SummarizationMode.STATIC_MESSAGE_BUFFER
-                    if agent.agent_type == AgentType.voice_convo_agent
-                    else SummarizationMode.PARTIAL_EVICT_MESSAGE_BUFFER
-                ),
-            )
-
-        # TODO: Support step_streaming
-        return await agent_loop.step(
+        agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
+        return await agent_loop.build_request(
             input_messages=request.messages,
-            use_assistant_message=request.use_assistant_message,
-            include_return_message_types=request.include_return_message_types,
-            dry_run=True,
         )
-
     else:
         raise HTTPException(
             status_code=status.HTTP_403_FORBIDDEN,
@@ -1888,19 +1762,14 @@ async def summarize_agent_conversation(
     ]
 
     if agent_eligible and model_compatible:
-
-
-
-
-
-
-
-            actor=actor,
-            step_manager=server.step_manager,
-            telemetry_manager=server.telemetry_manager if settings.llm_api_logging else NoopTelemetryManager(),
-            message_buffer_min=max_message_length,
+        agent_loop = LettaAgentV2(agent_state=agent, actor=actor)
+        in_context_messages = await server.message_manager.get_messages_by_ids_async(message_ids=agent.message_ids, actor=actor)
+        await agent_loop.summarize_conversation_history(
+            in_context_messages=in_context_messages,
+            new_letta_messages=[],
+            total_tokens=None,
+            force=True,
         )
-        await agent.summarize_conversation_history()
         # Summarization completed, return 204 No Content
     else:
         raise HTTPException(
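The router no longer hand-assembles SleeptimeMultiAgentV2 or LettaAgent; AgentLoop.load (from the new letta/agents/agent_loop.py, +23 lines) selects the loop implementation from the loaded agent state. The factory body is not included in this diff; a hypothetical sketch of the selection logic, based only on the classes this release touches:

# Hypothetical sketch -- letta/agents/agent_loop.py is new in this release but
# its body is not included in the diff. Assumed: route group-attached agents
# to the sleeptime group loop, everything else to the plain v2 loop.
class AgentLoop:
    @staticmethod
    def load(agent_state, actor):
        if agent_state.multi_agent_group is not None:
            # constructor arguments assumed by analogy with LettaAgentV2
            return SleeptimeMultiAgentV3(agent_state=agent_state, actor=actor, group=agent_state.multi_agent_group)
        return LettaAgentV2(agent_state=agent_state, actor=actor)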
letta/server/rest_api/routers/v1/folders.py
CHANGED
@@ -327,7 +327,7 @@ async def upload_file_to_folder(
         logger=logger,
         label="file_processor.process",
     )
-    safe_create_task(sleeptime_document_ingest_async(server, folder_id, actor),
+    safe_create_task(sleeptime_document_ingest_async(server, folder_id, actor), label="sleeptime_document_ingest_async")
 
     return file_metadata
 
@@ -467,7 +467,7 @@ async def delete_file_from_folder(
         logger.info(f"Deleting file {file_id} from pinecone index")
         await delete_file_records_from_pinecone_index(file_id=file_id, actor=actor)
 
-
+    safe_create_task(sleeptime_document_ingest_async(server, folder_id, actor, clear_history=True), label="document_ingest_after_delete")
     if deleted_file is None:
         raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")
 
letta/server/rest_api/routers/v1/sources.py
CHANGED
@@ -325,7 +325,7 @@ async def upload_file_to_source(
         logger=logger,
         label="file_processor.process",
     )
-    safe_create_task(sleeptime_document_ingest_async(server, source_id, actor),
+    safe_create_task(sleeptime_document_ingest_async(server, source_id, actor), label="sleeptime_document_ingest_async")
 
     return file_metadata
 
@@ -452,7 +452,7 @@ async def delete_file_from_source(
         logger.info(f"Deleting file {file_id} from pinecone index")
         await delete_file_records_from_pinecone_index(file_id=file_id, actor=actor)
 
-
+    safe_create_task(sleeptime_document_ingest_async(server, source_id, actor, clear_history=True), label="document_ingest_after_delete")
     if deleted_file is None:
         raise HTTPException(status_code=404, detail=f"File with id={file_id} not found.")