letta-nightly 0.7.8.dev20250501104226__py3-none-any.whl → 0.7.9.dev20250502222710__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +2 -2
- letta/agents/helpers.py +58 -1
- letta/agents/letta_agent.py +13 -3
- letta/agents/letta_agent_batch.py +33 -17
- letta/agents/voice_agent.py +1 -2
- letta/agents/voice_sleeptime_agent.py +75 -320
- letta/functions/function_sets/multi_agent.py +1 -1
- letta/functions/function_sets/voice.py +20 -32
- letta/functions/helpers.py +7 -7
- letta/helpers/datetime_helpers.py +6 -0
- letta/helpers/message_helper.py +19 -18
- letta/jobs/scheduler.py +233 -49
- letta/llm_api/google_ai_client.py +13 -4
- letta/llm_api/google_vertex_client.py +5 -1
- letta/llm_api/openai.py +10 -2
- letta/llm_api/openai_client.py +14 -2
- letta/orm/message.py +4 -0
- letta/prompts/system/voice_sleeptime.txt +2 -3
- letta/schemas/letta_message.py +1 -0
- letta/schemas/letta_request.py +8 -1
- letta/schemas/letta_response.py +5 -0
- letta/schemas/llm_batch_job.py +6 -4
- letta/schemas/llm_config.py +9 -0
- letta/schemas/message.py +23 -2
- letta/schemas/providers.py +3 -1
- letta/server/rest_api/app.py +15 -7
- letta/server/rest_api/routers/v1/agents.py +3 -0
- letta/server/rest_api/routers/v1/messages.py +46 -1
- letta/server/rest_api/routers/v1/steps.py +1 -1
- letta/server/rest_api/utils.py +25 -6
- letta/server/server.py +11 -3
- letta/services/llm_batch_manager.py +60 -1
- letta/services/message_manager.py +1 -0
- letta/services/summarizer/summarizer.py +42 -36
- letta/settings.py +1 -0
- letta/tracing.py +5 -0
- {letta_nightly-0.7.8.dev20250501104226.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/METADATA +2 -2
- {letta_nightly-0.7.8.dev20250501104226.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/RECORD +41 -41
- {letta_nightly-0.7.8.dev20250501104226.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.8.dev20250501104226.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.8.dev20250501104226.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/entry_points.txt +0 -0
letta/schemas/message.py
CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import copy
 import json
+import re
 import uuid
 import warnings
 from collections import OrderedDict
@@ -84,6 +85,7 @@ class MessageCreate(BaseModel):
     name: Optional[str] = Field(None, description="The name of the participant.")
     otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
     sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
+    batch_item_id: Optional[str] = Field(None, description="The id of the LLMBatchItem that this message is associated with")
     group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")
 
     def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
@@ -137,6 +139,11 @@ class Message(BaseMessage):
         created_at (datetime): The time the message was created.
         tool_calls (List[OpenAIToolCall,]): The list of tool calls requested.
         tool_call_id (str): The id of the tool call.
+        step_id (str): The id of the step that this message was created in.
+        otid (str): The offline threading id associated with this message.
+        tool_returns (List[ToolReturn]): The list of tool returns requested.
+        group_id (str): The multi-agent group that the message was sent in.
+        sender_id (str): The id of the sender of the message, can be an identity id or agent id.
 
     """
 
@@ -162,6 +169,7 @@ class Message(BaseMessage):
     tool_returns: Optional[List[ToolReturn]] = Field(None, description="Tool execution return information for prior tool calls")
     group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")
     sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
+    batch_item_id: Optional[str] = Field(None, description="The id of the LLMBatchItem that this message is associated with")
     # This overrides the optional base orm schema, created_at MUST exist on all messages objects
     created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.")
 
@@ -252,6 +260,7 @@ class Message(BaseMessage):
                         name=self.name,
                         otid=otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
            # Otherwise, we may have a list of multiple types
@@ -269,6 +278,7 @@ class Message(BaseMessage):
                         name=self.name,
                         otid=otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
            elif isinstance(content_part, ReasoningContent):
@@ -282,6 +292,7 @@ class Message(BaseMessage):
                         signature=content_part.signature,
                         name=self.name,
                         otid=otid,
+                        step_id=self.step_id,
                     )
                 )
            elif isinstance(content_part, RedactedReasoningContent):
@@ -295,6 +306,7 @@ class Message(BaseMessage):
                         name=self.name,
                         otid=otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
            elif isinstance(content_part, OmittedReasoningContent):
@@ -307,6 +319,7 @@ class Message(BaseMessage):
                         state="omitted",
                         name=self.name,
                         otid=otid,
+                        step_id=self.step_id,
                     )
                 )
            else:
@@ -333,6 +346,7 @@ class Message(BaseMessage):
                         name=self.name,
                         otid=otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
            else:
@@ -348,6 +362,7 @@ class Message(BaseMessage):
                         name=self.name,
                         otid=otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
        elif self.role == MessageRole.tool:
@@ -391,6 +406,7 @@ class Message(BaseMessage):
                         name=self.name,
                         otid=self.id.replace("message-", ""),
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
        elif self.role == MessageRole.user:
@@ -409,6 +425,7 @@ class Message(BaseMessage):
                         name=self.name,
                         otid=self.otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
        elif self.role == MessageRole.system:
@@ -426,6 +443,7 @@ class Message(BaseMessage):
                         name=self.name,
                         otid=self.otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
        else:
@@ -700,9 +718,12 @@ class Message(BaseMessage):
         else:
             raise ValueError(self.role)
 
-        # Optional field, do not include if null
+        # Optional field, do not include if null or invalid
         if self.name is not None:
-            openai_message["name"] = self.name
+            if bool(re.match(r"^[^\s<|\\/>]+$", self.name)):
+                openai_message["name"] = self.name
+            else:
+                warnings.warn(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")
 
         if parse_content_parts:
             for content in self.content:
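The new `name` validation is self-contained enough to check in isolation. A quick standalone probe of the regex from the hunk above (the pattern is copied verbatim; the test harness around it is illustrative only):

```python
import re

# Pattern copied from the hunk above: rejects whitespace and < | \ / >
NAME_PATTERN = re.compile(r"^[^\s<|\\/>]+$")

for name in ["alice", "agent-42", "bob smith", "a<b", "x/y"]:
    print(f"{name!r}: {bool(NAME_PATTERN.match(name))}")
# 'alice': True, 'agent-42': True, 'bob smith': False, 'a<b': False, 'x/y': False
```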
letta/schemas/providers.py
CHANGED
@@ -201,7 +201,9 @@ class OpenAIProvider(Provider):
            # for openai, filter models
            if self.base_url == "https://api.openai.com/v1":
                allowed_types = ["gpt-4", "o1", "o3"]
-               disallowed_types = ["transcribe", "search", "realtime", "tts", "audio", "computer"]
+               # NOTE: o1-mini and o1-preview do not support tool calling
+               # NOTE: o1-pro is only available in Responses API
+               disallowed_types = ["transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"]
                skip = True
                for model_type in allowed_types:
                    if model_name.startswith(model_type):
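Read together, the two lists act as a prefix allow-list plus a deny-list. A simplified restatement of that filter (the exact matching rule for `disallowed_types` is not visible in this hunk, so the substring check below is an assumption):

```python
def include_model(model_name: str) -> bool:
    allowed_types = ["gpt-4", "o1", "o3"]
    disallowed_types = ["transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"]
    # Keep only models whose name starts with an allowed prefix...
    if not any(model_name.startswith(t) for t in allowed_types):
        return False
    # ...and that do not match a disallowed token (assumed substring match)
    return not any(t in model_name for t in disallowed_types)

assert include_model("gpt-4o-mini")
assert not include_model("o1-mini")               # no tool calling
assert not include_model("gpt-4o-audio-preview")  # audio model
```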
letta/server/rest_api/app.py
CHANGED
@@ -17,7 +17,7 @@ from letta.__init__ import __version__
 from letta.agents.exceptions import IncompatibleAgentType
 from letta.constants import ADMIN_PREFIX, API_PREFIX, OPENAI_API_PREFIX
 from letta.errors import BedrockPermissionError, LettaAgentNotFoundError, LettaUserNotFoundError
-from letta.jobs.scheduler import
+from letta.jobs.scheduler import shutdown_scheduler_and_release_lock, start_scheduler_with_leader_election
 from letta.log import get_logger
 from letta.orm.errors import DatabaseTimeoutError, ForeignKeyConstraintViolationError, NoResultFound, UniqueConstraintViolationError
 from letta.schemas.letta_message import create_letta_message_union_schema
@@ -150,10 +150,10 @@ def create_application() -> "FastAPI":
     loop.set_default_executor(executor)
 
     @app.on_event("startup")
-    def on_startup():
+    async def on_startup():
         global server
 
-
+        await start_scheduler_with_leader_election(server)
 
     @app.on_event("shutdown")
     def shutdown_mcp_clients():
@@ -170,9 +170,16 @@ def create_application() -> "FastAPI":
     t.start()
     t.join()
 
-    @app.
-    def
-
+    @app.exception_handler(IncompatibleAgentType)
+    async def handle_incompatible_agent_type(request: Request, exc: IncompatibleAgentType):
+        return JSONResponse(
+            status_code=400,
+            content={
+                "detail": str(exc),
+                "expected_type": exc.expected_type,
+                "actual_type": exc.actual_type,
+            },
+        )
 
     @app.exception_handler(IncompatibleAgentType)
     async def handle_incompatible_agent_type(request: Request, exc: IncompatibleAgentType):
@@ -322,9 +329,10 @@ def create_application() -> "FastAPI":
     generate_openapi_schema(app)
 
     @app.on_event("shutdown")
-    def on_shutdown():
+    async def on_shutdown():
         global server
         # server = None
+        await shutdown_scheduler_and_release_lock()
 
     return app
 
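The renamed imports suggest that the scheduler in letta/jobs/scheduler.py (+233/-49, not shown in this excerpt) now runs behind leader election, so that only one server process polls running batches at a time. A hypothetical sketch of that pattern using a Postgres session-level advisory lock; every name below is an assumption, not letta's actual code:

```python
# Hypothetical leader election via a Postgres advisory lock (asyncpg).
import asyncpg

SCHEDULER_LOCK_KEY = 4_815_162_342  # arbitrary app-defined lock id

async def try_acquire_leadership(pg_dsn: str) -> asyncpg.Connection | None:
    conn = await asyncpg.connect(pg_dsn)
    # pg_try_advisory_lock returns true for exactly one session at a time
    if await conn.fetchval("SELECT pg_try_advisory_lock($1)", SCHEDULER_LOCK_KEY):
        return conn  # keep the connection open: the lock is session-scoped
    await conn.close()
    return None  # another process is the leader; retry later

async def release_leadership(conn: asyncpg.Connection) -> None:
    await conn.fetchval("SELECT pg_advisory_unlock($1)", SCHEDULER_LOCK_KEY)
    await conn.close()
```

The new `poll_lock_retry_interval_seconds` setting in letta/settings.py further down fits this reading: a process that fails to win the lock would retry on that interval.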
letta/server/rest_api/routers/v1/agents.py
CHANGED
@@ -13,6 +13,7 @@ from starlette.responses import Response, StreamingResponse
 
 from letta.agents.letta_agent import LettaAgent
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
 from letta.log import get_logger
 from letta.orm.errors import NoResultFound
 from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
@@ -684,6 +685,7 @@ async def send_message_streaming(
     This endpoint accepts a message from a user and processes it through the agent.
     It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
     """
+    request_start_timestamp_ns = get_utc_timestamp_ns()
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
     # TODO: This is redundant, remove soon
     agent = server.agent_manager.get_agent_by_id(agent_id, actor)
@@ -719,6 +721,7 @@ async def send_message_streaming(
         use_assistant_message=request.use_assistant_message,
         assistant_message_tool_name=request.assistant_message_tool_name,
         assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+        request_start_timestamp_ns=request_start_timestamp_ns,
     )
 
     return result
letta/server/rest_api/routers/v1/messages.py
CHANGED
@@ -1,6 +1,6 @@
 from typing import List, Optional
 
-from fastapi import APIRouter, Body, Depends, Header, status
+from fastapi import APIRouter, Body, Depends, Header, Query, status
 from fastapi.exceptions import HTTPException
 from starlette.requests import Request
 
@@ -9,6 +9,7 @@ from letta.log import get_logger
 from letta.orm.errors import NoResultFound
 from letta.schemas.job import BatchJob, JobStatus, JobType, JobUpdate
 from letta.schemas.letta_request import CreateBatch
+from letta.schemas.letta_response import LettaBatchMessages
 from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
 from letta.settings import settings
@@ -123,6 +124,50 @@ async def list_batch_runs(
     return [BatchJob.from_job(job) for job in jobs]
 
 
+@router.get(
+    "/batches/{batch_id}/messages",
+    response_model=LettaBatchMessages,
+    operation_id="list_batch_messages",
+)
+async def list_batch_messages(
+    batch_id: str,
+    limit: int = Query(100, description="Maximum number of messages to return"),
+    cursor: Optional[str] = Query(
+        None, description="Message ID to use as pagination cursor (get messages before/after this ID) depending on sort_descending."
+    ),
+    agent_id: Optional[str] = Query(None, description="Filter messages by agent ID"),
+    sort_descending: bool = Query(True, description="Sort messages by creation time (true=newest first)"),
+    actor_id: Optional[str] = Header(None, alias="user_id"),
+    server: SyncServer = Depends(get_letta_server),
+):
+    """
+    Get messages for a specific batch job.
+
+    Returns messages associated with the batch in chronological order.
+
+    Pagination:
+    - For the first page, omit the cursor parameter
+    - For subsequent pages, use the ID of the last message from the previous response as the cursor
+    - Results will include messages before/after the cursor based on sort_descending
+    """
+    actor = server.user_manager.get_user_or_default(user_id=actor_id)
+
+    # First, verify the batch job exists and the user has access to it
+    try:
+        job = server.job_manager.get_job_by_id(job_id=batch_id, actor=actor)
+        BatchJob.from_job(job)
+    except NoResultFound:
+        raise HTTPException(status_code=404, detail="Batch not found")
+
+    # Get messages directly using our efficient method
+    # We'll need to update the underlying implementation to use message_id as cursor
+    messages = server.batch_manager.get_messages_for_letta_batch(
+        letta_batch_job_id=batch_id, limit=limit, actor=actor, agent_id=agent_id, sort_descending=sort_descending, cursor=cursor
+    )
+
+    return LettaBatchMessages(messages=messages)
+
+
 @router.patch("/batches/{batch_id}/cancel", operation_id="cancel_batch_run")
 async def cancel_batch_run(
     batch_id: str,
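The docstring above describes cursor pagination; in practice that means feeding the last message ID of each page back as `cursor`. A hypothetical client-side loop (the base URL and `/v1/messages` route prefix are assumptions; the query parameters come from the handler above):

```python
import requests

BASE_URL = "http://localhost:8283/v1/messages"  # assumed prefix
batch_id = "batch-id-here"

cursor = None
while True:
    params = {"limit": 100, "sort_descending": True}
    if cursor:
        params["cursor"] = cursor
    resp = requests.get(f"{BASE_URL}/batches/{batch_id}/messages", params=params)
    resp.raise_for_status()
    page = resp.json()["messages"]
    if not page:
        break
    cursor = page[-1]["id"]  # last message ID becomes the next cursor
```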
letta/server/rest_api/routers/v1/steps.py
CHANGED
@@ -11,7 +11,7 @@ from letta.server.server import SyncServer
 router = APIRouter(prefix="/steps", tags=["steps"])
 
 
-@router.get("", response_model=List[Step], operation_id="list_steps")
+@router.get("/", response_model=List[Step], operation_id="list_steps")
 def list_steps(
     before: Optional[str] = Query(None, description="Return steps before this step ID"),
     after: Optional[str] = Query(None, description="Return steps after this step ID"),
letta/server/rest_api/utils.py
CHANGED
@@ -15,7 +15,7 @@ from pydantic import BaseModel
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
-from letta.helpers.datetime_helpers import get_utc_time
+from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
 from letta.helpers.message_helper import convert_message_creates_to_messages
 from letta.log import get_logger
 from letta.schemas.enums import MessageRole
@@ -25,6 +25,7 @@ from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.server.rest_api.interface import StreamingServerInterface
 from letta.system import get_heartbeat, package_function_response
+from letta.tracing import tracer
 
 if TYPE_CHECKING:
     from letta.server.server import SyncServer
@@ -51,18 +52,35 @@ async def sse_async_generator(
     generator: AsyncGenerator,
     usage_task: Optional[asyncio.Task] = None,
     finish_message=True,
+    request_start_timestamp_ns: Optional[int] = None,
 ):
     """
     Wraps a generator for use in Server-Sent Events (SSE), handling errors and ensuring a completion message.
 
     Args:
     - generator: An asynchronous generator yielding data chunks.
+    - usage_task: Optional task that will return usage statistics.
+    - finish_message: Whether to send a completion message.
+    - request_start_timestamp_ns: Optional ns timestamp when the request started, used to measure time to first token.
 
     Yields:
     - Formatted Server-Sent Event strings.
     """
+    first_chunk = True
+    ttft_span = None
+    if request_start_timestamp_ns is not None:
+        ttft_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
+
     try:
         async for chunk in generator:
+            # Measure time to first token
+            if first_chunk and ttft_span is not None:
+                now = get_utc_timestamp_ns()
+                ttft_ns = now - request_start_timestamp_ns
+                ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
+                ttft_span.end()
+                first_chunk = False
+
             # yield f"data: {json.dumps(chunk)}\n\n"
             if isinstance(chunk, BaseModel):
                 chunk = chunk.model_dump()
@@ -168,6 +186,7 @@ def create_letta_messages_from_llm_response(
     reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
     pre_computed_assistant_message_id: Optional[str] = None,
     pre_computed_tool_message_id: Optional[str] = None,
+    llm_batch_item_id: Optional[str] = None,
 ) -> List[Message]:
     messages = []
 
@@ -192,6 +211,7 @@
         tool_calls=[tool_call],
         tool_call_id=tool_call_id,
         created_at=get_utc_time(),
+        batch_item_id=llm_batch_item_id,
     )
     if pre_computed_assistant_message_id:
         assistant_message.id = pre_computed_assistant_message_id
@@ -209,6 +229,7 @@
         tool_call_id=tool_call_id,
         created_at=get_utc_time(),
         name=function_name,
+        batch_item_id=llm_batch_item_id,
     )
     if pre_computed_tool_message_id:
         tool_message.id = pre_computed_tool_message_id
@@ -216,7 +237,7 @@
 
     if add_heartbeat_request_system_message:
         heartbeat_system_message = create_heartbeat_system_message(
-            agent_id=agent_id, model=model, function_call_success=function_call_success, actor=actor
+            agent_id=agent_id, model=model, function_call_success=function_call_success, actor=actor, llm_batch_item_id=llm_batch_item_id
         )
         messages.append(heartbeat_system_message)
 
@@ -224,10 +245,7 @@
 
 
 def create_heartbeat_system_message(
-    agent_id: str,
-    model: str,
-    function_call_success: bool,
-    actor: User,
+    agent_id: str, model: str, function_call_success: bool, actor: User, llm_batch_item_id: Optional[str] = None
 ) -> Message:
     text_content = REQ_HEARTBEAT_MESSAGE if function_call_success else FUNC_FAILED_HEARTBEAT_MESSAGE
     heartbeat_system_message = Message(
@@ -239,6 +257,7 @@ def create_heartbeat_system_message(
         tool_calls=[],
         tool_call_id=None,
         created_at=get_utc_time(),
+        batch_item_id=llm_batch_item_id,
     )
     return heartbeat_system_message
 
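The time-to-first-token instrumentation above leans on OpenTelemetry's ability to open a span with an explicit nanosecond `start_time`, so the span covers the interval from request arrival to the first streamed chunk. Stripped of the SSE plumbing, the pattern looks like this (generic names, not letta's):

```python
import time

from opentelemetry import trace

tracer = trace.get_tracer(__name__)

request_start_ns = time.time_ns()
# ... model call happens here; first chunk arrives ...
span = tracer.start_span("time_to_first_token", start_time=request_start_ns)
ttft_ns = time.time_ns() - request_start_ns
span.add_event("time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
span.end()
```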
letta/server/server.py
CHANGED
@@ -244,9 +244,15 @@ class SyncServer(Server):
            tool_dir = tool_settings.tool_exec_dir or LETTA_TOOL_EXECUTION_DIR
 
            venv_dir = Path(tool_dir) / venv_name
-
-
+            tool_path = Path(tool_dir)
+
+            if tool_path.exists() and not tool_path.is_dir():
+                logger.error(f"LETTA_TOOL_SANDBOX_DIR exists but is not a directory: {tool_dir}")
             else:
+                if not tool_path.exists():
+                    logger.warning(f"LETTA_TOOL_SANDBOX_DIR does not exist, creating now: {tool_dir}")
+                    tool_path.mkdir(parents=True, exist_ok=True)
+
                 if tool_settings.tool_exec_venv_name and not venv_dir.is_dir():
                     logger.warning(
                         f"Provided LETTA_TOOL_SANDBOX_VENV_NAME is not a valid venv ({venv_dir}), one will be created for you during tool execution."
@@ -859,7 +865,7 @@
                     value=get_persona_text("voice_memory_persona"),
                 ),
             ],
-            llm_config=
+            llm_config=LLMConfig.default_config("gpt-4.1"),
             embedding_config=main_agent.embedding_config,
             project_id=main_agent.project_id,
         )
@@ -1633,6 +1639,7 @@
         assistant_message_tool_name: str = constants.DEFAULT_MESSAGE_TOOL,
         assistant_message_tool_kwarg: str = constants.DEFAULT_MESSAGE_TOOL_KWARG,
         metadata: Optional[dict] = None,
+        request_start_timestamp_ns: Optional[int] = None,
     ) -> Union[StreamingResponse, LettaResponse]:
         """Split off into a separate function so that it can be imported in the /chat/completion proxy."""
         # TODO: @charles is this the correct way to handle?
@@ -1717,6 +1724,7 @@
                 streaming_interface.get_generator(),
                 usage_task=task,
                 finish_message=include_final_message,
+                request_start_timestamp_ns=request_start_timestamp_ns,
             ),
             media_type="text/event-stream",
         )
letta/services/llm_batch_manager.py
CHANGED
@@ -2,10 +2,11 @@ import datetime
 from typing import Any, Dict, List, Optional, Tuple
 
 from anthropic.types.beta.messages import BetaMessageBatch, BetaMessageBatchIndividualResponse
-from sqlalchemy import func, tuple_
+from sqlalchemy import desc, func, tuple_
 
 from letta.jobs.types import BatchPollingResult, ItemUpdateInfo, RequestStatusUpdateInfo, StepStatusUpdateInfo
 from letta.log import get_logger
+from letta.orm import Message as MessageModel
 from letta.orm.llm_batch_items import LLMBatchItem
 from letta.orm.llm_batch_job import LLMBatchJob
 from letta.schemas.agent import AgentStepState
@@ -13,6 +14,7 @@ from letta.schemas.enums import AgentStepStatus, JobStatus, ProviderType
 from letta.schemas.llm_batch_job import LLMBatchItem as PydanticLLMBatchItem
 from letta.schemas.llm_batch_job import LLMBatchJob as PydanticLLMBatchJob
 from letta.schemas.llm_config import LLMConfig
+from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.user import User as PydanticUser
 from letta.utils import enforce_types
 
@@ -142,6 +144,62 @@ class LLMBatchManager:
             batch = LLMBatchJob.read(db_session=session, identifier=llm_batch_id, actor=actor)
             batch.hard_delete(db_session=session, actor=actor)
 
+    @enforce_types
+    def get_messages_for_letta_batch(
+        self,
+        letta_batch_job_id: str,
+        limit: int = 100,
+        actor: Optional[PydanticUser] = None,
+        agent_id: Optional[str] = None,
+        sort_descending: bool = True,
+        cursor: Optional[str] = None,  # Message ID as cursor
+    ) -> List[PydanticMessage]:
+        """
+        Retrieve messages across all LLM batch jobs associated with a Letta batch job.
+        Optimized for PostgreSQL performance using ID-based keyset pagination.
+        """
+        with self.session_maker() as session:
+            # If cursor is provided, get sequence_id for that message
+            cursor_sequence_id = None
+            if cursor:
+                cursor_query = session.query(MessageModel.sequence_id).filter(MessageModel.id == cursor).limit(1)
+                cursor_result = cursor_query.first()
+                if cursor_result:
+                    cursor_sequence_id = cursor_result[0]
+                else:
+                    # If cursor message doesn't exist, ignore it
+                    pass
+
+            query = (
+                session.query(MessageModel)
+                .join(LLMBatchItem, MessageModel.batch_item_id == LLMBatchItem.id)
+                .join(LLMBatchJob, LLMBatchItem.llm_batch_id == LLMBatchJob.id)
+                .filter(LLMBatchJob.letta_batch_job_id == letta_batch_job_id)
+            )
+
+            if actor is not None:
+                query = query.filter(MessageModel.organization_id == actor.organization_id)
+
+            if agent_id is not None:
+                query = query.filter(MessageModel.agent_id == agent_id)
+
+            # Apply cursor-based pagination if cursor exists
+            if cursor_sequence_id is not None:
+                if sort_descending:
+                    query = query.filter(MessageModel.sequence_id < cursor_sequence_id)
+                else:
+                    query = query.filter(MessageModel.sequence_id > cursor_sequence_id)
+
+            if sort_descending:
+                query = query.order_by(desc(MessageModel.sequence_id))
+            else:
+                query = query.order_by(MessageModel.sequence_id)
+
+            query = query.limit(limit)
+
+            results = query.all()
+            return [message.to_pydantic() for message in results]
+
     @enforce_types
     def list_running_llm_batches(self, actor: Optional[PydanticUser] = None) -> List[PydanticLLMBatchJob]:
         """Return all running LLM batch jobs, optionally filtered by actor's organization."""
@@ -196,6 +254,7 @@
         orm_items = []
         for item in llm_batch_items:
             orm_item = LLMBatchItem(
+                id=item.id,
                 llm_batch_id=item.llm_batch_id,
                 agent_id=item.agent_id,
                 llm_config=item.llm_config,
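`get_messages_for_letta_batch` resolves the cursor message to its `sequence_id` and then filters on that column rather than using OFFSET, i.e. keyset (seek) pagination: each page becomes an index range scan instead of a scan-and-discard of all earlier rows. The core of the technique in isolation (placeholder model and session names, not letta's):

```python
from sqlalchemy import desc, select

def next_page(session, Model, cursor_seq=None, limit=100):
    stmt = select(Model)
    if cursor_seq is not None:
        # Seek past the last row of the previous page; OFFSET would instead
        # re-read and throw away every earlier row on each request.
        stmt = stmt.where(Model.sequence_id < cursor_seq)
    stmt = stmt.order_by(desc(Model.sequence_id)).limit(limit)
    return session.execute(stmt).scalars().all()
```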
letta/services/summarizer/summarizer.py
CHANGED
@@ -1,9 +1,8 @@
 import asyncio
 import json
 import traceback
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
-from letta.agents.voice_sleeptime_agent import VoiceSleeptimeAgent
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.log import get_logger
 from letta.schemas.enums import MessageRole
@@ -22,7 +21,11 @@
     """
 
     def __init__(
-        self,
+        self,
+        mode: SummarizationMode,
+        summarizer_agent: Optional["VoiceSleeptimeAgent"] = None,
+        message_buffer_limit: int = 10,
+        message_buffer_min: int = 3,
     ):
         self.mode = mode
 
@@ -90,39 +93,42 @@
             logger.info("Nothing to evict, returning in context messages as is.")
             return all_in_context_messages, False
 
-        [... 33 removed lines of the previous summarization logic, not captured in this diff view ...]
+        if self.summarizer_agent:
+            # Only invoke if summarizer agent is passed in
+
+            evicted_messages = all_in_context_messages[1:target_trim_index]
+
+            # Format
+            formatted_evicted_messages = format_transcript(evicted_messages)
+            formatted_in_context_messages = format_transcript(updated_in_context_messages)
+
+            # TODO: This is hyperspecific to voice, generalize!
+            # Update the message transcript of the memory agent
+            self.summarizer_agent.update_message_transcript(message_transcripts=formatted_evicted_messages + formatted_in_context_messages)
+
+            # Add line numbers to the formatted messages
+            line_number = 0
+            for i in range(len(formatted_evicted_messages)):
+                formatted_evicted_messages[i] = f"{line_number}. " + formatted_evicted_messages[i]
+                line_number += 1
+            for i in range(len(formatted_in_context_messages)):
+                formatted_in_context_messages[i] = f"{line_number}. " + formatted_in_context_messages[i]
+                line_number += 1
+
+            evicted_messages_str = "\n".join(formatted_evicted_messages)
+            in_context_messages_str = "\n".join(formatted_in_context_messages)
+            summary_request_text = f"""You’re a memory-recall helper for an AI that can only keep the last {self.message_buffer_min} messages. Scan the conversation history, focusing on messages about to drop out of that window, and write crisp notes that capture any important facts or insights about the human so they aren’t lost.
+
+(Older) Evicted Messages:\n
+{evicted_messages_str}\n
+
+(Newer) In-Context Messages:\n
+{in_context_messages_str}
+"""
+            # Fire-and-forget the summarization task
+            self.fire_and_forget(
+                self.summarizer_agent.step([MessageCreate(role=MessageRole.user, content=[TextContent(text=summary_request_text)])])
+            )
 
         return [all_in_context_messages[0]] + updated_in_context_messages, True
 
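The rewritten branch hands the summarization step to `self.fire_and_forget(...)` so the main conversation is not blocked on it. That helper's definition is not in this excerpt; a common shape for such a method, offered only as an assumption about its behavior:

```python
import asyncio
import logging

logger = logging.getLogger(__name__)

def fire_and_forget(coro) -> asyncio.Task:
    """Schedule a coroutine without awaiting it, but don't lose its errors."""
    task = asyncio.create_task(coro)

    def _report(t: asyncio.Task) -> None:
        # Surface failures that a bare create_task would silently swallow.
        if not t.cancelled() and t.exception() is not None:
            logger.error("fire-and-forget task failed", exc_info=t.exception())

    task.add_done_callback(_report)
    return task
```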
letta/settings.py
CHANGED
@@ -209,6 +209,7 @@ class Settings(BaseSettings):
     # cron job parameters
     enable_batch_job_polling: bool = False
     poll_running_llm_batches_interval_seconds: int = 5 * 60
+    poll_lock_retry_interval_seconds: int = 5 * 60
 
     @property
     def letta_pg_uri(self) -> str:
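`poll_lock_retry_interval_seconds` defaults to five minutes and pairs naturally with the scheduler leader election above: a process that fails to win the lock would retry on this interval. Since `Settings` is a pydantic `BaseSettings` subclass, the field should be overridable from the environment; the exact variable name depends on the configured env prefix, which this hunk does not show (a `letta_` prefix is assumed below):

```python
import os

# Assumed env var name, derived from the field name plus a "letta_" prefix
os.environ["LETTA_POLL_LOCK_RETRY_INTERVAL_SECONDS"] = "60"

from letta.settings import settings  # import path matches the file shown

print(settings.poll_lock_retry_interval_seconds)  # expected: 60
```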
letta/tracing.py
CHANGED
@@ -75,6 +75,11 @@ async def update_trace_attributes(request: Request):
     for key, value in request.path_params.items():
         span.set_attribute(f"http.{key}", value)
 
+    # Add user ID if available
+    user_id = request.headers.get("user_id")
+    if user_id:
+        span.set_attribute("user.id", user_id)
+
     # Add request body if available
     try:
         body = await request.json()