letta-nightly 0.7.8.dev20250502104219__py3-none-any.whl → 0.7.9.dev20250502222710__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. letta/__init__.py +2 -2
  2. letta/agents/helpers.py +58 -1
  3. letta/agents/letta_agent.py +13 -3
  4. letta/agents/letta_agent_batch.py +33 -17
  5. letta/agents/voice_agent.py +1 -2
  6. letta/agents/voice_sleeptime_agent.py +75 -320
  7. letta/functions/function_sets/multi_agent.py +1 -1
  8. letta/functions/function_sets/voice.py +20 -32
  9. letta/functions/helpers.py +7 -7
  10. letta/helpers/datetime_helpers.py +6 -0
  11. letta/helpers/message_helper.py +19 -18
  12. letta/jobs/scheduler.py +233 -49
  13. letta/llm_api/google_ai_client.py +13 -4
  14. letta/llm_api/google_vertex_client.py +5 -1
  15. letta/llm_api/openai.py +10 -2
  16. letta/llm_api/openai_client.py +14 -2
  17. letta/orm/message.py +4 -0
  18. letta/prompts/system/voice_sleeptime.txt +2 -3
  19. letta/schemas/letta_message.py +1 -0
  20. letta/schemas/letta_request.py +8 -1
  21. letta/schemas/letta_response.py +5 -0
  22. letta/schemas/llm_batch_job.py +6 -4
  23. letta/schemas/llm_config.py +9 -0
  24. letta/schemas/message.py +23 -2
  25. letta/schemas/providers.py +3 -1
  26. letta/server/rest_api/app.py +15 -7
  27. letta/server/rest_api/routers/v1/agents.py +3 -0
  28. letta/server/rest_api/routers/v1/messages.py +46 -1
  29. letta/server/rest_api/routers/v1/steps.py +1 -1
  30. letta/server/rest_api/utils.py +25 -6
  31. letta/server/server.py +11 -3
  32. letta/services/llm_batch_manager.py +60 -1
  33. letta/services/message_manager.py +1 -0
  34. letta/services/summarizer/summarizer.py +42 -36
  35. letta/settings.py +1 -0
  36. letta/tracing.py +5 -0
  37. {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/METADATA +2 -2
  38. {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/RECORD +41 -41
  39. {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/LICENSE +0 -0
  40. {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/WHEEL +0 -0
  41. {letta_nightly-0.7.8.dev20250502104219.dist-info → letta_nightly-0.7.9.dev20250502222710.dist-info}/entry_points.txt +0 -0
letta/schemas/message.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import copy
 import json
+import re
 import uuid
 import warnings
 from collections import OrderedDict
@@ -84,6 +85,7 @@ class MessageCreate(BaseModel):
     name: Optional[str] = Field(None, description="The name of the participant.")
     otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
     sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
+    batch_item_id: Optional[str] = Field(None, description="The id of the LLMBatchItem that this message is associated with")
     group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")
 
     def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
@@ -137,6 +139,11 @@ class Message(BaseMessage):
         created_at (datetime): The time the message was created.
         tool_calls (List[OpenAIToolCall,]): The list of tool calls requested.
         tool_call_id (str): The id of the tool call.
+        step_id (str): The id of the step that this message was created in.
+        otid (str): The offline threading id associated with this message.
+        tool_returns (List[ToolReturn]): The list of tool returns requested.
+        group_id (str): The multi-agent group that the message was sent in.
+        sender_id (str): The id of the sender of the message, can be an identity id or agent id.
 
     """
 
@@ -162,6 +169,7 @@
     tool_returns: Optional[List[ToolReturn]] = Field(None, description="Tool execution return information for prior tool calls")
     group_id: Optional[str] = Field(None, description="The multi-agent group that the message was sent in")
     sender_id: Optional[str] = Field(None, description="The id of the sender of the message, can be an identity id or agent id")
+    batch_item_id: Optional[str] = Field(None, description="The id of the LLMBatchItem that this message is associated with")
     # This overrides the optional base orm schema, created_at MUST exist on all messages objects
     created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.")
 
@@ -252,6 +260,7 @@
                         name=self.name,
                         otid=otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
         # Otherwise, we may have a list of multiple types
@@ -269,6 +278,7 @@
                         name=self.name,
                         otid=otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
             elif isinstance(content_part, ReasoningContent):
@@ -282,6 +292,7 @@
                         signature=content_part.signature,
                         name=self.name,
                         otid=otid,
+                        step_id=self.step_id,
                     )
                 )
             elif isinstance(content_part, RedactedReasoningContent):
@@ -295,6 +306,7 @@
                         name=self.name,
                         otid=otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
             elif isinstance(content_part, OmittedReasoningContent):
@@ -307,6 +319,7 @@
                         state="omitted",
                         name=self.name,
                         otid=otid,
+                        step_id=self.step_id,
                     )
                 )
         else:
@@ -333,6 +346,7 @@
                         name=self.name,
                         otid=otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
             else:
@@ -348,6 +362,7 @@
                         name=self.name,
                         otid=otid,
                         sender_id=self.sender_id,
+                        step_id=self.step_id,
                     )
                 )
         elif self.role == MessageRole.tool:
@@ -391,6 +406,7 @@
                     name=self.name,
                     otid=self.id.replace("message-", ""),
                     sender_id=self.sender_id,
+                    step_id=self.step_id,
                 )
             )
         elif self.role == MessageRole.user:
@@ -409,6 +425,7 @@
                     name=self.name,
                     otid=self.otid,
                     sender_id=self.sender_id,
+                    step_id=self.step_id,
                 )
             )
         elif self.role == MessageRole.system:
@@ -426,6 +443,7 @@
                     name=self.name,
                     otid=self.otid,
                     sender_id=self.sender_id,
+                    step_id=self.step_id,
                 )
             )
         else:
@@ -700,9 +718,12 @@
         else:
             raise ValueError(self.role)
 
-        # Optional field, do not include if null
+        # Optional field, do not include if null or invalid
         if self.name is not None:
-            openai_message["name"] = self.name
+            if bool(re.match(r"^[^\s<|\\/>]+$", self.name)):
+                openai_message["name"] = self.name
+            else:
+                warnings.warn(f"Using OpenAI with invalid 'name' field (name={self.name} role={self.role}).")
 
         if parse_content_parts:
             for content in self.content:
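
A note on the name validation added above: the pattern accepts any non-empty string containing no whitespace and none of the characters < | \ / >; anything else now produces a warning instead of being sent upstream. A standalone sketch of how the pattern behaves:

```python
import re

# Same pattern as the diff: one or more characters, none of which may be
# whitespace or one of < | \ / >
NAME_PATTERN = r"^[^\s<|\\/>]+$"

for name in ["assistant", "my_agent-1", "bad name", "a<b", ""]:
    print(f"{name!r}: {bool(re.match(NAME_PATTERN, name))}")
# 'assistant': True, 'my_agent-1': True, 'bad name': False, 'a<b': False, '': False
```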
letta/schemas/providers.py CHANGED
@@ -201,7 +201,9 @@ class OpenAIProvider(Provider):
         # for openai, filter models
         if self.base_url == "https://api.openai.com/v1":
             allowed_types = ["gpt-4", "o1", "o3"]
-            disallowed_types = ["transcribe", "search", "realtime", "tts", "audio", "computer"]
+            # NOTE: o1-mini and o1-preview do not support tool calling
+            # NOTE: o1-pro is only available in Responses API
+            disallowed_types = ["transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"]
             skip = True
             for model_type in allowed_types:
                 if model_name.startswith(model_type):
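
Taken together with the surrounding context lines, the provider filter first requires an allowed prefix and then drops disallowed variants. A simplified standalone sketch of that logic (the function name and exact structure here are illustrative, not copied from the provider class):

```python
def is_model_exposed(model_name: str) -> bool:
    """Keep models with an allowed prefix, then drop disallowed variants."""
    allowed_types = ["gpt-4", "o1", "o3"]
    disallowed_types = [
        "transcribe", "search", "realtime", "tts", "audio", "computer",
        "o1-mini", "o1-preview", "o1-pro",
    ]
    if not any(model_name.startswith(prefix) for prefix in allowed_types):
        return False
    return not any(fragment in model_name for fragment in disallowed_types)

print(is_model_exposed("gpt-4o"))                   # True
print(is_model_exposed("o1-mini"))                  # False: no tool calling
print(is_model_exposed("gpt-4o-realtime-preview"))  # False: realtime variant
```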
letta/server/rest_api/app.py CHANGED
@@ -17,7 +17,7 @@ from letta.__init__ import __version__
 from letta.agents.exceptions import IncompatibleAgentType
 from letta.constants import ADMIN_PREFIX, API_PREFIX, OPENAI_API_PREFIX
 from letta.errors import BedrockPermissionError, LettaAgentNotFoundError, LettaUserNotFoundError
-from letta.jobs.scheduler import shutdown_cron_scheduler, start_cron_jobs
+from letta.jobs.scheduler import shutdown_scheduler_and_release_lock, start_scheduler_with_leader_election
 from letta.log import get_logger
 from letta.orm.errors import DatabaseTimeoutError, ForeignKeyConstraintViolationError, NoResultFound, UniqueConstraintViolationError
 from letta.schemas.letta_message import create_letta_message_union_schema
@@ -150,10 +150,10 @@ def create_application() -> "FastAPI":
     loop.set_default_executor(executor)
 
     @app.on_event("startup")
-    def on_startup():
+    async def on_startup():
         global server
 
-        start_cron_jobs(server)
+        await start_scheduler_with_leader_election(server)
 
     @app.on_event("shutdown")
     def shutdown_mcp_clients():
@@ -170,9 +170,16 @@
         t.start()
         t.join()
 
-    @app.on_event("shutdown")
-    def shutdown_scheduler():
-        shutdown_cron_scheduler()
+    @app.exception_handler(IncompatibleAgentType)
+    async def handle_incompatible_agent_type(request: Request, exc: IncompatibleAgentType):
+        return JSONResponse(
+            status_code=400,
+            content={
+                "detail": str(exc),
+                "expected_type": exc.expected_type,
+                "actual_type": exc.actual_type,
+            },
+        )
 
     @app.exception_handler(IncompatibleAgentType)
     async def handle_incompatible_agent_type(request: Request, exc: IncompatibleAgentType):
@@ -322,9 +329,10 @@
     generate_openapi_schema(app)
 
     @app.on_event("shutdown")
-    def on_shutdown():
+    async def on_shutdown():
         global server
         # server = None
+        await shutdown_scheduler_and_release_lock()
 
     return app
 
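
The scheduler rename (start_cron_jobs → start_scheduler_with_leader_election, shutdown_cron_scheduler → shutdown_scheduler_and_release_lock), together with the new poll_lock_retry_interval_seconds setting further down, points at single-leader scheduling across multiple server instances. The letta/jobs/scheduler.py changes themselves (+233 -49) are not shown in this diff, so the following is only a hypothetical sketch of the common Postgres advisory-lock pattern; every name in it is invented for illustration:

```python
import asyncio

import asyncpg  # hypothetical driver choice; the real implementation may differ

SCHEDULER_LOCK_KEY = 0x1E77A  # arbitrary advisory-lock key, for illustration only


async def start_scheduler_with_leader_election_sketch(pg_dsn: str, retry_interval_s: int):
    """Try to become the scheduler leader; losers retry on an interval."""
    conn = await asyncpg.connect(pg_dsn)
    while True:
        got_lock = await conn.fetchval("SELECT pg_try_advisory_lock($1)", SCHEDULER_LOCK_KEY)
        if got_lock:
            # Leader: start the cron jobs here. Keep the connection open,
            # because a Postgres advisory lock is session-scoped.
            return conn
        await asyncio.sleep(retry_interval_s)


async def shutdown_scheduler_and_release_lock_sketch(conn):
    """Release the advisory lock so another instance can take over."""
    await conn.execute("SELECT pg_advisory_unlock($1)", SCHEDULER_LOCK_KEY)
    await conn.close()
```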
letta/server/rest_api/routers/v1/agents.py CHANGED
@@ -13,6 +13,7 @@ from starlette.responses import Response, StreamingResponse
 
 from letta.agents.letta_agent import LettaAgent
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+from letta.helpers.datetime_helpers import get_utc_timestamp_ns
 from letta.log import get_logger
 from letta.orm.errors import NoResultFound
 from letta.schemas.agent import AgentState, AgentType, CreateAgent, UpdateAgent
@@ -684,6 +685,7 @@
     This endpoint accepts a message from a user and processes it through the agent.
     It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
     """
+    request_start_timestamp_ns = get_utc_timestamp_ns()
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
     # TODO: This is redundant, remove soon
     agent = server.agent_manager.get_agent_by_id(agent_id, actor)
@@ -719,6 +721,7 @@
         use_assistant_message=request.use_assistant_message,
         assistant_message_tool_name=request.assistant_message_tool_name,
         assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+        request_start_timestamp_ns=request_start_timestamp_ns,
     )
 
     return result
letta/server/rest_api/routers/v1/messages.py CHANGED
@@ -1,6 +1,6 @@
 from typing import List, Optional
 
-from fastapi import APIRouter, Body, Depends, Header, status
+from fastapi import APIRouter, Body, Depends, Header, Query, status
 from fastapi.exceptions import HTTPException
 from starlette.requests import Request
 
@@ -9,6 +9,7 @@ from letta.log import get_logger
 from letta.orm.errors import NoResultFound
 from letta.schemas.job import BatchJob, JobStatus, JobType, JobUpdate
 from letta.schemas.letta_request import CreateBatch
+from letta.schemas.letta_response import LettaBatchMessages
 from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
 from letta.settings import settings
@@ -123,6 +124,50 @@ async def list_batch_runs(
     return [BatchJob.from_job(job) for job in jobs]
 
 
+@router.get(
+    "/batches/{batch_id}/messages",
+    response_model=LettaBatchMessages,
+    operation_id="list_batch_messages",
+)
+async def list_batch_messages(
+    batch_id: str,
+    limit: int = Query(100, description="Maximum number of messages to return"),
+    cursor: Optional[str] = Query(
+        None, description="Message ID to use as pagination cursor (get messages before/after this ID) depending on sort_descending."
+    ),
+    agent_id: Optional[str] = Query(None, description="Filter messages by agent ID"),
+    sort_descending: bool = Query(True, description="Sort messages by creation time (true=newest first)"),
+    actor_id: Optional[str] = Header(None, alias="user_id"),
+    server: SyncServer = Depends(get_letta_server),
+):
+    """
+    Get messages for a specific batch job.
+
+    Returns messages associated with the batch in chronological order.
+
+    Pagination:
+    - For the first page, omit the cursor parameter
+    - For subsequent pages, use the ID of the last message from the previous response as the cursor
+    - Results will include messages before/after the cursor based on sort_descending
+    """
+    actor = server.user_manager.get_user_or_default(user_id=actor_id)
+
+    # First, verify the batch job exists and the user has access to it
+    try:
+        job = server.job_manager.get_job_by_id(job_id=batch_id, actor=actor)
+        BatchJob.from_job(job)
+    except NoResultFound:
+        raise HTTPException(status_code=404, detail="Batch not found")
+
+    # Get messages directly using our efficient method
+    # We'll need to update the underlying implementation to use message_id as cursor
+    messages = server.batch_manager.get_messages_for_letta_batch(
+        letta_batch_job_id=batch_id, limit=limit, actor=actor, agent_id=agent_id, sort_descending=sort_descending, cursor=cursor
+    )
+
+    return LettaBatchMessages(messages=messages)
+
+
 @router.patch("/batches/{batch_id}/cancel", operation_id="cancel_batch_run")
 async def cancel_batch_run(
     batch_id: str,
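
Driving the new endpoint's cursor pagination from a client could look like the sketch below. The base URL, the /v1/messages mount point, and the user_id header handling are assumptions (they are not visible in this diff); the {"messages": [...]} response shape follows LettaBatchMessages above:

```python
import requests

BASE_URL = "http://localhost:8283/v1/messages"  # assumed local mount point


def iter_batch_messages(batch_id: str, user_id: str, page_size: int = 100):
    """Yield all messages for a batch, feeding the last ID back as the cursor."""
    cursor = None
    while True:
        params = {"limit": page_size}
        if cursor:
            params["cursor"] = cursor
        resp = requests.get(
            f"{BASE_URL}/batches/{batch_id}/messages",
            params=params,
            headers={"user_id": user_id},
        )
        resp.raise_for_status()
        messages = resp.json()["messages"]
        if not messages:
            return
        yield from messages
        cursor = messages[-1]["id"]  # last message ID becomes the next cursor
```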
letta/server/rest_api/routers/v1/steps.py CHANGED
@@ -11,7 +11,7 @@ from letta.server.server import SyncServer
 router = APIRouter(prefix="/steps", tags=["steps"])
 
 
-@router.get("", response_model=List[Step], operation_id="list_steps")
+@router.get("/", response_model=List[Step], operation_id="list_steps")
 def list_steps(
     before: Optional[str] = Query(None, description="Return steps before this step ID"),
     after: Optional[str] = Query(None, description="Return steps after this step ID"),
letta/server/rest_api/utils.py CHANGED
@@ -15,7 +15,7 @@ from pydantic import BaseModel
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
-from letta.helpers.datetime_helpers import get_utc_time
+from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns
 from letta.helpers.message_helper import convert_message_creates_to_messages
 from letta.log import get_logger
 from letta.schemas.enums import MessageRole
@@ -25,6 +25,7 @@ from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
 from letta.server.rest_api.interface import StreamingServerInterface
 from letta.system import get_heartbeat, package_function_response
+from letta.tracing import tracer
 
 if TYPE_CHECKING:
     from letta.server.server import SyncServer
@@ -51,18 +52,35 @@ async def sse_async_generator(
     generator: AsyncGenerator,
     usage_task: Optional[asyncio.Task] = None,
     finish_message=True,
+    request_start_timestamp_ns: Optional[int] = None,
 ):
     """
     Wraps a generator for use in Server-Sent Events (SSE), handling errors and ensuring a completion message.
 
     Args:
     - generator: An asynchronous generator yielding data chunks.
+    - usage_task: Optional task that will return usage statistics.
+    - finish_message: Whether to send a completion message.
+    - request_start_timestamp_ns: Optional ns timestamp when the request started, used to measure time to first token.
 
     Yields:
     - Formatted Server-Sent Event strings.
     """
+    first_chunk = True
+    ttft_span = None
+    if request_start_timestamp_ns is not None:
+        ttft_span = tracer.start_span("time_to_first_token", start_time=request_start_timestamp_ns)
+
     try:
         async for chunk in generator:
+            # Measure time to first token
+            if first_chunk and ttft_span is not None:
+                now = get_utc_timestamp_ns()
+                ttft_ns = now - request_start_timestamp_ns
+                ttft_span.add_event(name="time_to_first_token_ms", attributes={"ttft_ms": ttft_ns // 1_000_000})
+                ttft_span.end()
+                first_chunk = False
+
             # yield f"data: {json.dumps(chunk)}\n\n"
             if isinstance(chunk, BaseModel):
                 chunk = chunk.model_dump()
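
The time-to-first-token bookkeeping above reduces to one subtraction on nanosecond timestamps. A standalone illustration using the stdlib in place of Letta's get_utc_timestamp_ns helper:

```python
import time

request_start_timestamp_ns = time.time_ns()  # stand-in for get_utc_timestamp_ns()

time.sleep(0.25)  # simulate work before the first SSE chunk is ready

ttft_ns = time.time_ns() - request_start_timestamp_ns
print(f"ttft_ms={ttft_ns // 1_000_000}")  # ~250; integer division, as in the diff
```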
@@ -168,6 +186,7 @@ def create_letta_messages_from_llm_response(
     reasoning_content: Optional[List[Union[TextContent, ReasoningContent, RedactedReasoningContent, OmittedReasoningContent]]] = None,
     pre_computed_assistant_message_id: Optional[str] = None,
     pre_computed_tool_message_id: Optional[str] = None,
+    llm_batch_item_id: Optional[str] = None,
 ) -> List[Message]:
     messages = []
 
@@ -192,6 +211,7 @@
         tool_calls=[tool_call],
         tool_call_id=tool_call_id,
         created_at=get_utc_time(),
+        batch_item_id=llm_batch_item_id,
     )
     if pre_computed_assistant_message_id:
         assistant_message.id = pre_computed_assistant_message_id
@@ -209,6 +229,7 @@
         tool_call_id=tool_call_id,
         created_at=get_utc_time(),
         name=function_name,
+        batch_item_id=llm_batch_item_id,
     )
     if pre_computed_tool_message_id:
         tool_message.id = pre_computed_tool_message_id
@@ -216,7 +237,7 @@
 
     if add_heartbeat_request_system_message:
         heartbeat_system_message = create_heartbeat_system_message(
-            agent_id=agent_id, model=model, function_call_success=function_call_success, actor=actor
+            agent_id=agent_id, model=model, function_call_success=function_call_success, actor=actor, llm_batch_item_id=llm_batch_item_id
         )
         messages.append(heartbeat_system_message)
 
@@ -224,10 +245,7 @@
 
 
 def create_heartbeat_system_message(
-    agent_id: str,
-    model: str,
-    function_call_success: bool,
-    actor: User,
+    agent_id: str, model: str, function_call_success: bool, actor: User, llm_batch_item_id: Optional[str] = None
 ) -> Message:
     text_content = REQ_HEARTBEAT_MESSAGE if function_call_success else FUNC_FAILED_HEARTBEAT_MESSAGE
     heartbeat_system_message = Message(
@@ -239,6 +257,7 @@
         tool_calls=[],
         tool_call_id=None,
         created_at=get_utc_time(),
+        batch_item_id=llm_batch_item_id,
     )
     return heartbeat_system_message
 
letta/server/server.py CHANGED
@@ -244,9 +244,15 @@ class SyncServer(Server):
         tool_dir = tool_settings.tool_exec_dir or LETTA_TOOL_EXECUTION_DIR
 
         venv_dir = Path(tool_dir) / venv_name
-        if not Path(tool_dir).is_dir():
-            logger.error(f"Provided LETTA_TOOL_SANDBOX_DIR is not a valid directory: {tool_dir}")
+        tool_path = Path(tool_dir)
+
+        if tool_path.exists() and not tool_path.is_dir():
+            logger.error(f"LETTA_TOOL_SANDBOX_DIR exists but is not a directory: {tool_dir}")
         else:
+            if not tool_path.exists():
+                logger.warning(f"LETTA_TOOL_SANDBOX_DIR does not exist, creating now: {tool_dir}")
+                tool_path.mkdir(parents=True, exist_ok=True)
+
             if tool_settings.tool_exec_venv_name and not venv_dir.is_dir():
                 logger.warning(
                     f"Provided LETTA_TOOL_SANDBOX_VENV_NAME is not a valid venv ({venv_dir}), one will be created for you during tool execution."
@@ -859,7 +865,7 @@
                     value=get_persona_text("voice_memory_persona"),
                 ),
             ],
-            llm_config=main_agent.llm_config,
+            llm_config=LLMConfig.default_config("gpt-4.1"),
            embedding_config=main_agent.embedding_config,
            project_id=main_agent.project_id,
        )
@@ -1633,6 +1639,7 @@
     assistant_message_tool_name: str = constants.DEFAULT_MESSAGE_TOOL,
     assistant_message_tool_kwarg: str = constants.DEFAULT_MESSAGE_TOOL_KWARG,
     metadata: Optional[dict] = None,
+    request_start_timestamp_ns: Optional[int] = None,
 ) -> Union[StreamingResponse, LettaResponse]:
     """Split off into a separate function so that it can be imported in the /chat/completion proxy."""
     # TODO: @charles is this the correct way to handle?
@@ -1717,6 +1724,7 @@
             streaming_interface.get_generator(),
             usage_task=task,
             finish_message=include_final_message,
+            request_start_timestamp_ns=request_start_timestamp_ns,
         ),
         media_type="text/event-stream",
     )
letta/services/llm_batch_manager.py CHANGED
@@ -2,10 +2,11 @@ import datetime
 from typing import Any, Dict, List, Optional, Tuple
 
 from anthropic.types.beta.messages import BetaMessageBatch, BetaMessageBatchIndividualResponse
-from sqlalchemy import func, tuple_
+from sqlalchemy import desc, func, tuple_
 
 from letta.jobs.types import BatchPollingResult, ItemUpdateInfo, RequestStatusUpdateInfo, StepStatusUpdateInfo
 from letta.log import get_logger
+from letta.orm import Message as MessageModel
 from letta.orm.llm_batch_items import LLMBatchItem
 from letta.orm.llm_batch_job import LLMBatchJob
 from letta.schemas.agent import AgentStepState
@@ -13,6 +14,7 @@ from letta.schemas.enums import AgentStepStatus, JobStatus, ProviderType
 from letta.schemas.llm_batch_job import LLMBatchItem as PydanticLLMBatchItem
 from letta.schemas.llm_batch_job import LLMBatchJob as PydanticLLMBatchJob
 from letta.schemas.llm_config import LLMConfig
+from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.user import User as PydanticUser
 from letta.utils import enforce_types
 
@@ -142,6 +144,62 @@ class LLMBatchManager:
             batch = LLMBatchJob.read(db_session=session, identifier=llm_batch_id, actor=actor)
             batch.hard_delete(db_session=session, actor=actor)
 
+    @enforce_types
+    def get_messages_for_letta_batch(
+        self,
+        letta_batch_job_id: str,
+        limit: int = 100,
+        actor: Optional[PydanticUser] = None,
+        agent_id: Optional[str] = None,
+        sort_descending: bool = True,
+        cursor: Optional[str] = None,  # Message ID as cursor
+    ) -> List[PydanticMessage]:
+        """
+        Retrieve messages across all LLM batch jobs associated with a Letta batch job.
+        Optimized for PostgreSQL performance using ID-based keyset pagination.
+        """
+        with self.session_maker() as session:
+            # If cursor is provided, get sequence_id for that message
+            cursor_sequence_id = None
+            if cursor:
+                cursor_query = session.query(MessageModel.sequence_id).filter(MessageModel.id == cursor).limit(1)
+                cursor_result = cursor_query.first()
+                if cursor_result:
+                    cursor_sequence_id = cursor_result[0]
+                else:
+                    # If cursor message doesn't exist, ignore it
+                    pass
+
+            query = (
+                session.query(MessageModel)
+                .join(LLMBatchItem, MessageModel.batch_item_id == LLMBatchItem.id)
+                .join(LLMBatchJob, LLMBatchItem.llm_batch_id == LLMBatchJob.id)
+                .filter(LLMBatchJob.letta_batch_job_id == letta_batch_job_id)
+            )
+
+            if actor is not None:
+                query = query.filter(MessageModel.organization_id == actor.organization_id)
+
+            if agent_id is not None:
+                query = query.filter(MessageModel.agent_id == agent_id)
+
+            # Apply cursor-based pagination if cursor exists
+            if cursor_sequence_id is not None:
+                if sort_descending:
+                    query = query.filter(MessageModel.sequence_id < cursor_sequence_id)
+                else:
+                    query = query.filter(MessageModel.sequence_id > cursor_sequence_id)
+
+            if sort_descending:
+                query = query.order_by(desc(MessageModel.sequence_id))
+            else:
+                query = query.order_by(MessageModel.sequence_id)
+
+            query = query.limit(limit)
+
+            results = query.all()
+            return [message.to_pydantic() for message in results]
+
     @enforce_types
     def list_running_llm_batches(self, actor: Optional[PydanticUser] = None) -> List[PydanticLLMBatchJob]:
         """Return all running LLM batch jobs, optionally filtered by actor's organization."""
@@ -196,6 +254,7 @@ class LLMBatchManager:
         orm_items = []
         for item in llm_batch_items:
             orm_item = LLMBatchItem(
+                id=item.id,
                 llm_batch_id=item.llm_batch_id,
                 agent_id=item.agent_id,
                 llm_config=item.llm_config,
letta/services/message_manager.py CHANGED
@@ -73,6 +73,7 @@ class MessageManager:
         Returns:
             List of created Pydantic message models
         """
+
         if not pydantic_msgs:
             return []
 
letta/services/summarizer/summarizer.py CHANGED
@@ -1,9 +1,8 @@
 import asyncio
 import json
 import traceback
-from typing import List, Tuple
+from typing import List, Optional, Tuple
 
-from letta.agents.voice_sleeptime_agent import VoiceSleeptimeAgent
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.log import get_logger
 from letta.schemas.enums import MessageRole
@@ -22,7 +21,11 @@ class Summarizer:
     """
 
     def __init__(
-        self, mode: SummarizationMode, summarizer_agent: VoiceSleeptimeAgent, message_buffer_limit: int = 10, message_buffer_min: int = 3
+        self,
+        mode: SummarizationMode,
+        summarizer_agent: Optional["VoiceSleeptimeAgent"] = None,
+        message_buffer_limit: int = 10,
+        message_buffer_min: int = 3,
     ):
         self.mode = mode
 
@@ -90,39 +93,42 @@
             logger.info("Nothing to evict, returning in context messages as is.")
             return all_in_context_messages, False
 
-        evicted_messages = all_in_context_messages[1:target_trim_index]
-
-        # Format
-        formatted_evicted_messages = format_transcript(evicted_messages)
-        formatted_in_context_messages = format_transcript(updated_in_context_messages)
-
-        # Update the message transcript of the memory agent
-        self.summarizer_agent.update_message_transcript(message_transcripts=formatted_evicted_messages + formatted_in_context_messages)
-
-        # Add line numbers to the formatted messages
-        line_number = 0
-        for i in range(len(formatted_evicted_messages)):
-            formatted_evicted_messages[i] = f"{line_number}. " + formatted_evicted_messages[i]
-            line_number += 1
-        for i in range(len(formatted_in_context_messages)):
-            formatted_in_context_messages[i] = f"{line_number}. " + formatted_in_context_messages[i]
-            line_number += 1
-
-        evicted_messages_str = "\n".join(formatted_evicted_messages)
-        in_context_messages_str = "\n".join(formatted_in_context_messages)
-        summary_request_text = f"""You’re a memory-recall helper for an AI that can only keep the last {self.message_buffer_min} messages. Scan the conversation history, focusing on messages about to drop out of that window, and write crisp notes that capture any important facts or insights about the human so they aren’t lost.
-
-        (Older) Evicted Messages:\n
-        {evicted_messages_str}\n
-
-        (Newer) In-Context Messages:\n
-        {in_context_messages_str}
-        """
-        print(summary_request_text)
-        # Fire-and-forget the summarization task
-        self.fire_and_forget(
-            self.summarizer_agent.step([MessageCreate(role=MessageRole.user, content=[TextContent(text=summary_request_text)])])
-        )
+        if self.summarizer_agent:
+            # Only invoke if summarizer agent is passed in
+
+            evicted_messages = all_in_context_messages[1:target_trim_index]
+
+            # Format
+            formatted_evicted_messages = format_transcript(evicted_messages)
+            formatted_in_context_messages = format_transcript(updated_in_context_messages)
+
+            # TODO: This is hyperspecific to voice, generalize!
+            # Update the message transcript of the memory agent
+            self.summarizer_agent.update_message_transcript(message_transcripts=formatted_evicted_messages + formatted_in_context_messages)
+
+            # Add line numbers to the formatted messages
+            line_number = 0
+            for i in range(len(formatted_evicted_messages)):
+                formatted_evicted_messages[i] = f"{line_number}. " + formatted_evicted_messages[i]
+                line_number += 1
+            for i in range(len(formatted_in_context_messages)):
+                formatted_in_context_messages[i] = f"{line_number}. " + formatted_in_context_messages[i]
+                line_number += 1
+
+            evicted_messages_str = "\n".join(formatted_evicted_messages)
+            in_context_messages_str = "\n".join(formatted_in_context_messages)
+            summary_request_text = f"""You’re a memory-recall helper for an AI that can only keep the last {self.message_buffer_min} messages. Scan the conversation history, focusing on messages about to drop out of that window, and write crisp notes that capture any important facts or insights about the human so they aren’t lost.
+
+            (Older) Evicted Messages:\n
+            {evicted_messages_str}\n
+
+            (Newer) In-Context Messages:\n
+            {in_context_messages_str}
+            """
+            # Fire-and-forget the summarization task
+            self.fire_and_forget(
+                self.summarizer_agent.step([MessageCreate(role=MessageRole.user, content=[TextContent(text=summary_request_text)])])
+            )
 
         return [all_in_context_messages[0]] + updated_in_context_messages, True
 
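
fire_and_forget itself is not part of this diff. A typical shape for such a helper on an asyncio loop, sketched here as an assumption rather than Letta's actual implementation, schedules the coroutine and logs failures from a done-callback so exceptions are not silently dropped:

```python
import asyncio
import logging

logger = logging.getLogger(__name__)


def fire_and_forget(coro) -> asyncio.Task:
    """Schedule a coroutine without awaiting it, but surface its errors."""
    task = asyncio.create_task(coro)

    def _log_result(t: asyncio.Task) -> None:
        if t.cancelled():
            logger.warning("summarization task was cancelled")
        elif t.exception() is not None:
            logger.error("summarization task failed", exc_info=t.exception())

    task.add_done_callback(_log_result)
    return task
```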
letta/settings.py CHANGED
@@ -209,6 +209,7 @@ class Settings(BaseSettings):
     # cron job parameters
     enable_batch_job_polling: bool = False
     poll_running_llm_batches_interval_seconds: int = 5 * 60
+    poll_lock_retry_interval_seconds: int = 5 * 60
 
     @property
     def letta_pg_uri(self) -> str:
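
Since Settings is a pydantic BaseSettings class, the new interval should be overridable through the environment. The exact variable name depends on the class's env prefix, which this diff does not show, so the name below is an assumption:

```python
import os

# Hypothetical variable name; adjust to the Settings class's actual env prefix
os.environ["LETTA_POLL_LOCK_RETRY_INTERVAL_SECONDS"] = "60"

from letta.settings import settings  # the env is read when settings is instantiated

print(settings.poll_lock_retry_interval_seconds)  # 60, if the prefix assumption holds
```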
letta/tracing.py CHANGED
@@ -75,6 +75,11 @@ async def update_trace_attributes(request: Request):
     for key, value in request.path_params.items():
         span.set_attribute(f"http.{key}", value)
 
+    # Add user ID if available
+    user_id = request.headers.get("user_id")
+    if user_id:
+        span.set_attribute("user.id", user_id)
+
     # Add request body if available
     try:
         body = await request.json()
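
For reference, span.set_attribute as used above is the standard OpenTelemetry span API; in isolation it looks like this:

```python
from opentelemetry import trace

tracer = trace.get_tracer(__name__)

with tracer.start_as_current_span("update_trace_attributes") as span:
    # Attributes become filterable dimensions in the tracing backend
    span.set_attribute("user.id", "user-123")
    span.set_attribute("http.agent_id", "agent-456")
```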