letta-nightly 0.11.7.dev20250912104045__py3-none-any.whl → 0.11.7.dev20250913103940__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -149,7 +149,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
     request_json=self.request_data,
     response_json={
         "content": {
-            "tool_call": self.tool_call.model_dump_json(),
+            "tool_call": self.tool_call.model_dump_json() if self.tool_call else None,
             "reasoning": [content.model_dump_json() for content in self.reasoning_content],
         },
         "id": self.interface.message_id,
@@ -19,7 +19,7 @@ from letta.agents.helpers import (
     generate_step_id,
 )
 from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
-from letta.errors import ContextWindowExceededError
+from letta.errors import ContextWindowExceededError, LLMError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms
 from letta.helpers.reasoning_helper import scrub_inner_thoughts_from_messages
@@ -306,7 +306,7 @@ class LettaAgentV2(BaseAgentV2):
             )

         except:
-            if self.stop_reason:
+            if self.stop_reason and not first_chunk:
                 yield f"data: {self.stop_reason.model_dump_json()}\n\n"
             raise

@@ -431,6 +431,9 @@ class LettaAgentV2(BaseAgentV2):
         except ValueError as e:
             self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
             raise e
+        except LLMError as e:
+            self.stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error.value)
+            raise e
         except Exception as e:
             if isinstance(e, ContextWindowExceededError) and llm_request_attempt < summarizer_settings.max_summarizer_retries:
                 # Retry case
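The new `except LLMError` branch tags provider-side failures with the dedicated `llm_api_error` stop reason before re-raising, so the step loop can tell API failures apart from malformed model output. A minimal sketch of the pattern, using stand-in classes rather than the real `letta.errors` and `letta.schemas` types:

```python
# Sketch of the stop-reason tagging pattern; LLMError and StopReasonType
# here are stand-ins for the letta.errors / letta.schemas originals.
from enum import Enum


class LLMError(Exception):
    """Stand-in for letta.errors.LLMError."""


class StopReasonType(str, Enum):
    invalid_llm_response = "invalid_llm_response"
    llm_api_error = "llm_api_error"


class AgentStep:
    def __init__(self):
        self.stop_reason = None

    def run(self, call_llm):
        try:
            return call_llm()
        except ValueError:
            # Malformed output from the model
            self.stop_reason = StopReasonType.invalid_llm_response
            raise
        except LLMError:
            # Provider-side failure (timeout, rate limit, auth, 5xx) now
            # gets its own stop reason instead of the generic error path.
            self.stop_reason = StopReasonType.llm_api_error
            raise
```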
@@ -497,6 +500,17 @@ class LettaAgentV2(BaseAgentV2):
                 if include_return_message_types is None or message.message_type in include_return_message_types:
                     yield message

+            # Persist approval responses immediately to prevent agent from getting into a bad state
+            if (
+                len(input_messages_to_persist) == 1
+                and input_messages_to_persist[0].role == "approval"
+                and persisted_messages[0].role == "approval"
+                and persisted_messages[1].role == "tool"
+            ):
+                self.agent_state.message_ids = self.agent_state.message_ids + [m.id for m in persisted_messages[:2]]
+                await self.agent_manager.update_message_ids_async(
+                    agent_id=self.agent_state.id, message_ids=self.agent_state.message_ids, actor=self.actor
+                )
             step_progression, step_metrics = await self._step_checkpoint_finish(step_metrics, agent_step_span, logged_step)
         except Exception as e:
             self.logger.error(f"Error during step processing: {e}")
@@ -511,6 +525,7 @@ class LettaAgentV2(BaseAgentV2):
                 StopReasonType.no_tool_call,
                 StopReasonType.invalid_tool_call,
                 StopReasonType.invalid_llm_response,
+                StopReasonType.llm_api_error,
             ):
                 self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", self.stop_reason.stop_reason)
                 raise e
@@ -278,6 +278,8 @@ class OpenAIStreamingInterface:
                     self.prev_assistant_message_id = self.function_id_buffer
                     # Reset message reader at the start of a new send_message stream
                     self.assistant_message_json_reader.reset()
+                    self.assistant_message_json_reader.in_message = True
+                    self.assistant_message_json_reader.message_started = True

                 else:
                     if prev_message_type and prev_message_type != "tool_call_message":
@@ -497,7 +497,7 @@ class AnthropicClient(LLMClientBase):
                 try:
                     args_json = json.loads(arguments)
                     if not isinstance(args_json, dict):
-                        raise ValueError("Expected parseable json object for arguments")
+                        raise LLMServerError("Expected parseable json object for arguments")
                 except:
                     arguments = str(tool_input["function"]["arguments"])
             else:
@@ -854,7 +854,7 @@ def remap_finish_reason(stop_reason: str) -> str:
     elif stop_reason == "tool_use":
         return "function_call"
     else:
-        raise ValueError(f"Unexpected stop_reason: {stop_reason}")
+        raise LLMServerError(f"Unexpected stop_reason: {stop_reason}")


 def strip_xml_tags(string: str, tag: Optional[str]) -> str:
@@ -54,9 +54,12 @@ class AzureClient(OpenAIClient):
         api_key = model_settings.azure_api_key or os.environ.get("AZURE_API_KEY")
         base_url = model_settings.azure_base_url or os.environ.get("AZURE_BASE_URL")
         api_version = model_settings.azure_api_version or os.environ.get("AZURE_API_VERSION")
+        try:
+            client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
+            response: ChatCompletion = await client.chat.completions.create(**request_data)
+        except Exception as e:
+            raise self.handle_llm_error(e)

-        client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
-        response: ChatCompletion = await client.chat.completions.create(**request_data)
         return response.model_dump()

     @trace_method
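Both client construction and the completion call now sit inside the `try`, so any SDK failure is translated by `handle_llm_error` into a typed Letta error. A hedged sketch of the same wrap-and-translate shape; the stub SDK client and the one-line `handle_llm_error` body are illustrative, not the real implementations:

```python
class LLMServerError(Exception):
    """Stand-in for letta.errors.LLMServerError."""


class ProviderClient:
    def handle_llm_error(self, e: Exception) -> Exception:
        # Illustrative translation; the real clients map SDK exceptions
        # onto the typed letta.errors hierarchy.
        return LLMServerError(f"provider call failed: {e}")

    def _make_sdk_client(self):
        # Stub standing in for AsyncAzureOpenAI(...) construction.
        raise ConnectionError("no credentials configured")

    def request(self, request_data: dict) -> dict:
        try:
            client = self._make_sdk_client()      # construction can fail,
            return client.create(**request_data)  # so it sits in the try too
        except Exception as e:
            raise self.handle_llm_error(e) from e


# ProviderClient().request({}) now raises LLMServerError, not ConnectionError.
```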
@@ -14,6 +14,19 @@ from google.genai.types import (
 )

 from letta.constants import NON_USER_MSG_PREFIX
+from letta.errors import (
+    ContextWindowExceededError,
+    ErrorCode,
+    LLMAuthenticationError,
+    LLMBadRequestError,
+    LLMConnectionError,
+    LLMNotFoundError,
+    LLMPermissionDeniedError,
+    LLMRateLimitError,
+    LLMServerError,
+    LLMTimeoutError,
+    LLMUnprocessableEntityError,
+)
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps, json_loads
 from letta.llm_api.llm_client_base import LLMClientBase
@@ -48,13 +61,16 @@ class GoogleVertexClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
-        client = self._get_client()
-        response = client.models.generate_content(
-            model=llm_config.model,
-            contents=request_data["contents"],
-            config=request_data["config"],
-        )
-        return response.model_dump()
+        try:
+            client = self._get_client()
+            response = client.models.generate_content(
+                model=llm_config.model,
+                contents=request_data["contents"],
+                config=request_data["config"],
+            )
+            return response.model_dump()
+        except Exception as e:
+            raise self.handle_llm_error(e)

     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
@@ -77,15 +93,15 @@ class GoogleVertexClient(LLMClientBase):
                 )
             except errors.APIError as e:
                 # Retry on 503 and 500 errors as well, usually ephemeral from Gemini
-                if e.code == 503 or e.code == 500:
+                if e.code == 503 or e.code == 500 or e.code == 504:
                     logger.warning(f"Received {e}, retrying {retry_count}/{self.MAX_RETRIES}")
                     retry_count += 1
                     if retry_count > self.MAX_RETRIES:
-                        raise e
+                        raise self.handle_llm_error(e)
                     continue
-                raise e
+                raise self.handle_llm_error(e)
             except Exception as e:
-                raise e
+                raise self.handle_llm_error(e)
         response_data = response.model_dump()
         is_malformed_function_call = self.is_malformed_function_call(response_data)
         if is_malformed_function_call:
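504 now joins 500 and 503 as a retryable Gemini status, and once retries are exhausted the raw `APIError` is translated by `handle_llm_error` rather than re-raised. The retry shape in isolation, with a stand-in `APIError` and a `translate` callable in place of the real `handle_llm_error` (note the real loop does not sleep between attempts either):

```python
class APIError(Exception):
    """Stand-in for google.genai.errors.APIError."""

    def __init__(self, code: int):
        super().__init__(f"API error {code}")
        self.code = code


RETRYABLE = {500, 503, 504}  # 504 is newly retryable in this release
MAX_RETRIES = 3


def request_with_retries(call, translate):
    retry_count = 0
    while True:
        try:
            return call()
        except APIError as e:
            if e.code in RETRYABLE:
                retry_count += 1
                if retry_count > MAX_RETRIES:
                    raise translate(e)  # translated, not re-raised raw
                continue
            raise translate(e)
```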
@@ -363,11 +379,10 @@ class GoogleVertexClient(LLMClientBase):

         if content is None or content.role is None or content.parts is None:
             # This means the response is malformed like MALFORMED_FUNCTION_CALL
-            # NOTE: must be a ValueError to trigger a retry
             if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
-                raise ValueError(f"Error in response data from LLM: {candidate.finish_reason}")
+                raise LLMServerError(f"Malformed response from Google Vertex: {candidate.finish_reason}")
             else:
-                raise ValueError(f"Error in response data from LLM: {candidate.model_dump()}")
+                raise LLMServerError(f"Invalid response data from Google Vertex: {candidate.model_dump()}")

         role = content.role
         assert role == "model", f"Unknown role in response: {role}"
@@ -461,7 +476,7 @@ class GoogleVertexClient(LLMClientBase):

         except json.decoder.JSONDecodeError:
             if candidate.finish_reason == "MAX_TOKENS":
-                raise ValueError("Could not parse response data from LLM: exceeded max token limit")
+                raise LLMServerError("Could not parse response data from LLM: exceeded max token limit")
         # Inner thoughts are the content by default
         inner_thoughts = response_message.text

@@ -490,7 +505,7 @@ class GoogleVertexClient(LLMClientBase):
         elif finish_reason == "RECITATION":
             openai_finish_reason = "content_filter"
         else:
-            raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+            raise LLMServerError(f"Unrecognized finish reason in Google AI response: {finish_reason}")

         choices.append(
             Choice(
@@ -581,5 +596,127 @@ class GoogleVertexClient(LLMClientBase):

     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
-        # Fallback to base implementation
+        # Handle Google GenAI specific errors
+        if isinstance(e, errors.ClientError):
+            logger.warning(f"[Google Vertex] Client error ({e.code}): {e}")
+
+            # Handle specific error codes
+            if e.code == 400:
+                error_str = str(e).lower()
+                if "context" in error_str and ("exceed" in error_str or "limit" in error_str or "too long" in error_str):
+                    return ContextWindowExceededError(
+                        message=f"Bad request to Google Vertex (context window exceeded): {str(e)}",
+                    )
+                else:
+                    return LLMBadRequestError(
+                        message=f"Bad request to Google Vertex: {str(e)}",
+                        code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    )
+            elif e.code == 401:
+                return LLMAuthenticationError(
+                    message=f"Authentication failed with Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 403:
+                return LLMPermissionDeniedError(
+                    message=f"Permission denied by Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 404:
+                return LLMNotFoundError(
+                    message=f"Resource not found in Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 408:
+                return LLMTimeoutError(
+                    message=f"Request to Google Vertex timed out: {str(e)}",
+                    code=ErrorCode.TIMEOUT,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            elif e.code == 422:
+                return LLMUnprocessableEntityError(
+                    message=f"Invalid request content for Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 429:
+                logger.warning("[Google Vertex] Rate limited (429). Consider backoff.")
+                return LLMRateLimitError(
+                    message=f"Rate limited by Google Vertex: {str(e)}",
+                    code=ErrorCode.RATE_LIMIT_EXCEEDED,
+                )
+            else:
+                return LLMServerError(
+                    message=f"Google Vertex client error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+
+        if isinstance(e, errors.ServerError):
+            logger.warning(f"[Google Vertex] Server error ({e.code}): {e}")
+
+            # Handle specific server error codes
+            if e.code == 500:
+                return LLMServerError(
+                    message=f"Google Vertex internal server error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+            elif e.code == 502:
+                return LLMConnectionError(
+                    message=f"Bad gateway from Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            elif e.code == 503:
+                return LLMServerError(
+                    message=f"Google Vertex service unavailable: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+            elif e.code == 504:
+                return LLMTimeoutError(
+                    message=f"Gateway timeout from Google Vertex: {str(e)}",
+                    code=ErrorCode.TIMEOUT,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            else:
+                return LLMServerError(
+                    message=f"Google Vertex server error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+
+        if isinstance(e, errors.APIError):
+            logger.warning(f"[Google Vertex] API error ({e.code}): {e}")
+            return LLMServerError(
+                message=f"Google Vertex API error: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={
+                    "status_code": e.code,
+                    "response_json": getattr(e, "response_json", None),
+                },
+            )
+
+        # Handle connection-related errors
+        if "connection" in str(e).lower() or "timeout" in str(e).lower():
+            logger.warning(f"[Google Vertex] Connection/timeout error: {e}")
+            return LLMConnectionError(
+                message=f"Failed to connect to Google Vertex: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={"cause": str(e.__cause__) if e.__cause__ else None},
+            )
+
+        # Fallback to base implementation for other errors
         return super().handle_llm_error(e)
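Taken together, `handle_llm_error` is now a near-total mapping from google.genai status codes onto the typed Letta error hierarchy. A condensed, table-driven sketch of the same dispatch (the exception classes are stand-ins; the real 400 branch additionally sniffs the message for context-window wording, and 502 maps to a connection error rather than a timeout):

```python
class LLMError(Exception): ...
class LLMBadRequestError(LLMError): ...
class LLMAuthenticationError(LLMError): ...
class LLMPermissionDeniedError(LLMError): ...
class LLMNotFoundError(LLMError): ...
class LLMTimeoutError(LLMError): ...
class LLMUnprocessableEntityError(LLMError): ...
class LLMRateLimitError(LLMError): ...
class LLMServerError(LLMError): ...

# Specific codes map to specific errors; everything else falls through
# to LLMServerError, mirroring the branches above.
STATUS_ERROR_MAP = {
    400: LLMBadRequestError,
    401: LLMAuthenticationError,
    403: LLMPermissionDeniedError,
    404: LLMNotFoundError,
    408: LLMTimeoutError,
    422: LLMUnprocessableEntityError,
    429: LLMRateLimitError,
    504: LLMTimeoutError,
}


def translate(code: int, message: str) -> LLMError:
    return STATUS_ERROR_MAP.get(code, LLMServerError)(message)


assert isinstance(translate(429, "slow down"), LLMRateLimitError)
assert isinstance(translate(500, "boom"), LLMServerError)
```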
@@ -99,7 +99,7 @@ def supports_structured_output(llm_config: LLMConfig) -> bool:

     # FIXME pretty hacky - turn off for providers we know users will use,
     # but also don't support structured output
-    if "nebius.com" in llm_config.model_endpoint:
+    if llm_config.model_endpoint and "nebius.com" in llm_config.model_endpoint:
         return False
     else:
         return True
@@ -108,7 +108,7 @@ def supports_structured_output(llm_config: LLMConfig) -> bool:
 # TODO move into LLMConfig as a field?
 def requires_auto_tool_choice(llm_config: LLMConfig) -> bool:
     """Certain providers require the tool choice to be set to 'auto'."""
-    if "nebius.com" in llm_config.model_endpoint:
+    if llm_config.model_endpoint and "nebius.com" in llm_config.model_endpoint:
         return True
     if llm_config.handle and "vllm" in llm_config.handle:
         return True
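Both helpers (and the LM Studio check below) fix the same bug: `model_endpoint` is optional on `LLMConfig`, and a substring test against `None` raises `TypeError` rather than returning `False`. The guard in isolation:

```python
from typing import Optional


def endpoint_contains(model_endpoint: Optional[str], needle: str) -> bool:
    # "needle" in None raises TypeError, so test for None first.
    return bool(model_endpoint) and needle in model_endpoint


assert endpoint_contains("https://api.studio.nebius.com/v1", "nebius.com")
assert not endpoint_contains(None, "nebius.com")
```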
@@ -168,7 +168,9 @@ class OpenAIClient(LLMClientBase):
             # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
             # TODO(fix)
             inner_thoughts_desc = (
-                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
+                if llm_config.model_endpoint and ":1234" in llm_config.model_endpoint
+                else INNER_THOUGHTS_KWARG_DESCRIPTION
             )
             tools = add_inner_thoughts_to_functions(
                 functions=tools,
@@ -146,11 +146,16 @@ def _instrument_engine_events(engine: Engine) -> None:
             span.end()
             context._sync_instrumentation_span = None

-    def handle_cursor_error(conn, cursor, statement, parameters, context, executemany):
+    def handle_cursor_error(exception_context):
         """Handle cursor execution errors."""
         if not _config["enabled"]:
             return

+        # Extract context from exception_context
+        context = getattr(exception_context, "execution_context", None)
+        if not context:
+            return
+
         span = getattr(context, "_sync_instrumentation_span", None)
         if span:
             span.set_status(Status(StatusCode.ERROR, "Database operation failed"))
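This corrects the listener signature: SQLAlchemy's `handle_error` event delivers a single `ExceptionContext`, unlike the cursor-execute events, which use the six-argument form; with the old signature the hook would fail at dispatch time. A minimal sketch of both signatures against an in-memory SQLite engine:

```python
from sqlalchemy import create_engine, event, text

engine = create_engine("sqlite://")


@event.listens_for(engine, "before_cursor_execute")
def before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    # Cursor-execute events use the six-argument signature.
    pass


@event.listens_for(engine, "handle_error")
def handle_error(exception_context):
    # handle_error receives one ExceptionContext; the execution context
    # (which carries the instrumentation span above) hangs off of it and
    # can be None for connection-level failures.
    ctx = exception_context.execution_context
    if ctx is None:
        return


with engine.connect() as conn:
    conn.execute(text("select 1"))
```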
@@ -9,6 +9,7 @@ from letta.schemas.enums import JobStatus
 class StopReasonType(str, Enum):
     end_turn = "end_turn"
     error = "error"
+    llm_api_error = "llm_api_error"
     invalid_llm_response = "invalid_llm_response"
     invalid_tool_call = "invalid_tool_call"
     max_steps = "max_steps"
@@ -31,6 +32,7 @@ class StopReasonType(str, Enum):
             StopReasonType.invalid_tool_call,
             StopReasonType.no_tool_call,
             StopReasonType.invalid_llm_response,
+            StopReasonType.llm_api_error,
         ):
             return JobStatus.failed
         elif self == StopReasonType.cancelled:
@@ -17,7 +17,15 @@ from starlette.middleware.cors import CORSMiddleware
 from letta.__init__ import __version__ as letta_version
 from letta.agents.exceptions import IncompatibleAgentType
 from letta.constants import ADMIN_PREFIX, API_PREFIX, OPENAI_API_PREFIX
-from letta.errors import BedrockPermissionError, LettaAgentNotFoundError, LettaUserNotFoundError
+from letta.errors import (
+    BedrockPermissionError,
+    LettaAgentNotFoundError,
+    LettaUserNotFoundError,
+    LLMAuthenticationError,
+    LLMError,
+    LLMRateLimitError,
+    LLMTimeoutError,
+)
 from letta.helpers.pinecone_utils import get_pinecone_indices, should_use_pinecone, upsert_pinecone_indices
 from letta.jobs.scheduler import start_scheduler_with_leader_election
 from letta.log import get_logger
@@ -276,6 +284,58 @@ def create_application() -> "FastAPI":
         },
     )

+    @app.exception_handler(LLMTimeoutError)
+    async def llm_timeout_error_handler(request: Request, exc: LLMTimeoutError):
+        return JSONResponse(
+            status_code=504,
+            content={
+                "error": {
+                    "type": "llm_timeout",
+                    "message": "The LLM request timed out. Please try again.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMRateLimitError)
+    async def llm_rate_limit_error_handler(request: Request, exc: LLMRateLimitError):
+        return JSONResponse(
+            status_code=429,
+            content={
+                "error": {
+                    "type": "llm_rate_limit",
+                    "message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMAuthenticationError)
+    async def llm_auth_error_handler(request: Request, exc: LLMAuthenticationError):
+        return JSONResponse(
+            status_code=401,
+            content={
+                "error": {
+                    "type": "llm_authentication",
+                    "message": "Authentication failed with the LLM model provider.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMError)
+    async def llm_error_handler(request: Request, exc: LLMError):
+        return JSONResponse(
+            status_code=502,
+            content={
+                "error": {
+                    "type": "llm_error",
+                    "message": "An error occurred with the LLM request.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
     settings.cors_origins.append("https://app.letta.com")

     if (os.getenv("LETTA_SERVER_SECURE") == "true") or "--secure" in sys.argv:
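These handlers give provider failures stable status codes (504, 429, 401) with a generic 502 fallback, all sharing one error envelope. Registering the base-class handler alongside the specific ones is safe because Starlette resolves handlers by walking the exception's MRO, so the most specific registered class wins. A self-contained sketch of that resolution behavior:

```python
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse


class LLMError(Exception): ...
class LLMRateLimitError(LLMError): ...


app = FastAPI()


@app.exception_handler(LLMRateLimitError)
async def rate_limit_handler(request: Request, exc: LLMRateLimitError):
    return JSONResponse(status_code=429, content={"error": {"type": "llm_rate_limit"}})


@app.exception_handler(LLMError)
async def llm_fallback_handler(request: Request, exc: LLMError):
    return JSONResponse(status_code=502, content={"error": {"type": "llm_error"}})


@app.get("/boom")
async def boom():
    # Served by the 429 handler, not the 502 fallback.
    raise LLMRateLimitError("slow down")
```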
@@ -536,9 +536,7 @@ async def attach_source(

     if agent_state.enable_sleeptime:
         source = await server.source_manager.get_source_by_id(source_id=source_id)
-        safe_create_task(
-            server.sleeptime_document_ingest_async(agent_state, source, actor), logger=logger, label="sleeptime_document_ingest_async"
-        )
+        safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async")

     return agent_state

@@ -565,9 +563,7 @@ async def attach_folder_to_agent(

     if agent_state.enable_sleeptime:
         source = await server.source_manager.get_source_by_id(source_id=folder_id)
-        safe_create_task(
-            server.sleeptime_document_ingest_async(agent_state, source, actor), logger=logger, label="sleeptime_document_ingest_async"
-        )
+        safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async")

     return agent_state

@@ -1320,15 +1316,55 @@ async def send_message_streaming(
     try:
         if agent_eligible and model_compatible:
             agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
-            raw_stream = agent_loop.stream(
-                input_messages=request.messages,
-                max_steps=request.max_steps,
-                stream_tokens=request.stream_tokens and model_compatible_token_streaming,
-                run_id=run.id if run else None,
-                use_assistant_message=request.use_assistant_message,
-                request_start_timestamp_ns=request_start_timestamp_ns,
-                include_return_message_types=request.include_return_message_types,
-            )
+
+            async def error_aware_stream():
+                """Stream that handles early LLM errors gracefully in streaming format."""
+                from letta.errors import LLMAuthenticationError, LLMError, LLMRateLimitError, LLMTimeoutError
+
+                try:
+                    stream = agent_loop.stream(
+                        input_messages=request.messages,
+                        max_steps=request.max_steps,
+                        stream_tokens=request.stream_tokens and model_compatible_token_streaming,
+                        run_id=run.id if run else None,
+                        use_assistant_message=request.use_assistant_message,
+                        request_start_timestamp_ns=request_start_timestamp_ns,
+                        include_return_message_types=request.include_return_message_types,
+                    )
+                    async for chunk in stream:
+                        yield chunk
+
+                except LLMTimeoutError as e:
+                    error_data = {
+                        "error": {"type": "llm_timeout", "message": "The LLM request timed out. Please try again.", "detail": str(e)}
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 504)
+                except LLMRateLimitError as e:
+                    error_data = {
+                        "error": {
+                            "type": "llm_rate_limit",
+                            "message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
+                            "detail": str(e),
+                        }
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 429)
+                except LLMAuthenticationError as e:
+                    error_data = {
+                        "error": {
+                            "type": "llm_authentication",
+                            "message": "Authentication failed with the LLM model provider.",
+                            "detail": str(e),
+                        }
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 401)
+                except LLMError as e:
+                    error_data = {"error": {"type": "llm_error", "message": "An error occurred with the LLM request.", "detail": str(e)}}
+                    yield (f"data: {json.dumps(error_data)}\n\n", 502)
+                except Exception as e:
+                    error_data = {"error": {"type": "internal_error", "message": "An internal server error occurred.", "detail": str(e)}}
+                    yield (f"data: {json.dumps(error_data)}\n\n", 500)
+
+            raw_stream = error_aware_stream()

             from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream

@@ -218,8 +218,17 @@ class JobManager:
         """
         try:
             job_update_builder = partial(JobUpdate, status=new_status)
+
+            # If metadata is provided, merge it with existing metadata
             if metadata:
-                job_update_builder = partial(job_update_builder, metadata=metadata)
+                # Get the current job to access existing metadata
+                current_job = await self.get_job_by_id_async(job_id=job_id, actor=actor)
+                merged_metadata = {}
+                if current_job.metadata:
+                    merged_metadata.update(current_job.metadata)
+                merged_metadata.update(metadata)
+                job_update_builder = partial(job_update_builder, metadata=merged_metadata)
+
             if new_status.is_terminal:
                 job_update_builder = partial(job_update_builder, completed_at=get_utc_time())

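Previously a status update carrying `metadata` replaced the job's existing metadata wholesale; it is now a shallow merge in which incoming keys win and untouched keys survive. The semantics in isolation:

```python
from typing import Optional


def merge_job_metadata(existing: Optional[dict], incoming: dict) -> dict:
    """Shallow merge: incoming keys overwrite, untouched keys survive."""
    merged: dict = {}
    if existing:
        merged.update(existing)
    merged.update(incoming)
    return merged


assert merge_job_metadata({"attempt": 1, "source": "api"}, {"attempt": 2}) == {
    "attempt": 2,
    "source": "api",
}
```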
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: letta-nightly
-Version: 0.11.7.dev20250912104045
+Version: 0.11.7.dev20250913103940
 Summary: Create LLM agents with long-term memory and custom tools
 Author-email: Letta Team <contact@letta.com>
 License: Apache License
@@ -16,7 +16,7 @@ letta/system.py,sha256=kHF7n3Viq7gV5UIUEXixod2gWa2jroUgztpEzMC1Sew,8925
 letta/utils.py,sha256=bSq3St7MUw9gN1g0ICdOhNNaUFYBC3EfJLG6qsRLSFA,43290
 letta/adapters/letta_llm_adapter.py,sha256=11wkOkEQfPXUuJoJxbK22wCa-8gnWiDAb3UOXOxLt5U,3427
 letta/adapters/letta_llm_request_adapter.py,sha256=wJhK5M_qOhRPAhgMmYI7EJcM8Op19tClnXe0kJ29a3Q,4831
-letta/adapters/letta_llm_stream_adapter.py,sha256=Q6nFr8uKc1DyAHHiHxHGNmqhRIScEKXO3TwsBgqW5QI,7630
+letta/adapters/letta_llm_stream_adapter.py,sha256=G8IqtXor0LUuW-dKtGJWsUt6DfJreVCn5h6W2lHEPBg,7658
 letta/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/agents/agent_loop.py,sha256=cTSlGt1g9aZWG5vIMYtzdeJG1UcrqfjpLGmZU6j89zU,854
 letta/agents/base_agent.py,sha256=rUAcPxWmTnmi50AWOXwrWc-v5sPIod0W_xXaPQShjcE,8540
@@ -27,7 +27,7 @@ letta/agents/exceptions.py,sha256=BQY4D4w32OYHM63CM19ko7dPwZiAzUs3NbKvzmCTcJg,31
 letta/agents/helpers.py,sha256=eCHsvZEkTe0L_uZHYkfNAztsEJW0FTnKZMgVbqlI0Yg,11618
 letta/agents/letta_agent.py,sha256=6nRTh5kzUpqK7eNMk4DlcgEoPmDxFmRb5ysoVHa-vh8,99488
 letta/agents/letta_agent_batch.py,sha256=17RpYVXpGh9dlKxdMOLMCOHWFsi6N5S9FJHxooxkJCI,27998
-letta/agents/letta_agent_v2.py,sha256=Xs54mewx9SgHHFAz8uLJ_6OHv9RHU1PtkwAB_Pu0XMk,58992
+letta/agents/letta_agent_v2.py,sha256=vMzVZL6Px5XBODEh3BHbbADtBSDIuNJrjsHqMGQfnwg,59930
 letta/agents/voice_agent.py,sha256=y-n6qadfKsswvGODzXH02pLIQQ44wnaDSE6oUgKHVkA,23381
 letta/agents/voice_sleeptime_agent.py,sha256=_JzCbWBOKrmo1cTaqZFTrQudpJEapwAyrXYtAHUILGo,8675
 letta/cli/cli.py,sha256=tKtghlX36Rp0_HbkMosvlAapL07JXhA0vKLGTNKnxSQ,1615
@@ -85,7 +85,7 @@ letta/humans/examples/cs_phd.txt,sha256=9C9ZAV_VuG7GB31ksy3-_NAyk8rjE6YtVOkhp08k
 letta/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/interfaces/anthropic_streaming_interface.py,sha256=0VyK8kTRgCLNDLQN6vX1gJ0dfJhqguL_NL1GYgFr6fU,25614
 letta/interfaces/openai_chat_completions_streaming_interface.py,sha256=3xHXh8cW79EkiMUTYfvcH_s92nkLjxXfvtVOVC3bfLo,5050
-letta/interfaces/openai_streaming_interface.py,sha256=t_TKcZSH0Bv_ajOh2mTd4RetrCr-rahkjmGIZIIGDXQ,23593
+letta/interfaces/openai_streaming_interface.py,sha256=abmtQhWWbXSZGTPBPbMGuAJCyMo9euwttPsjI6joiVU,23768
 letta/interfaces/utils.py,sha256=c6jvO0dBYHh8DQnlN-B0qeNC64d3CSunhfqlFA4pJTY,278
 letta/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/jobs/helpers.py,sha256=kO4aj954xsQ1RAmkjY6LQQ7JEIGuhaxB1e9pzrYKHAY,914
@@ -93,13 +93,13 @@ letta/jobs/llm_batch_job_polling.py,sha256=HUCTa1lTOiLAB_8m95RUfeNJa4lxlF8paGdCV
 letta/jobs/scheduler.py,sha256=Ub5VTCA8P5C9Y-0mPK2YIPJSEzKbSd2l5Sp0sOWctD8,8697
 letta/jobs/types.py,sha256=K8GKEnqEgAT6Kq4F2hUrBC4ZAFM9OkfOjVMStzxKuXQ,742
 letta/llm_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/llm_api/anthropic_client.py,sha256=Xplb-r2c6GmdBsBqDs67vjZim7HnNBhq1x5ExsLMM_M,37372
-letta/llm_api/azure_client.py,sha256=uAIFEFlhe0cdMm62F9M5wQeuWKxcyL4bL1MVMCOSOvM,3746
+letta/llm_api/anthropic_client.py,sha256=L8M4i08bHfNh1uS_M2_bDf3yeEuHpr5pungyu0pqo60,37380
+letta/llm_api/azure_client.py,sha256=BeChGsH4brrSgZBbCf8UE5RkW-3ZughpKnsBY2VYxwI,3841
 letta/llm_api/bedrock_client.py,sha256=gNKSFGCbrrLMPvtBItAOz1nme4K_opgkZdFa3cUzp7M,3434
 letta/llm_api/deepseek_client.py,sha256=di6ApSQu1DewXw0_JIP7AK4IHvXQHd0e32tQfFf5F34,16975
 letta/llm_api/google_ai_client.py,sha256=JweTUHZXvK6kcZBGXA7XEU53KP4vM7_zdD7AorCtsdI,8166
 letta/llm_api/google_constants.py,sha256=eOjOv-FImyJ4b4QGIaod-mEROMtrBFz0yhuYHqOEkwY,797
-letta/llm_api/google_vertex_client.py,sha256=57qrBe5dY-ERB9xI9_tWRwW_uSxGbHqR02mvnWmCLGY,28910
+letta/llm_api/google_vertex_client.py,sha256=p6MNUFHhkzFkGfWgldjVJC6SIvSMriMeCoenNYynU6E,34970
 letta/llm_api/groq_client.py,sha256=nNeWSgDVOLn3iFiicDKyhHj7f73JxrB9-7_M2Pv2e1I,3192
 letta/llm_api/helpers.py,sha256=GXV_SuaU7uSCDj6bxDcCCF7CUjuZQCVWd5qZ3OsHVNk,17587
 letta/llm_api/llm_api_tools.py,sha256=lsZ6OeIHesyOfbNQi5CVw5hn1lTQP5gJyforp-D0nk8,12294
@@ -107,7 +107,7 @@ letta/llm_api/llm_client.py,sha256=iXiPbrhluP2DBczv9nkFlAXdwWGOkg0lNDA9LzLrG4o,3
 letta/llm_api/llm_client_base.py,sha256=RFo8H4ILxVyzB3DeF4rJoJJYjRF8ScVO4yyDrhuN0DY,10052
 letta/llm_api/mistral.py,sha256=ruOTBt07Uzx7S30_eXhedVWngtpjtlzG6Ox1Iw0_mQs,662
 letta/llm_api/openai.py,sha256=56cwdS9l-75cMTtY9df6Dbb1M9crH8YQsSdF3Pm3Rpg,27393
-letta/llm_api/openai_client.py,sha256=Ww68D103uQolsALOzfPD5-CTuEaIFBbkdnrtMBIaZlc,22475
+letta/llm_api/openai_client.py,sha256=QDIRIG-4MVA-Jug8qx0HUkhg3qtUfHGvE6QCbSYGK-c,22597
 letta/llm_api/together_client.py,sha256=HeDMDDa525yfDTKciODDfX_t93QBfFmX0n2P-FT1QTU,2284
 letta/llm_api/xai_client.py,sha256=3mpSQ9OoWyjqo2VhNM_m0EPBzS69r4p-OEwL7UWc9oY,3772
 letta/llm_api/sample_response_jsons/aws_bedrock.json,sha256=RS3VqyxPB9hQQCPm42hWoga0bisKv_0e8ZF-c3Ag1FA,930
@@ -202,7 +202,7 @@ letta/otel/events.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/otel/metric_registry.py,sha256=TdRBJrwDuyZV2Uretnq0lYIoYKA2JUqWkENGqLhOCBc,9344
 letta/otel/metrics.py,sha256=GlIt8XLkP-igTXptah8UBonpHF7nEtSqTONSkAEERAs,4740
 letta/otel/resource.py,sha256=kqvEywP2LTmuxv2Or3Irtm2zwic863j1DWUvBC0IONc,735
-letta/otel/sqlalchemy_instrumentation.py,sha256=dkgQTDVSdre27r0EQzfTsV4d49btglLU-CdozHwEFHc,18767
+letta/otel/sqlalchemy_instrumentation.py,sha256=yiZvHjDA8Sd5j5RGbokiaOgRwCIE5hkvhWavVSOXs7U,18892
 letta/otel/sqlalchemy_instrumentation_integration.py,sha256=CwGPd5mb4PasBDnSlulSqfaupN-kB8Wz6EBHWBDNuuo,3902
 letta/otel/tracing.py,sha256=kyLsl00Zka3z3uEnOZqgantHya_bsmpvulABYHvsUo8,10422
 letta/personas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -270,7 +270,7 @@ letta/schemas/letta_message_content.py,sha256=7FuholmKauP5Z-FJdsCH_-4IUGl_8jVqi5
 letta/schemas/letta_ping.py,sha256=9JphoKhWZ63JqsakIx4aaj8dYMtYVa7HxSkT5cMh5cI,863
 letta/schemas/letta_request.py,sha256=ll0QTt-tzaJ3zxpPyaifz7mtWcPy6QmvPUDOzngbxfQ,4526
 letta/schemas/letta_response.py,sha256=e6FcAhRX3heB0FoWAAozB3RJboMwi_JpelTdc5JupVA,8188
-letta/schemas/letta_stop_reason.py,sha256=n060NkGItD1OarfviHUW9Mp7tv2_6spW1wkPCCzjepU,2271
+letta/schemas/letta_stop_reason.py,sha256=6vF66Dsyzog3X_d2PjfQxJLyiDarlqJ-hG7NMJpxbuc,2349
 letta/schemas/llm_batch_job.py,sha256=xr7RmMc9ItmL344vcIn1MJaT2nOf0F7qEHrsXkQNFQI,3136
 letta/schemas/llm_config.py,sha256=8nyi9r3o3feh_hUy6pdRWp3E6M612xZhvV3gkFB4aqE,13642
 letta/schemas/llm_config_overrides.py,sha256=E6qJuVA8TwAAy3VjGitJ5jSQo5PbN-6VPcZOF5qhP9A,1815
@@ -337,7 +337,7 @@ letta/server/server.py,sha256=KFFbyl7Djn8CS0aPxz3jL8RwmXPr9nKY3wDu3ymUWjI,109265
 letta/server/startup.sh,sha256=z-Fea-7LiuS_aG1tJqS8JAsDQaamwC_kuDhv9D3PPPY,2698
 letta/server/utils.py,sha256=rRvW6L1lzau4u9boamiyZH54lf5tQ91ypXzUW9cfSPA,1667
 letta/server/rest_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/server/rest_api/app.py,sha256=W3lCWe2iGumNIAyuDyH2sNO1EwGKqh7iSo82NXCXrVw,19270
+letta/server/rest_api/app.py,sha256=T3LLveXRJmfWqR0uEzoaLY8LXwYrwCQGb80XMbSCDUo,21172
 letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
 letta/server/rest_api/chat_completions_interface.py,sha256=-7wO7pNBWXMqblVkJpuZ8JPJ-LjudLTtT6BJu-q_XAM,11138
 letta/server/rest_api/interface.py,sha256=X5NZ8oerDcipG9y1AfD92zJ_2TgVMO4eJ42RP82GFF8,70952
@@ -355,7 +355,7 @@ letta/server/rest_api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
 letta/server/rest_api/routers/openai/chat_completions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/routers/openai/chat_completions/chat_completions.py,sha256=ohM1i8BsNxTiw8duuRT5X_0tSUzBwctQM4fJ5DXURic,5157
 letta/server/rest_api/routers/v1/__init__.py,sha256=9MnEA7CgtIxyU_dDNG0jm-Ziqu1somBml-e5gKjgd9I,1997
-letta/server/rest_api/routers/v1/agents.py,sha256=2eo7EDXTpybTPfOvgEGhm81LenIJcXNxv-bf5CcqjkU,75673
+letta/server/rest_api/routers/v1/agents.py,sha256=2lGLtfgB2ZtAa0EgaiaDlNV0GJhAi_kQQy7XqXB2DG0,77771
 letta/server/rest_api/routers/v1/blocks.py,sha256=ykI77xnmIxPLqdAy5kzGyGw0w0ZRyVXn-O5Xcdj6-70,7690
 letta/server/rest_api/routers/v1/embeddings.py,sha256=PRaQlrmEXPiIdWsTbadrFsv3Afyv5oEFUdhgHA8FTi8,989
 letta/server/rest_api/routers/v1/folders.py,sha256=8Yb-bw2JdXBxMfrJNIZQk9_FKN2fet9Ccp8T83_c2sc,23539
@@ -397,7 +397,7 @@ letta/services/file_manager.py,sha256=d4uX8RblmqNGk1MsfeGzQ5uDWKVFP-AH63Jz5xOkj2
 letta/services/files_agents_manager.py,sha256=QJrJTgDn3RXUjZIGiIw4GQ5k2iKj-Wvzs-WQetpQ154,30059
 letta/services/group_manager.py,sha256=dD4DDHjOptMrtbWqw1ErlhpBqChw2ubLJdILjeLTY8I,29183
 letta/services/identity_manager.py,sha256=JI9Xc7EsBagSwDS2na4rFNhoO_LuaxlkVO_1oIK_ITQ,11841
-letta/services/job_manager.py,sha256=nDrnr_r8ELwf8KMKyRRrWHsysrTGldgCTplJdaSiNiQ,35543
+letta/services/job_manager.py,sha256=E-w9_4BMErMuqVf2dFlTPTobrvBKhPyyEDfuqLnbACI,35970
 letta/services/llm_batch_manager.py,sha256=iDzLFfmgpQooGY4zpN_w8q1SZ27fr2Cv6Ks3ltZErL8,20929
 letta/services/mcp_manager.py,sha256=QuvKQnwxMXrhiCaYlF50GZwXmbSU7PxmcOZ85sQ3t7I,47848
 letta/services/message_manager.py,sha256=tomsZidPT-I95sJsEsls-vj3qglehV7XNTs-m2zF8Bg,60629
@@ -470,8 +470,8 @@ letta/templates/sandbox_code_file_async.py.j2,sha256=lb7nh_P2W9VZHzU_9TxSCEMUod7
 letta/templates/summary_request_text.j2,sha256=ZttQwXonW2lk4pJLYzLK0pmo4EO4EtUUIXjgXKiizuc,842
 letta/templates/template_helper.py,sha256=HkG3zwRc5NVGmSTQu5PUTpz7LevK43bzXVaQuN8urf0,1634
 letta/types/__init__.py,sha256=hokKjCVFGEfR7SLMrtZsRsBfsC7yTIbgKPLdGg4K1eY,147
-letta_nightly-0.11.7.dev20250912104045.dist-info/METADATA,sha256=tqJlpOfovWrr9Go7iI1cwIOkAgFx0Qwf7JYX11vg2JI,24424
-letta_nightly-0.11.7.dev20250912104045.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-letta_nightly-0.11.7.dev20250912104045.dist-info/entry_points.txt,sha256=m-94Paj-kxiR6Ktu0us0_2qfhn29DzF2oVzqBE6cu8w,41
-letta_nightly-0.11.7.dev20250912104045.dist-info/licenses/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
-letta_nightly-0.11.7.dev20250912104045.dist-info/RECORD,,
+letta_nightly-0.11.7.dev20250913103940.dist-info/METADATA,sha256=bBw5qZ0Uuj_L5Hs-2tji7ZiKtmp3V0LWaPeCiOkGV9A,24424
+letta_nightly-0.11.7.dev20250913103940.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+letta_nightly-0.11.7.dev20250913103940.dist-info/entry_points.txt,sha256=m-94Paj-kxiR6Ktu0us0_2qfhn29DzF2oVzqBE6cu8w,41
+letta_nightly-0.11.7.dev20250913103940.dist-info/licenses/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
+letta_nightly-0.11.7.dev20250913103940.dist-info/RECORD,,