letta-nightly 0.11.7.dev20250912104045__py3-none-any.whl → 0.11.7.dev20250913103940__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
@@ -149,7 +149,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
     request_json=self.request_data,
     response_json={
         "content": {
-            "tool_call": self.tool_call.model_dump_json(),
+            "tool_call": self.tool_call.model_dump_json() if self.tool_call else None,
             "reasoning": [content.model_dump_json() for content in self.reasoning_content],
         },
         "id": self.interface.message_id,
@@ -19,7 +19,7 @@ from letta.agents.helpers import (
     generate_step_id,
 )
 from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
-from letta.errors import ContextWindowExceededError
+from letta.errors import ContextWindowExceededError, LLMError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms
 from letta.helpers.reasoning_helper import scrub_inner_thoughts_from_messages
@@ -306,7 +306,7 @@ class LettaAgentV2(BaseAgentV2):
             )

         except:
-            if self.stop_reason:
+            if self.stop_reason and not first_chunk:
                 yield f"data: {self.stop_reason.model_dump_json()}\n\n"
             raise

@@ -431,6 +431,9 @@ class LettaAgentV2(BaseAgentV2):
         except ValueError as e:
             self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
             raise e
+        except LLMError as e:
+            self.stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error.value)
+            raise e
         except Exception as e:
             if isinstance(e, ContextWindowExceededError) and llm_request_attempt < summarizer_settings.max_summarizer_retries:
                 # Retry case
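The new `except LLMError` branch tags provider-side failures with the dedicated `llm_api_error` stop reason before re-raising, so the step loop can tell API failures apart from malformed model output. A minimal sketch of the pattern, using stand-in classes rather than the real `letta.errors` and `letta.schemas` types:

```python
# Sketch of the stop-reason tagging pattern; LLMError and StopReasonType
# here are stand-ins for the letta.errors / letta.schemas originals.
from enum import Enum


class LLMError(Exception):
    """Stand-in for letta.errors.LLMError."""


class StopReasonType(str, Enum):
    invalid_llm_response = "invalid_llm_response"
    llm_api_error = "llm_api_error"


class AgentStep:
    def __init__(self):
        self.stop_reason = None

    def run(self, call_llm):
        try:
            return call_llm()
        except ValueError:
            # Malformed output from the model
            self.stop_reason = StopReasonType.invalid_llm_response
            raise
        except LLMError:
            # Provider-side failure (timeout, rate limit, auth, 5xx) now
            # gets its own stop reason instead of the generic error path.
            self.stop_reason = StopReasonType.llm_api_error
            raise
```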
@@ -497,6 +500,17 @@ class LettaAgentV2(BaseAgentV2):
                 if include_return_message_types is None or message.message_type in include_return_message_types:
                     yield message

+            # Persist approval responses immediately to prevent agent from getting into a bad state
+            if (
+                len(input_messages_to_persist) == 1
+                and input_messages_to_persist[0].role == "approval"
+                and persisted_messages[0].role == "approval"
+                and persisted_messages[1].role == "tool"
+            ):
+                self.agent_state.message_ids = self.agent_state.message_ids + [m.id for m in persisted_messages[:2]]
+                await self.agent_manager.update_message_ids_async(
+                    agent_id=self.agent_state.id, message_ids=self.agent_state.message_ids, actor=self.actor
+                )
             step_progression, step_metrics = await self._step_checkpoint_finish(step_metrics, agent_step_span, logged_step)
         except Exception as e:
             self.logger.error(f"Error during step processing: {e}")
@@ -511,6 +525,7 @@ class LettaAgentV2(BaseAgentV2):
                 StopReasonType.no_tool_call,
                 StopReasonType.invalid_tool_call,
                 StopReasonType.invalid_llm_response,
+                StopReasonType.llm_api_error,
             ):
                 self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", self.stop_reason.stop_reason)
                 raise e
@@ -278,6 +278,8 @@ class OpenAIStreamingInterface:
                     self.prev_assistant_message_id = self.function_id_buffer
                     # Reset message reader at the start of a new send_message stream
                     self.assistant_message_json_reader.reset()
+                    self.assistant_message_json_reader.in_message = True
+                    self.assistant_message_json_reader.message_started = True

                 else:
                     if prev_message_type and prev_message_type != "tool_call_message":
@@ -497,7 +497,7 @@ class AnthropicClient(LLMClientBase):
                 try:
                     args_json = json.loads(arguments)
                     if not isinstance(args_json, dict):
-                        raise ValueError("Expected parseable json object for arguments")
+                        raise LLMServerError("Expected parseable json object for arguments")
                 except:
                     arguments = str(tool_input["function"]["arguments"])
             else:
@@ -854,7 +854,7 @@ def remap_finish_reason(stop_reason: str) -> str:
     elif stop_reason == "tool_use":
         return "function_call"
     else:
-        raise ValueError(f"Unexpected stop_reason: {stop_reason}")
+        raise LLMServerError(f"Unexpected stop_reason: {stop_reason}")


 def strip_xml_tags(string: str, tag: Optional[str]) -> str:
@@ -54,9 +54,12 @@ class AzureClient(OpenAIClient):
         api_key = model_settings.azure_api_key or os.environ.get("AZURE_API_KEY")
         base_url = model_settings.azure_base_url or os.environ.get("AZURE_BASE_URL")
         api_version = model_settings.azure_api_version or os.environ.get("AZURE_API_VERSION")
+        try:
+            client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
+            response: ChatCompletion = await client.chat.completions.create(**request_data)
+        except Exception as e:
+            raise self.handle_llm_error(e)

-        client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
-        response: ChatCompletion = await client.chat.completions.create(**request_data)
         return response.model_dump()

     @trace_method
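Both client construction and the completion call now sit inside the `try`, so any SDK failure is translated by `handle_llm_error` into a typed Letta error. A hedged sketch of the same wrap-and-translate shape; the stub SDK client and the one-line `handle_llm_error` body are illustrative, not the real implementations:

```python
class LLMServerError(Exception):
    """Stand-in for letta.errors.LLMServerError."""


class ProviderClient:
    def handle_llm_error(self, e: Exception) -> Exception:
        # Illustrative translation; the real clients map SDK exceptions
        # onto the typed letta.errors hierarchy.
        return LLMServerError(f"provider call failed: {e}")

    def _make_sdk_client(self):
        # Stub standing in for AsyncAzureOpenAI(...) construction.
        raise ConnectionError("no credentials configured")

    def request(self, request_data: dict) -> dict:
        try:
            client = self._make_sdk_client()      # construction can fail,
            return client.create(**request_data)  # so it sits in the try too
        except Exception as e:
            raise self.handle_llm_error(e) from e


# ProviderClient().request({}) now raises LLMServerError, not ConnectionError.
```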
@@ -14,6 +14,19 @@ from google.genai.types import (
 )

 from letta.constants import NON_USER_MSG_PREFIX
+from letta.errors import (
+    ContextWindowExceededError,
+    ErrorCode,
+    LLMAuthenticationError,
+    LLMBadRequestError,
+    LLMConnectionError,
+    LLMNotFoundError,
+    LLMPermissionDeniedError,
+    LLMRateLimitError,
+    LLMServerError,
+    LLMTimeoutError,
+    LLMUnprocessableEntityError,
+)
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps, json_loads
 from letta.llm_api.llm_client_base import LLMClientBase
@@ -48,13 +61,16 @@ class GoogleVertexClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
-        client = self._get_client()
-        response = client.models.generate_content(
-            model=llm_config.model,
-            contents=request_data["contents"],
-            config=request_data["config"],
-        )
-        return response.model_dump()
+        try:
+            client = self._get_client()
+            response = client.models.generate_content(
+                model=llm_config.model,
+                contents=request_data["contents"],
+                config=request_data["config"],
+            )
+            return response.model_dump()
+        except Exception as e:
+            raise self.handle_llm_error(e)

     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
@@ -77,15 +93,15 @@ class GoogleVertexClient(LLMClientBase):
                 )
             except errors.APIError as e:
                 # Retry on 503 and 500 errors as well, usually ephemeral from Gemini
-                if e.code == 503 or e.code == 500:
+                if e.code == 503 or e.code == 500 or e.code == 504:
                     logger.warning(f"Received {e}, retrying {retry_count}/{self.MAX_RETRIES}")
                     retry_count += 1
                     if retry_count > self.MAX_RETRIES:
-                        raise e
+                        raise self.handle_llm_error(e)
                     continue
-                raise e
+                raise self.handle_llm_error(e)
             except Exception as e:
-                raise e
+                raise self.handle_llm_error(e)
         response_data = response.model_dump()
         is_malformed_function_call = self.is_malformed_function_call(response_data)
         if is_malformed_function_call:
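504 now joins 500 and 503 as a retryable Gemini status, and once retries are exhausted the raw `APIError` is translated by `handle_llm_error` rather than re-raised. The retry shape in isolation, with a stand-in `APIError` and a `translate` callable in place of the real `handle_llm_error` (note the real loop does not sleep between attempts either):

```python
class APIError(Exception):
    """Stand-in for google.genai.errors.APIError."""

    def __init__(self, code: int):
        super().__init__(f"API error {code}")
        self.code = code


RETRYABLE = {500, 503, 504}  # 504 is newly retryable in this release
MAX_RETRIES = 3


def request_with_retries(call, translate):
    retry_count = 0
    while True:
        try:
            return call()
        except APIError as e:
            if e.code in RETRYABLE:
                retry_count += 1
                if retry_count > MAX_RETRIES:
                    raise translate(e)  # translated, not re-raised raw
                continue
            raise translate(e)
```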
@@ -363,11 +379,10 @@ class GoogleVertexClient(LLMClientBase):

         if content is None or content.role is None or content.parts is None:
             # This means the response is malformed like MALFORMED_FUNCTION_CALL
-            # NOTE: must be a ValueError to trigger a retry
             if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
-                raise ValueError(f"Error in response data from LLM: {candidate.finish_reason}")
+                raise LLMServerError(f"Malformed response from Google Vertex: {candidate.finish_reason}")
             else:
-                raise ValueError(f"Error in response data from LLM: {candidate.model_dump()}")
+                raise LLMServerError(f"Invalid response data from Google Vertex: {candidate.model_dump()}")

         role = content.role
         assert role == "model", f"Unknown role in response: {role}"
@@ -461,7 +476,7 @@ class GoogleVertexClient(LLMClientBase):

         except json.decoder.JSONDecodeError:
             if candidate.finish_reason == "MAX_TOKENS":
-                raise ValueError("Could not parse response data from LLM: exceeded max token limit")
+                raise LLMServerError("Could not parse response data from LLM: exceeded max token limit")
         # Inner thoughts are the content by default
         inner_thoughts = response_message.text

@@ -490,7 +505,7 @@ class GoogleVertexClient(LLMClientBase):
         elif finish_reason == "RECITATION":
             openai_finish_reason = "content_filter"
         else:
-            raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+            raise LLMServerError(f"Unrecognized finish reason in Google AI response: {finish_reason}")

         choices.append(
             Choice(
@@ -581,5 +596,127 @@ class GoogleVertexClient(LLMClientBase):

     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
-        # Fallback to base implementation
+        # Handle Google GenAI specific errors
+        if isinstance(e, errors.ClientError):
+            logger.warning(f"[Google Vertex] Client error ({e.code}): {e}")
+
+            # Handle specific error codes
+            if e.code == 400:
+                error_str = str(e).lower()
+                if "context" in error_str and ("exceed" in error_str or "limit" in error_str or "too long" in error_str):
+                    return ContextWindowExceededError(
+                        message=f"Bad request to Google Vertex (context window exceeded): {str(e)}",
+                    )
+                else:
+                    return LLMBadRequestError(
+                        message=f"Bad request to Google Vertex: {str(e)}",
+                        code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    )
+            elif e.code == 401:
+                return LLMAuthenticationError(
+                    message=f"Authentication failed with Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 403:
+                return LLMPermissionDeniedError(
+                    message=f"Permission denied by Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 404:
+                return LLMNotFoundError(
+                    message=f"Resource not found in Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 408:
+                return LLMTimeoutError(
+                    message=f"Request to Google Vertex timed out: {str(e)}",
+                    code=ErrorCode.TIMEOUT,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            elif e.code == 422:
+                return LLMUnprocessableEntityError(
+                    message=f"Invalid request content for Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 429:
+                logger.warning("[Google Vertex] Rate limited (429). Consider backoff.")
+                return LLMRateLimitError(
+                    message=f"Rate limited by Google Vertex: {str(e)}",
+                    code=ErrorCode.RATE_LIMIT_EXCEEDED,
+                )
+            else:
+                return LLMServerError(
+                    message=f"Google Vertex client error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+
+        if isinstance(e, errors.ServerError):
+            logger.warning(f"[Google Vertex] Server error ({e.code}): {e}")
+
+            # Handle specific server error codes
+            if e.code == 500:
+                return LLMServerError(
+                    message=f"Google Vertex internal server error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+            elif e.code == 502:
+                return LLMConnectionError(
+                    message=f"Bad gateway from Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            elif e.code == 503:
+                return LLMServerError(
+                    message=f"Google Vertex service unavailable: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+            elif e.code == 504:
+                return LLMTimeoutError(
+                    message=f"Gateway timeout from Google Vertex: {str(e)}",
+                    code=ErrorCode.TIMEOUT,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            else:
+                return LLMServerError(
+                    message=f"Google Vertex server error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+
+        if isinstance(e, errors.APIError):
+            logger.warning(f"[Google Vertex] API error ({e.code}): {e}")
+            return LLMServerError(
+                message=f"Google Vertex API error: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={
+                    "status_code": e.code,
+                    "response_json": getattr(e, "response_json", None),
+                },
+            )
+
+        # Handle connection-related errors
+        if "connection" in str(e).lower() or "timeout" in str(e).lower():
+            logger.warning(f"[Google Vertex] Connection/timeout error: {e}")
+            return LLMConnectionError(
+                message=f"Failed to connect to Google Vertex: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={"cause": str(e.__cause__) if e.__cause__ else None},
+            )
+
+        # Fallback to base implementation for other errors
         return super().handle_llm_error(e)
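Taken together, `handle_llm_error` is now a near-total mapping from google.genai status codes onto the typed Letta error hierarchy. A condensed, table-driven sketch of the same dispatch (the exception classes are stand-ins; the real 400 branch additionally sniffs the message for context-window wording, and 502 maps to a connection error rather than a timeout):

```python
class LLMError(Exception): ...
class LLMBadRequestError(LLMError): ...
class LLMAuthenticationError(LLMError): ...
class LLMPermissionDeniedError(LLMError): ...
class LLMNotFoundError(LLMError): ...
class LLMTimeoutError(LLMError): ...
class LLMUnprocessableEntityError(LLMError): ...
class LLMRateLimitError(LLMError): ...
class LLMServerError(LLMError): ...

# Specific codes map to specific errors; everything else falls through
# to LLMServerError, mirroring the branches above.
STATUS_ERROR_MAP = {
    400: LLMBadRequestError,
    401: LLMAuthenticationError,
    403: LLMPermissionDeniedError,
    404: LLMNotFoundError,
    408: LLMTimeoutError,
    422: LLMUnprocessableEntityError,
    429: LLMRateLimitError,
    504: LLMTimeoutError,
}


def translate(code: int, message: str) -> LLMError:
    return STATUS_ERROR_MAP.get(code, LLMServerError)(message)


assert isinstance(translate(429, "slow down"), LLMRateLimitError)
assert isinstance(translate(500, "boom"), LLMServerError)
```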
@@ -99,7 +99,7 @@ def supports_structured_output(llm_config: LLMConfig) -> bool:

     # FIXME pretty hacky - turn off for providers we know users will use,
     # but also don't support structured output
-    if "nebius.com" in llm_config.model_endpoint:
+    if llm_config.model_endpoint and "nebius.com" in llm_config.model_endpoint:
         return False
     else:
         return True
@@ -108,7 +108,7 @@ def supports_structured_output(llm_config: LLMConfig) -> bool:
 # TODO move into LLMConfig as a field?
 def requires_auto_tool_choice(llm_config: LLMConfig) -> bool:
     """Certain providers require the tool choice to be set to 'auto'."""
-    if "nebius.com" in llm_config.model_endpoint:
+    if llm_config.model_endpoint and "nebius.com" in llm_config.model_endpoint:
         return True
     if llm_config.handle and "vllm" in llm_config.handle:
         return True
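Both helpers (and the LM Studio check below) fix the same bug: `model_endpoint` is optional on `LLMConfig`, and a substring test against `None` raises `TypeError` rather than returning `False`. The guard in isolation:

```python
from typing import Optional


def endpoint_contains(model_endpoint: Optional[str], needle: str) -> bool:
    # "needle" in None raises TypeError, so test for None first.
    return bool(model_endpoint) and needle in model_endpoint


assert endpoint_contains("https://api.studio.nebius.com/v1", "nebius.com")
assert not endpoint_contains(None, "nebius.com")
```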
@@ -168,7 +168,9 @@ class OpenAIClient(LLMClientBase):
             # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
             # TODO(fix)
             inner_thoughts_desc = (
-                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
+                if llm_config.model_endpoint and ":1234" in llm_config.model_endpoint
+                else INNER_THOUGHTS_KWARG_DESCRIPTION
             )
             tools = add_inner_thoughts_to_functions(
                 functions=tools,
@@ -146,11 +146,16 @@ def _instrument_engine_events(engine: Engine) -> None:
             span.end()
             context._sync_instrumentation_span = None

-    def handle_cursor_error(conn, cursor, statement, parameters, context, executemany):
+    def handle_cursor_error(exception_context):
         """Handle cursor execution errors."""
         if not _config["enabled"]:
             return

+        # Extract context from exception_context
+        context = getattr(exception_context, "execution_context", None)
+        if not context:
+            return
+
         span = getattr(context, "_sync_instrumentation_span", None)
         if span:
             span.set_status(Status(StatusCode.ERROR, "Database operation failed"))
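This corrects the listener signature: SQLAlchemy's `handle_error` event delivers a single `ExceptionContext`, unlike the cursor-execute events, which use the six-argument form; with the old signature the hook would fail at dispatch time. A minimal sketch of both signatures against an in-memory SQLite engine:

```python
from sqlalchemy import create_engine, event, text

engine = create_engine("sqlite://")


@event.listens_for(engine, "before_cursor_execute")
def before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    # Cursor-execute events use the six-argument signature.
    pass


@event.listens_for(engine, "handle_error")
def handle_error(exception_context):
    # handle_error receives one ExceptionContext; the execution context
    # (which carries the instrumentation span above) hangs off of it and
    # can be None for connection-level failures.
    ctx = exception_context.execution_context
    if ctx is None:
        return


with engine.connect() as conn:
    conn.execute(text("select 1"))
```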
@@ -9,6 +9,7 @@ from letta.schemas.enums import JobStatus
 class StopReasonType(str, Enum):
     end_turn = "end_turn"
     error = "error"
+    llm_api_error = "llm_api_error"
     invalid_llm_response = "invalid_llm_response"
     invalid_tool_call = "invalid_tool_call"
     max_steps = "max_steps"
@@ -31,6 +32,7 @@ class StopReasonType(str, Enum):
             StopReasonType.invalid_tool_call,
             StopReasonType.no_tool_call,
             StopReasonType.invalid_llm_response,
+            StopReasonType.llm_api_error,
         ):
             return JobStatus.failed
         elif self == StopReasonType.cancelled:
@@ -17,7 +17,15 @@ from starlette.middleware.cors import CORSMiddleware
 from letta.__init__ import __version__ as letta_version
 from letta.agents.exceptions import IncompatibleAgentType
 from letta.constants import ADMIN_PREFIX, API_PREFIX, OPENAI_API_PREFIX
-from letta.errors import BedrockPermissionError, LettaAgentNotFoundError, LettaUserNotFoundError
+from letta.errors import (
+    BedrockPermissionError,
+    LettaAgentNotFoundError,
+    LettaUserNotFoundError,
+    LLMAuthenticationError,
+    LLMError,
+    LLMRateLimitError,
+    LLMTimeoutError,
+)
 from letta.helpers.pinecone_utils import get_pinecone_indices, should_use_pinecone, upsert_pinecone_indices
 from letta.jobs.scheduler import start_scheduler_with_leader_election
 from letta.log import get_logger
@@ -276,6 +284,58 @@ def create_application() -> "FastAPI":
         },
     )

+    @app.exception_handler(LLMTimeoutError)
+    async def llm_timeout_error_handler(request: Request, exc: LLMTimeoutError):
+        return JSONResponse(
+            status_code=504,
+            content={
+                "error": {
+                    "type": "llm_timeout",
+                    "message": "The LLM request timed out. Please try again.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMRateLimitError)
+    async def llm_rate_limit_error_handler(request: Request, exc: LLMRateLimitError):
+        return JSONResponse(
+            status_code=429,
+            content={
+                "error": {
+                    "type": "llm_rate_limit",
+                    "message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMAuthenticationError)
+    async def llm_auth_error_handler(request: Request, exc: LLMAuthenticationError):
+        return JSONResponse(
+            status_code=401,
+            content={
+                "error": {
+                    "type": "llm_authentication",
+                    "message": "Authentication failed with the LLM model provider.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMError)
+    async def llm_error_handler(request: Request, exc: LLMError):
+        return JSONResponse(
+            status_code=502,
+            content={
+                "error": {
+                    "type": "llm_error",
+                    "message": "An error occurred with the LLM request.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
     settings.cors_origins.append("https://app.letta.com")

     if (os.getenv("LETTA_SERVER_SECURE") == "true") or "--secure" in sys.argv:
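These handlers give provider failures stable status codes (504, 429, 401) with a generic 502 fallback, all sharing one error envelope. Registering the base-class handler alongside the specific ones is safe because Starlette resolves handlers by walking the exception's MRO, so the most specific registered class wins. A self-contained sketch of that resolution behavior:

```python
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse


class LLMError(Exception): ...
class LLMRateLimitError(LLMError): ...


app = FastAPI()


@app.exception_handler(LLMRateLimitError)
async def rate_limit_handler(request: Request, exc: LLMRateLimitError):
    return JSONResponse(status_code=429, content={"error": {"type": "llm_rate_limit"}})


@app.exception_handler(LLMError)
async def llm_fallback_handler(request: Request, exc: LLMError):
    return JSONResponse(status_code=502, content={"error": {"type": "llm_error"}})


@app.get("/boom")
async def boom():
    # Served by the 429 handler, not the 502 fallback.
    raise LLMRateLimitError("slow down")
```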
@@ -536,9 +536,7 @@ async def attach_source(

     if agent_state.enable_sleeptime:
         source = await server.source_manager.get_source_by_id(source_id=source_id)
-        safe_create_task(
-            server.sleeptime_document_ingest_async(agent_state, source, actor), logger=logger, label="sleeptime_document_ingest_async"
-        )
+        safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async")

     return agent_state

@@ -565,9 +563,7 @@ async def attach_folder_to_agent(

     if agent_state.enable_sleeptime:
         source = await server.source_manager.get_source_by_id(source_id=folder_id)
-        safe_create_task(
-            server.sleeptime_document_ingest_async(agent_state, source, actor), logger=logger, label="sleeptime_document_ingest_async"
-        )
+        safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async")

     return agent_state

@@ -1320,15 +1316,55 @@ async def send_message_streaming(
     try:
         if agent_eligible and model_compatible:
             agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
-            raw_stream = agent_loop.stream(
-                input_messages=request.messages,
-                max_steps=request.max_steps,
-                stream_tokens=request.stream_tokens and model_compatible_token_streaming,
-                run_id=run.id if run else None,
-                use_assistant_message=request.use_assistant_message,
-                request_start_timestamp_ns=request_start_timestamp_ns,
-                include_return_message_types=request.include_return_message_types,
-            )
+
+            async def error_aware_stream():
+                """Stream that handles early LLM errors gracefully in streaming format."""
+                from letta.errors import LLMAuthenticationError, LLMError, LLMRateLimitError, LLMTimeoutError
+
+                try:
+                    stream = agent_loop.stream(
+                        input_messages=request.messages,
+                        max_steps=request.max_steps,
+                        stream_tokens=request.stream_tokens and model_compatible_token_streaming,
+                        run_id=run.id if run else None,
+                        use_assistant_message=request.use_assistant_message,
+                        request_start_timestamp_ns=request_start_timestamp_ns,
+                        include_return_message_types=request.include_return_message_types,
+                    )
+                    async for chunk in stream:
+                        yield chunk
+
+                except LLMTimeoutError as e:
+                    error_data = {
+                        "error": {"type": "llm_timeout", "message": "The LLM request timed out. Please try again.", "detail": str(e)}
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 504)
+                except LLMRateLimitError as e:
+                    error_data = {
+                        "error": {
+                            "type": "llm_rate_limit",
+                            "message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
+                            "detail": str(e),
+                        }
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 429)
+                except LLMAuthenticationError as e:
+                    error_data = {
+                        "error": {
+                            "type": "llm_authentication",
+                            "message": "Authentication failed with the LLM model provider.",
+                            "detail": str(e),
+                        }
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 401)
+                except LLMError as e:
+                    error_data = {"error": {"type": "llm_error", "message": "An error occurred with the LLM request.", "detail": str(e)}}
+                    yield (f"data: {json.dumps(error_data)}\n\n", 502)
+                except Exception as e:
+                    error_data = {"error": {"type": "internal_error", "message": "An internal server error occurred.", "detail": str(e)}}
+                    yield (f"data: {json.dumps(error_data)}\n\n", 500)
+
+            raw_stream = error_aware_stream()

             from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream

@@ -218,8 +218,17 @@ class JobManager:
         """
         try:
             job_update_builder = partial(JobUpdate, status=new_status)
+
+            # If metadata is provided, merge it with existing metadata
             if metadata:
-                job_update_builder = partial(job_update_builder, metadata=metadata)
+                # Get the current job to access existing metadata
+                current_job = await self.get_job_by_id_async(job_id=job_id, actor=actor)
+                merged_metadata = {}
+                if current_job.metadata:
+                    merged_metadata.update(current_job.metadata)
+                merged_metadata.update(metadata)
+                job_update_builder = partial(job_update_builder, metadata=merged_metadata)
+
             if new_status.is_terminal:
                 job_update_builder = partial(job_update_builder, completed_at=get_utc_time())

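Previously a status update carrying `metadata` replaced the job's existing metadata wholesale; it is now a shallow merge in which incoming keys win and untouched keys survive. The semantics in isolation:

```python
from typing import Optional


def merge_job_metadata(existing: Optional[dict], incoming: dict) -> dict:
    """Shallow merge: incoming keys overwrite, untouched keys survive."""
    merged: dict = {}
    if existing:
        merged.update(existing)
    merged.update(incoming)
    return merged


assert merge_job_metadata({"attempt": 1, "source": "api"}, {"attempt": 2}) == {
    "attempt": 2,
    "source": "api",
}
```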
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: letta-nightly
-Version: 0.11.7.dev20250912104045
+Version: 0.11.7.dev20250913103940
 Summary: Create LLM agents with long-term memory and custom tools
 Author-email: Letta Team <contact@letta.com>
 License: Apache License
@@ -16,7 +16,7 @@ letta/system.py,sha256=kHF7n3Viq7gV5UIUEXixod2gWa2jroUgztpEzMC1Sew,8925
 letta/utils.py,sha256=bSq3St7MUw9gN1g0ICdOhNNaUFYBC3EfJLG6qsRLSFA,43290
 letta/adapters/letta_llm_adapter.py,sha256=11wkOkEQfPXUuJoJxbK22wCa-8gnWiDAb3UOXOxLt5U,3427
 letta/adapters/letta_llm_request_adapter.py,sha256=wJhK5M_qOhRPAhgMmYI7EJcM8Op19tClnXe0kJ29a3Q,4831
-letta/adapters/letta_llm_stream_adapter.py,sha256=Q6nFr8uKc1DyAHHiHxHGNmqhRIScEKXO3TwsBgqW5QI,7630
+letta/adapters/letta_llm_stream_adapter.py,sha256=G8IqtXor0LUuW-dKtGJWsUt6DfJreVCn5h6W2lHEPBg,7658
 letta/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/agents/agent_loop.py,sha256=cTSlGt1g9aZWG5vIMYtzdeJG1UcrqfjpLGmZU6j89zU,854
 letta/agents/base_agent.py,sha256=rUAcPxWmTnmi50AWOXwrWc-v5sPIod0W_xXaPQShjcE,8540
@@ -27,7 +27,7 @@ letta/agents/exceptions.py,sha256=BQY4D4w32OYHM63CM19ko7dPwZiAzUs3NbKvzmCTcJg,31
 letta/agents/helpers.py,sha256=eCHsvZEkTe0L_uZHYkfNAztsEJW0FTnKZMgVbqlI0Yg,11618
 letta/agents/letta_agent.py,sha256=6nRTh5kzUpqK7eNMk4DlcgEoPmDxFmRb5ysoVHa-vh8,99488
 letta/agents/letta_agent_batch.py,sha256=17RpYVXpGh9dlKxdMOLMCOHWFsi6N5S9FJHxooxkJCI,27998
-letta/agents/letta_agent_v2.py,sha256=Xs54mewx9SgHHFAz8uLJ_6OHv9RHU1PtkwAB_Pu0XMk,58992
+letta/agents/letta_agent_v2.py,sha256=vMzVZL6Px5XBODEh3BHbbADtBSDIuNJrjsHqMGQfnwg,59930
 letta/agents/voice_agent.py,sha256=y-n6qadfKsswvGODzXH02pLIQQ44wnaDSE6oUgKHVkA,23381
 letta/agents/voice_sleeptime_agent.py,sha256=_JzCbWBOKrmo1cTaqZFTrQudpJEapwAyrXYtAHUILGo,8675
 letta/cli/cli.py,sha256=tKtghlX36Rp0_HbkMosvlAapL07JXhA0vKLGTNKnxSQ,1615
@@ -85,7 +85,7 @@ letta/humans/examples/cs_phd.txt,sha256=9C9ZAV_VuG7GB31ksy3-_NAyk8rjE6YtVOkhp08k
 letta/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/interfaces/anthropic_streaming_interface.py,sha256=0VyK8kTRgCLNDLQN6vX1gJ0dfJhqguL_NL1GYgFr6fU,25614
 letta/interfaces/openai_chat_completions_streaming_interface.py,sha256=3xHXh8cW79EkiMUTYfvcH_s92nkLjxXfvtVOVC3bfLo,5050
-letta/interfaces/openai_streaming_interface.py,sha256=t_TKcZSH0Bv_ajOh2mTd4RetrCr-rahkjmGIZIIGDXQ,23593
+letta/interfaces/openai_streaming_interface.py,sha256=abmtQhWWbXSZGTPBPbMGuAJCyMo9euwttPsjI6joiVU,23768
 letta/interfaces/utils.py,sha256=c6jvO0dBYHh8DQnlN-B0qeNC64d3CSunhfqlFA4pJTY,278
 letta/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/jobs/helpers.py,sha256=kO4aj954xsQ1RAmkjY6LQQ7JEIGuhaxB1e9pzrYKHAY,914
@@ -93,13 +93,13 @@ letta/jobs/llm_batch_job_polling.py,sha256=HUCTa1lTOiLAB_8m95RUfeNJa4lxlF8paGdCV
 letta/jobs/scheduler.py,sha256=Ub5VTCA8P5C9Y-0mPK2YIPJSEzKbSd2l5Sp0sOWctD8,8697
 letta/jobs/types.py,sha256=K8GKEnqEgAT6Kq4F2hUrBC4ZAFM9OkfOjVMStzxKuXQ,742
 letta/llm_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/llm_api/anthropic_client.py,sha256=Xplb-r2c6GmdBsBqDs67vjZim7HnNBhq1x5ExsLMM_M,37372
-letta/llm_api/azure_client.py,sha256=uAIFEFlhe0cdMm62F9M5wQeuWKxcyL4bL1MVMCOSOvM,3746
+letta/llm_api/anthropic_client.py,sha256=L8M4i08bHfNh1uS_M2_bDf3yeEuHpr5pungyu0pqo60,37380
+letta/llm_api/azure_client.py,sha256=BeChGsH4brrSgZBbCf8UE5RkW-3ZughpKnsBY2VYxwI,3841
 letta/llm_api/bedrock_client.py,sha256=gNKSFGCbrrLMPvtBItAOz1nme4K_opgkZdFa3cUzp7M,3434
 letta/llm_api/deepseek_client.py,sha256=di6ApSQu1DewXw0_JIP7AK4IHvXQHd0e32tQfFf5F34,16975
 letta/llm_api/google_ai_client.py,sha256=JweTUHZXvK6kcZBGXA7XEU53KP4vM7_zdD7AorCtsdI,8166
 letta/llm_api/google_constants.py,sha256=eOjOv-FImyJ4b4QGIaod-mEROMtrBFz0yhuYHqOEkwY,797
-letta/llm_api/google_vertex_client.py,sha256=57qrBe5dY-ERB9xI9_tWRwW_uSxGbHqR02mvnWmCLGY,28910
+letta/llm_api/google_vertex_client.py,sha256=p6MNUFHhkzFkGfWgldjVJC6SIvSMriMeCoenNYynU6E,34970
 letta/llm_api/groq_client.py,sha256=nNeWSgDVOLn3iFiicDKyhHj7f73JxrB9-7_M2Pv2e1I,3192
 letta/llm_api/helpers.py,sha256=GXV_SuaU7uSCDj6bxDcCCF7CUjuZQCVWd5qZ3OsHVNk,17587
 letta/llm_api/llm_api_tools.py,sha256=lsZ6OeIHesyOfbNQi5CVw5hn1lTQP5gJyforp-D0nk8,12294
@@ -107,7 +107,7 @@ letta/llm_api/llm_client.py,sha256=iXiPbrhluP2DBczv9nkFlAXdwWGOkg0lNDA9LzLrG4o,3
 letta/llm_api/llm_client_base.py,sha256=RFo8H4ILxVyzB3DeF4rJoJJYjRF8ScVO4yyDrhuN0DY,10052
 letta/llm_api/mistral.py,sha256=ruOTBt07Uzx7S30_eXhedVWngtpjtlzG6Ox1Iw0_mQs,662
 letta/llm_api/openai.py,sha256=56cwdS9l-75cMTtY9df6Dbb1M9crH8YQsSdF3Pm3Rpg,27393
-letta/llm_api/openai_client.py,sha256=Ww68D103uQolsALOzfPD5-CTuEaIFBbkdnrtMBIaZlc,22475
+letta/llm_api/openai_client.py,sha256=QDIRIG-4MVA-Jug8qx0HUkhg3qtUfHGvE6QCbSYGK-c,22597
 letta/llm_api/together_client.py,sha256=HeDMDDa525yfDTKciODDfX_t93QBfFmX0n2P-FT1QTU,2284
 letta/llm_api/xai_client.py,sha256=3mpSQ9OoWyjqo2VhNM_m0EPBzS69r4p-OEwL7UWc9oY,3772
 letta/llm_api/sample_response_jsons/aws_bedrock.json,sha256=RS3VqyxPB9hQQCPm42hWoga0bisKv_0e8ZF-c3Ag1FA,930
@@ -202,7 +202,7 @@ letta/otel/events.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/otel/metric_registry.py,sha256=TdRBJrwDuyZV2Uretnq0lYIoYKA2JUqWkENGqLhOCBc,9344
 letta/otel/metrics.py,sha256=GlIt8XLkP-igTXptah8UBonpHF7nEtSqTONSkAEERAs,4740
 letta/otel/resource.py,sha256=kqvEywP2LTmuxv2Or3Irtm2zwic863j1DWUvBC0IONc,735
-letta/otel/sqlalchemy_instrumentation.py,sha256=dkgQTDVSdre27r0EQzfTsV4d49btglLU-CdozHwEFHc,18767
+letta/otel/sqlalchemy_instrumentation.py,sha256=yiZvHjDA8Sd5j5RGbokiaOgRwCIE5hkvhWavVSOXs7U,18892
 letta/otel/sqlalchemy_instrumentation_integration.py,sha256=CwGPd5mb4PasBDnSlulSqfaupN-kB8Wz6EBHWBDNuuo,3902
 letta/otel/tracing.py,sha256=kyLsl00Zka3z3uEnOZqgantHya_bsmpvulABYHvsUo8,10422
 letta/personas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -270,7 +270,7 @@ letta/schemas/letta_message_content.py,sha256=7FuholmKauP5Z-FJdsCH_-4IUGl_8jVqi5
 letta/schemas/letta_ping.py,sha256=9JphoKhWZ63JqsakIx4aaj8dYMtYVa7HxSkT5cMh5cI,863
 letta/schemas/letta_request.py,sha256=ll0QTt-tzaJ3zxpPyaifz7mtWcPy6QmvPUDOzngbxfQ,4526
 letta/schemas/letta_response.py,sha256=e6FcAhRX3heB0FoWAAozB3RJboMwi_JpelTdc5JupVA,8188
-letta/schemas/letta_stop_reason.py,sha256=n060NkGItD1OarfviHUW9Mp7tv2_6spW1wkPCCzjepU,2271
+letta/schemas/letta_stop_reason.py,sha256=6vF66Dsyzog3X_d2PjfQxJLyiDarlqJ-hG7NMJpxbuc,2349
 letta/schemas/llm_batch_job.py,sha256=xr7RmMc9ItmL344vcIn1MJaT2nOf0F7qEHrsXkQNFQI,3136
 letta/schemas/llm_config.py,sha256=8nyi9r3o3feh_hUy6pdRWp3E6M612xZhvV3gkFB4aqE,13642
 letta/schemas/llm_config_overrides.py,sha256=E6qJuVA8TwAAy3VjGitJ5jSQo5PbN-6VPcZOF5qhP9A,1815
@@ -337,7 +337,7 @@ letta/server/server.py,sha256=KFFbyl7Djn8CS0aPxz3jL8RwmXPr9nKY3wDu3ymUWjI,109265
 letta/server/startup.sh,sha256=z-Fea-7LiuS_aG1tJqS8JAsDQaamwC_kuDhv9D3PPPY,2698
 letta/server/utils.py,sha256=rRvW6L1lzau4u9boamiyZH54lf5tQ91ypXzUW9cfSPA,1667
 letta/server/rest_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/server/rest_api/app.py,sha256=W3lCWe2iGumNIAyuDyH2sNO1EwGKqh7iSo82NXCXrVw,19270
+letta/server/rest_api/app.py,sha256=T3LLveXRJmfWqR0uEzoaLY8LXwYrwCQGb80XMbSCDUo,21172
 letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
 letta/server/rest_api/chat_completions_interface.py,sha256=-7wO7pNBWXMqblVkJpuZ8JPJ-LjudLTtT6BJu-q_XAM,11138
 letta/server/rest_api/interface.py,sha256=X5NZ8oerDcipG9y1AfD92zJ_2TgVMO4eJ42RP82GFF8,70952
@@ -355,7 +355,7 @@ letta/server/rest_api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
 letta/server/rest_api/routers/openai/chat_completions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/routers/openai/chat_completions/chat_completions.py,sha256=ohM1i8BsNxTiw8duuRT5X_0tSUzBwctQM4fJ5DXURic,5157
 letta/server/rest_api/routers/v1/__init__.py,sha256=9MnEA7CgtIxyU_dDNG0jm-Ziqu1somBml-e5gKjgd9I,1997
-letta/server/rest_api/routers/v1/agents.py,sha256=2eo7EDXTpybTPfOvgEGhm81LenIJcXNxv-bf5CcqjkU,75673
+letta/server/rest_api/routers/v1/agents.py,sha256=2lGLtfgB2ZtAa0EgaiaDlNV0GJhAi_kQQy7XqXB2DG0,77771
 letta/server/rest_api/routers/v1/blocks.py,sha256=ykI77xnmIxPLqdAy5kzGyGw0w0ZRyVXn-O5Xcdj6-70,7690
 letta/server/rest_api/routers/v1/embeddings.py,sha256=PRaQlrmEXPiIdWsTbadrFsv3Afyv5oEFUdhgHA8FTi8,989
 letta/server/rest_api/routers/v1/folders.py,sha256=8Yb-bw2JdXBxMfrJNIZQk9_FKN2fet9Ccp8T83_c2sc,23539
@@ -397,7 +397,7 @@ letta/services/file_manager.py,sha256=d4uX8RblmqNGk1MsfeGzQ5uDWKVFP-AH63Jz5xOkj2
 letta/services/files_agents_manager.py,sha256=QJrJTgDn3RXUjZIGiIw4GQ5k2iKj-Wvzs-WQetpQ154,30059
 letta/services/group_manager.py,sha256=dD4DDHjOptMrtbWqw1ErlhpBqChw2ubLJdILjeLTY8I,29183
 letta/services/identity_manager.py,sha256=JI9Xc7EsBagSwDS2na4rFNhoO_LuaxlkVO_1oIK_ITQ,11841
-letta/services/job_manager.py,sha256=nDrnr_r8ELwf8KMKyRRrWHsysrTGldgCTplJdaSiNiQ,35543
+letta/services/job_manager.py,sha256=E-w9_4BMErMuqVf2dFlTPTobrvBKhPyyEDfuqLnbACI,35970
 letta/services/llm_batch_manager.py,sha256=iDzLFfmgpQooGY4zpN_w8q1SZ27fr2Cv6Ks3ltZErL8,20929
 letta/services/mcp_manager.py,sha256=QuvKQnwxMXrhiCaYlF50GZwXmbSU7PxmcOZ85sQ3t7I,47848
 letta/services/message_manager.py,sha256=tomsZidPT-I95sJsEsls-vj3qglehV7XNTs-m2zF8Bg,60629
@@ -470,8 +470,8 @@ letta/templates/sandbox_code_file_async.py.j2,sha256=lb7nh_P2W9VZHzU_9TxSCEMUod7
 letta/templates/summary_request_text.j2,sha256=ZttQwXonW2lk4pJLYzLK0pmo4EO4EtUUIXjgXKiizuc,842
 letta/templates/template_helper.py,sha256=HkG3zwRc5NVGmSTQu5PUTpz7LevK43bzXVaQuN8urf0,1634
 letta/types/__init__.py,sha256=hokKjCVFGEfR7SLMrtZsRsBfsC7yTIbgKPLdGg4K1eY,147
-letta_nightly-0.11.7.dev20250912104045.dist-info/METADATA,sha256=tqJlpOfovWrr9Go7iI1cwIOkAgFx0Qwf7JYX11vg2JI,24424
-letta_nightly-0.11.7.dev20250912104045.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-letta_nightly-0.11.7.dev20250912104045.dist-info/entry_points.txt,sha256=m-94Paj-kxiR6Ktu0us0_2qfhn29DzF2oVzqBE6cu8w,41
-letta_nightly-0.11.7.dev20250912104045.dist-info/licenses/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
-letta_nightly-0.11.7.dev20250912104045.dist-info/RECORD,,
+letta_nightly-0.11.7.dev20250913103940.dist-info/METADATA,sha256=bBw5qZ0Uuj_L5Hs-2tji7ZiKtmp3V0LWaPeCiOkGV9A,24424
+letta_nightly-0.11.7.dev20250913103940.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+letta_nightly-0.11.7.dev20250913103940.dist-info/entry_points.txt,sha256=m-94Paj-kxiR6Ktu0us0_2qfhn29DzF2oVzqBE6cu8w,41
+letta_nightly-0.11.7.dev20250913103940.dist-info/licenses/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
+letta_nightly-0.11.7.dev20250913103940.dist-info/RECORD,,