letta-nightly 0.11.7.dev20250912104045__py3-none-any.whl → 0.11.7.dev20250914103918__py3-none-any.whl
This diff compares two publicly available package versions as released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- letta/adapters/letta_llm_stream_adapter.py +1 -1
- letta/agents/letta_agent_v2.py +17 -2
- letta/interfaces/openai_streaming_interface.py +14 -3
- letta/llm_api/anthropic_client.py +2 -2
- letta/llm_api/azure_client.py +5 -2
- letta/llm_api/google_vertex_client.py +154 -17
- letta/llm_api/openai_client.py +5 -3
- letta/otel/sqlalchemy_instrumentation.py +6 -1
- letta/schemas/letta_stop_reason.py +2 -0
- letta/server/rest_api/app.py +61 -1
- letta/server/rest_api/interface.py +22 -75
- letta/server/rest_api/routers/v1/agents.py +51 -15
- letta/services/job_manager.py +10 -1
- letta/streaming_utils.py +79 -18
- {letta_nightly-0.11.7.dev20250912104045.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/METADATA +1 -1
- {letta_nightly-0.11.7.dev20250912104045.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/RECORD +19 -19
- {letta_nightly-0.11.7.dev20250912104045.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.7.dev20250912104045.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.7.dev20250912104045.dist-info → letta_nightly-0.11.7.dev20250914103918.dist-info}/licenses/LICENSE +0 -0
letta/adapters/letta_llm_stream_adapter.py CHANGED

@@ -149,7 +149,7 @@ class LettaLLMStreamAdapter(LettaLLMAdapter):
             request_json=self.request_data,
             response_json={
                 "content": {
-                    "tool_call": self.tool_call.model_dump_json(),
+                    "tool_call": self.tool_call.model_dump_json() if self.tool_call else None,
                     "reasoning": [content.model_dump_json() for content in self.reasoning_content],
                 },
                 "id": self.interface.message_id,
letta/agents/letta_agent_v2.py CHANGED

@@ -19,7 +19,7 @@ from letta.agents.helpers import (
     generate_step_id,
 )
 from letta.constants import DEFAULT_MAX_STEPS, NON_USER_MSG_PREFIX
-from letta.errors import ContextWindowExceededError
+from letta.errors import ContextWindowExceededError, LLMError
 from letta.helpers import ToolRulesSolver
 from letta.helpers.datetime_helpers import get_utc_time, get_utc_timestamp_ns, ns_to_ms
 from letta.helpers.reasoning_helper import scrub_inner_thoughts_from_messages

@@ -306,7 +306,7 @@ class LettaAgentV2(BaseAgentV2):
             )

         except:
-            if self.stop_reason:
+            if self.stop_reason and not first_chunk:
                 yield f"data: {self.stop_reason.model_dump_json()}\n\n"
             raise

@@ -431,6 +431,9 @@ class LettaAgentV2(BaseAgentV2):
                 except ValueError as e:
                     self.stop_reason = LettaStopReason(stop_reason=StopReasonType.invalid_llm_response.value)
                     raise e
+                except LLMError as e:
+                    self.stop_reason = LettaStopReason(stop_reason=StopReasonType.llm_api_error.value)
+                    raise e
                 except Exception as e:
                     if isinstance(e, ContextWindowExceededError) and llm_request_attempt < summarizer_settings.max_summarizer_retries:
                         # Retry case

@@ -497,6 +500,17 @@ class LettaAgentV2(BaseAgentV2):
                 if include_return_message_types is None or message.message_type in include_return_message_types:
                     yield message

+            # Persist approval responses immediately to prevent agent from getting into a bad state
+            if (
+                len(input_messages_to_persist) == 1
+                and input_messages_to_persist[0].role == "approval"
+                and persisted_messages[0].role == "approval"
+                and persisted_messages[1].role == "tool"
+            ):
+                self.agent_state.message_ids = self.agent_state.message_ids + [m.id for m in persisted_messages[:2]]
+                await self.agent_manager.update_message_ids_async(
+                    agent_id=self.agent_state.id, message_ids=self.agent_state.message_ids, actor=self.actor
+                )
             step_progression, step_metrics = await self._step_checkpoint_finish(step_metrics, agent_step_span, logged_step)
         except Exception as e:
             self.logger.error(f"Error during step processing: {e}")

@@ -511,6 +525,7 @@ class LettaAgentV2(BaseAgentV2):
                 StopReasonType.no_tool_call,
                 StopReasonType.invalid_tool_call,
                 StopReasonType.invalid_llm_response,
+                StopReasonType.llm_api_error,
             ):
                 self.logger.error("Error occurred during step processing, with unexpected stop reason: %s", self.stop_reason.stop_reason)
                 raise e
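The letta_agent_v2.py hunks above route provider failures (LLMError) to a dedicated llm_api_error stop reason instead of the generic error path. A minimal, self-contained sketch of that classification; the classes below are simplified stand-ins, not the letta implementations, and in the real agent the result is stored on self.stop_reason before the exception is re-raised:

```python
from enum import Enum


class StopReasonType(str, Enum):
    invalid_llm_response = "invalid_llm_response"
    llm_api_error = "llm_api_error"


class LLMError(Exception):
    """Stand-in for letta.errors.LLMError (base class of the typed provider errors)."""


def classify_step_failure(exc: Exception) -> StopReasonType:
    """Mirror the new except-branches above: provider errors get their own stop reason,
    while a malformed-response ValueError keeps the existing one."""
    if isinstance(exc, LLMError):
        return StopReasonType.llm_api_error
    if isinstance(exc, ValueError):
        return StopReasonType.invalid_llm_response
    raise exc


print(classify_step_failure(LLMError("502 from provider")))   # StopReasonType.llm_api_error
print(classify_step_failure(ValueError("no tool call")))      # StopReasonType.invalid_llm_response
```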
letta/interfaces/openai_streaming_interface.py CHANGED

@@ -24,7 +24,11 @@ from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import FunctionCall, ToolCall
 from letta.server.rest_api.json_parser import OptimisticJSONParser
-from letta.streaming_utils import
+from letta.streaming_utils import (
+    FunctionArgumentsStreamHandler,
+    JSONInnerThoughtsExtractor,
+    sanitize_streamed_message_content,
+)
 from letta.utils import count_tokens

 logger = get_logger(__name__)

@@ -332,8 +336,15 @@ class OpenAIStreamingInterface:
                     self.last_flushed_function_name is not None
                     and self.last_flushed_function_name == self.assistant_message_tool_name
                 ):
-                    # Minimal, robust extraction: only emit the value of "message"
-
+                    # Minimal, robust extraction: only emit the value of "message".
+                    # If we buffered a prefix while name was streaming, feed it first.
+                    if self.function_args_buffer:
+                        payload = self.function_args_buffer + tool_call.function.arguments
+                        self.function_args_buffer = None
+                    else:
+                        payload = tool_call.function.arguments
+                    extracted = self.assistant_message_json_reader.process_json_chunk(payload)
+                    extracted = sanitize_streamed_message_content(extracted or "")
                     if extracted:
                         if prev_message_type and prev_message_type != "assistant_message":
                             message_index += 1
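The hunk above concatenates any text buffered while the tool name was still streaming (self.function_args_buffer) with the next arguments chunk before the "message" value is extracted, so the start of the value is not dropped. A toy illustration of why that ordering matters; the parser below is a deliberately simplified stand-in for FunctionArgumentsStreamHandler, not the letta class:

```python
def extract_message_fragments(chunks):
    buffer = ""            # plays the role of self.function_args_buffer
    inside_value = False
    out = []
    for chunk in chunks:
        payload = buffer + chunk   # feed any buffered prefix first
        buffer = ""
        if not inside_value:
            marker = '"message": "'
            idx = payload.find(marker)
            if idx == -1:
                buffer = payload   # key not complete yet; keep buffering
                continue
            payload = payload[idx + len(marker):]
            inside_value = True
        # drop a trailing quote (plus delimiter) that closes the JSON string
        for closer in ('"}', '",', '"'):
            if payload.endswith(closer):
                payload = payload[: -len(closer)]
                inside_value = False
                break
        if payload:
            out.append(payload)
    return out


print(extract_message_fragments(['{"mes', 'sage": "Hi', ' there"}']))   # ['Hi', ' there']
```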
letta/llm_api/anthropic_client.py CHANGED

@@ -497,7 +497,7 @@ class AnthropicClient(LLMClientBase):
             try:
                 args_json = json.loads(arguments)
                 if not isinstance(args_json, dict):
-                    raise
+                    raise LLMServerError("Expected parseable json object for arguments")
             except:
                 arguments = str(tool_input["function"]["arguments"])
             else:

@@ -854,7 +854,7 @@ def remap_finish_reason(stop_reason: str) -> str:
     elif stop_reason == "tool_use":
         return "function_call"
     else:
-        raise
+        raise LLMServerError(f"Unexpected stop_reason: {stop_reason}")


 def strip_xml_tags(string: str, tag: Optional[str]) -> str:
letta/llm_api/azure_client.py CHANGED

@@ -54,9 +54,12 @@ class AzureClient(OpenAIClient):
         api_key = model_settings.azure_api_key or os.environ.get("AZURE_API_KEY")
         base_url = model_settings.azure_base_url or os.environ.get("AZURE_BASE_URL")
         api_version = model_settings.azure_api_version or os.environ.get("AZURE_API_VERSION")
+        try:
+            client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
+            response: ChatCompletion = await client.chat.completions.create(**request_data)
+        except Exception as e:
+            raise self.handle_llm_error(e)

-        client = AsyncAzureOpenAI(api_key=api_key, azure_endpoint=base_url, api_version=api_version)
-        response: ChatCompletion = await client.chat.completions.create(**request_data)
         return response.model_dump()

     @trace_method
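azure_client.py now runs the SDK call inside try/except and re-raises the translated error returned by handle_llm_error rather than the raw SDK exception. A small sketch of that wrap-and-translate pattern in isolation; LLMServerError and the translate callback are stand-ins for the letta error types and handler:

```python
import asyncio


class LLMServerError(Exception):
    """Stand-in for letta.errors.LLMServerError."""


async def call_with_error_translation(make_request, translate):
    """Run the provider call and re-raise whatever the translator returns,
    mirroring the try/except added in the hunk above."""
    try:
        return await make_request()
    except Exception as exc:          # translated immediately below
        raise translate(exc) from exc


async def main():
    async def failing_request():
        raise TimeoutError("provider took too long")

    def translate(exc):
        return LLMServerError(f"Azure request failed: {exc}")

    try:
        await call_with_error_translation(failing_request, translate)
    except LLMServerError as err:
        print(err)   # Azure request failed: provider took too long


asyncio.run(main())
```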
letta/llm_api/google_vertex_client.py CHANGED

@@ -14,6 +14,19 @@ from google.genai.types import (
 )

 from letta.constants import NON_USER_MSG_PREFIX
+from letta.errors import (
+    ContextWindowExceededError,
+    ErrorCode,
+    LLMAuthenticationError,
+    LLMBadRequestError,
+    LLMConnectionError,
+    LLMNotFoundError,
+    LLMPermissionDeniedError,
+    LLMRateLimitError,
+    LLMServerError,
+    LLMTimeoutError,
+    LLMUnprocessableEntityError,
+)
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps, json_loads
 from letta.llm_api.llm_client_base import LLMClientBase

@@ -48,13 +61,16 @@ class GoogleVertexClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
-
-
-
-
-
-
-
+        try:
+            client = self._get_client()
+            response = client.models.generate_content(
+                model=llm_config.model,
+                contents=request_data["contents"],
+                config=request_data["config"],
+            )
+            return response.model_dump()
+        except Exception as e:
+            raise self.handle_llm_error(e)

     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:

@@ -77,15 +93,15 @@ class GoogleVertexClient(LLMClientBase):
                 )
             except errors.APIError as e:
                 # Retry on 503 and 500 errors as well, usually ephemeral from Gemini
-                if e.code == 503 or e.code == 500:
+                if e.code == 503 or e.code == 500 or e.code == 504:
                     logger.warning(f"Received {e}, retrying {retry_count}/{self.MAX_RETRIES}")
                     retry_count += 1
                     if retry_count > self.MAX_RETRIES:
-                        raise e
+                        raise self.handle_llm_error(e)
                     continue
-                raise e
+                raise self.handle_llm_error(e)
             except Exception as e:
-                raise e
+                raise self.handle_llm_error(e)
         response_data = response.model_dump()
         is_malformed_function_call = self.is_malformed_function_call(response_data)
         if is_malformed_function_call:

@@ -363,11 +379,10 @@ class GoogleVertexClient(LLMClientBase):

         if content is None or content.role is None or content.parts is None:
             # This means the response is malformed like MALFORMED_FUNCTION_CALL
-            # NOTE: must be a ValueError to trigger a retry
             if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
-                raise
+                raise LLMServerError(f"Malformed response from Google Vertex: {candidate.finish_reason}")
             else:
-                raise
+                raise LLMServerError(f"Invalid response data from Google Vertex: {candidate.model_dump()}")

         role = content.role
         assert role == "model", f"Unknown role in response: {role}"

@@ -461,7 +476,7 @@ class GoogleVertexClient(LLMClientBase):

         except json.decoder.JSONDecodeError:
             if candidate.finish_reason == "MAX_TOKENS":
-                raise
+                raise LLMServerError("Could not parse response data from LLM: exceeded max token limit")
             # Inner thoughts are the content by default
             inner_thoughts = response_message.text

@@ -490,7 +505,7 @@ class GoogleVertexClient(LLMClientBase):
         elif finish_reason == "RECITATION":
             openai_finish_reason = "content_filter"
         else:
-            raise
+            raise LLMServerError(f"Unrecognized finish reason in Google AI response: {finish_reason}")

         choices.append(
             Choice(

@@ -581,5 +596,127 @@ class GoogleVertexClient(LLMClientBase):

     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
-        #
+        # Handle Google GenAI specific errors
+        if isinstance(e, errors.ClientError):
+            logger.warning(f"[Google Vertex] Client error ({e.code}): {e}")
+
+            # Handle specific error codes
+            if e.code == 400:
+                error_str = str(e).lower()
+                if "context" in error_str and ("exceed" in error_str or "limit" in error_str or "too long" in error_str):
+                    return ContextWindowExceededError(
+                        message=f"Bad request to Google Vertex (context window exceeded): {str(e)}",
+                    )
+                else:
+                    return LLMBadRequestError(
+                        message=f"Bad request to Google Vertex: {str(e)}",
+                        code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    )
+            elif e.code == 401:
+                return LLMAuthenticationError(
+                    message=f"Authentication failed with Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 403:
+                return LLMPermissionDeniedError(
+                    message=f"Permission denied by Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 404:
+                return LLMNotFoundError(
+                    message=f"Resource not found in Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 408:
+                return LLMTimeoutError(
+                    message=f"Request to Google Vertex timed out: {str(e)}",
+                    code=ErrorCode.TIMEOUT,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            elif e.code == 422:
+                return LLMUnprocessableEntityError(
+                    message=f"Invalid request content for Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                )
+            elif e.code == 429:
+                logger.warning("[Google Vertex] Rate limited (429). Consider backoff.")
+                return LLMRateLimitError(
+                    message=f"Rate limited by Google Vertex: {str(e)}",
+                    code=ErrorCode.RATE_LIMIT_EXCEEDED,
+                )
+            else:
+                return LLMServerError(
+                    message=f"Google Vertex client error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+
+        if isinstance(e, errors.ServerError):
+            logger.warning(f"[Google Vertex] Server error ({e.code}): {e}")
+
+            # Handle specific server error codes
+            if e.code == 500:
+                return LLMServerError(
+                    message=f"Google Vertex internal server error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+            elif e.code == 502:
+                return LLMConnectionError(
+                    message=f"Bad gateway from Google Vertex: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            elif e.code == 503:
+                return LLMServerError(
+                    message=f"Google Vertex service unavailable: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+            elif e.code == 504:
+                return LLMTimeoutError(
+                    message=f"Gateway timeout from Google Vertex: {str(e)}",
+                    code=ErrorCode.TIMEOUT,
+                    details={"cause": str(e.__cause__) if e.__cause__ else None},
+                )
+            else:
+                return LLMServerError(
+                    message=f"Google Vertex server error: {str(e)}",
+                    code=ErrorCode.INTERNAL_SERVER_ERROR,
+                    details={
+                        "status_code": e.code,
+                        "response_json": getattr(e, "response_json", None),
+                    },
+                )
+
+        if isinstance(e, errors.APIError):
+            logger.warning(f"[Google Vertex] API error ({e.code}): {e}")
+            return LLMServerError(
+                message=f"Google Vertex API error: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={
+                    "status_code": e.code,
+                    "response_json": getattr(e, "response_json", None),
+                },
+            )
+
+        # Handle connection-related errors
+        if "connection" in str(e).lower() or "timeout" in str(e).lower():
+            logger.warning(f"[Google Vertex] Connection/timeout error: {e}")
+            return LLMConnectionError(
+                message=f"Failed to connect to Google Vertex: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={"cause": str(e.__cause__) if e.__cause__ else None},
+            )
+
+        # Fallback to base implementation for other errors
         return super().handle_llm_error(e)
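The new handle_llm_error body above is essentially a status-code dispatch over google.genai's ClientError/ServerError types. A condensed sketch of the same dispatch using plain integers and stand-in exception classes; it covers only a subset of the codes handled above and omits the logging and details payloads:

```python
class ContextWindowExceededError(Exception): ...
class LLMAuthenticationError(Exception): ...
class LLMTimeoutError(Exception): ...
class LLMRateLimitError(Exception): ...
class LLMConnectionError(Exception): ...
class LLMServerError(Exception): ...


def map_vertex_status(code: int, message: str) -> Exception:
    """Condensed version of the dispatch above: return (not raise) a typed error."""
    if code == 400 and "context" in message.lower():
        return ContextWindowExceededError(message)
    if code == 401:
        return LLMAuthenticationError(message)
    if code in (408, 504):
        return LLMTimeoutError(message)
    if code == 429:
        return LLMRateLimitError(message)
    if code == 502:
        return LLMConnectionError(message)
    # 500, 503 and anything unrecognized fall back to a generic server error
    return LLMServerError(f"{code}: {message}")


print(type(map_vertex_status(429, "quota exhausted")).__name__)       # LLMRateLimitError
print(type(map_vertex_status(503, "service unavailable")).__name__)   # LLMServerError
```

The caller then raises the returned exception, which is what the retry loop and the request wrappers above do via raise self.handle_llm_error(e).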
letta/llm_api/openai_client.py CHANGED

@@ -99,7 +99,7 @@ def supports_structured_output(llm_config: LLMConfig) -> bool:

     # FIXME pretty hacky - turn off for providers we know users will use,
     # but also don't support structured output
-    if "nebius.com" in llm_config.model_endpoint:
+    if llm_config.model_endpoint and "nebius.com" in llm_config.model_endpoint:
         return False
     else:
         return True

@@ -108,7 +108,7 @@ def supports_structured_output(llm_config: LLMConfig) -> bool:
 # TODO move into LLMConfig as a field?
 def requires_auto_tool_choice(llm_config: LLMConfig) -> bool:
     """Certain providers require the tool choice to be set to 'auto'."""
-    if "nebius.com" in llm_config.model_endpoint:
+    if llm_config.model_endpoint and "nebius.com" in llm_config.model_endpoint:
         return True
     if llm_config.handle and "vllm" in llm_config.handle:
         return True

@@ -168,7 +168,9 @@ class OpenAIClient(LLMClientBase):
             # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
             # TODO(fix)
             inner_thoughts_desc = (
-                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
+                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
+                if llm_config.model_endpoint and ":1234" in llm_config.model_endpoint
+                else INNER_THOUGHTS_KWARG_DESCRIPTION
             )
             tools = add_inner_thoughts_to_functions(
                 functions=tools,
letta/otel/sqlalchemy_instrumentation.py CHANGED

@@ -146,11 +146,16 @@ def _instrument_engine_events(engine: Engine) -> None:
             span.end()
             context._sync_instrumentation_span = None

-    def handle_cursor_error(
+    def handle_cursor_error(exception_context):
         """Handle cursor execution errors."""
         if not _config["enabled"]:
             return

+        # Extract context from exception_context
+        context = getattr(exception_context, "execution_context", None)
+        if not context:
+            return
+
         span = getattr(context, "_sync_instrumentation_span", None)
         if span:
             span.set_status(Status(StatusCode.ERROR, "Database operation failed"))
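The fix above gives handle_cursor_error the argument SQLAlchemy actually passes to "handle_error" listeners: an ExceptionContext, from which the execution context is read (it can be None, for example on connection-level failures). A minimal sketch of how a handler with this shape is wired up, assuming a standard SQLAlchemy engine and the built-in "handle_error" event rather than letta's instrumentation module:

```python
from sqlalchemy import create_engine, event, text


def handle_cursor_error(exception_context):
    """Same signature as the fixed handler above: pull the execution context
    off the ExceptionContext instead of assuming it was passed directly."""
    context = getattr(exception_context, "execution_context", None)
    if not context:
        return
    print("statement failed:", exception_context.statement)


engine = create_engine("sqlite://")
# "handle_error" delivers an ExceptionContext, which is why the handler needs a parameter.
event.listen(engine, "handle_error", handle_cursor_error)

try:
    with engine.connect() as conn:
        conn.execute(text("SELECT * FROM missing_table"))
except Exception:
    pass   # the listener already reported the failing statement
```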
letta/schemas/letta_stop_reason.py CHANGED

@@ -9,6 +9,7 @@ from letta.schemas.enums import JobStatus
 class StopReasonType(str, Enum):
     end_turn = "end_turn"
     error = "error"
+    llm_api_error = "llm_api_error"
     invalid_llm_response = "invalid_llm_response"
     invalid_tool_call = "invalid_tool_call"
     max_steps = "max_steps"

@@ -31,6 +32,7 @@ class StopReasonType(str, Enum):
             StopReasonType.invalid_tool_call,
             StopReasonType.no_tool_call,
             StopReasonType.invalid_llm_response,
+            StopReasonType.llm_api_error,
         ):
             return JobStatus.failed
         elif self == StopReasonType.cancelled:
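The schema change above adds llm_api_error to StopReasonType and to the set of stop reasons that resolve to a failed job. A trimmed stand-in showing only that effect; the enums are abbreviated and the property name run_status is illustrative, not the real attribute name:

```python
from enum import Enum


class JobStatus(str, Enum):
    completed = "completed"
    failed = "failed"
    cancelled = "cancelled"


class StopReasonType(str, Enum):
    end_turn = "end_turn"
    llm_api_error = "llm_api_error"
    invalid_llm_response = "invalid_llm_response"
    cancelled = "cancelled"

    @property
    def run_status(self) -> "JobStatus":
        # llm_api_error now joins the failure bucket, per the hunk above
        if self in (StopReasonType.invalid_llm_response, StopReasonType.llm_api_error):
            return JobStatus.failed
        if self is StopReasonType.cancelled:
            return JobStatus.cancelled
        return JobStatus.completed


print(StopReasonType.llm_api_error.run_status)   # JobStatus.failed
```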
letta/server/rest_api/app.py CHANGED

@@ -17,7 +17,15 @@ from starlette.middleware.cors import CORSMiddleware
 from letta.__init__ import __version__ as letta_version
 from letta.agents.exceptions import IncompatibleAgentType
 from letta.constants import ADMIN_PREFIX, API_PREFIX, OPENAI_API_PREFIX
-from letta.errors import
+from letta.errors import (
+    BedrockPermissionError,
+    LettaAgentNotFoundError,
+    LettaUserNotFoundError,
+    LLMAuthenticationError,
+    LLMError,
+    LLMRateLimitError,
+    LLMTimeoutError,
+)
 from letta.helpers.pinecone_utils import get_pinecone_indices, should_use_pinecone, upsert_pinecone_indices
 from letta.jobs.scheduler import start_scheduler_with_leader_election
 from letta.log import get_logger

@@ -276,6 +284,58 @@ def create_application() -> "FastAPI":
         },
     )

+    @app.exception_handler(LLMTimeoutError)
+    async def llm_timeout_error_handler(request: Request, exc: LLMTimeoutError):
+        return JSONResponse(
+            status_code=504,
+            content={
+                "error": {
+                    "type": "llm_timeout",
+                    "message": "The LLM request timed out. Please try again.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMRateLimitError)
+    async def llm_rate_limit_error_handler(request: Request, exc: LLMRateLimitError):
+        return JSONResponse(
+            status_code=429,
+            content={
+                "error": {
+                    "type": "llm_rate_limit",
+                    "message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMAuthenticationError)
+    async def llm_auth_error_handler(request: Request, exc: LLMAuthenticationError):
+        return JSONResponse(
+            status_code=401,
+            content={
+                "error": {
+                    "type": "llm_authentication",
+                    "message": "Authentication failed with the LLM model provider.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
+    @app.exception_handler(LLMError)
+    async def llm_error_handler(request: Request, exc: LLMError):
+        return JSONResponse(
+            status_code=502,
+            content={
+                "error": {
+                    "type": "llm_error",
+                    "message": "An error occurred with the LLM request.",
+                    "detail": str(exc),
+                }
+            },
+        )
+
     settings.cors_origins.append("https://app.letta.com")

     if (os.getenv("LETTA_SERVER_SECURE") == "true") or "--secure" in sys.argv:
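The handlers registered above use FastAPI's standard exception-handler mechanism: a typed exception becomes a structured JSON body plus a fixed HTTP status. A runnable sketch of one such handler outside of letta; LLMTimeoutError here is a stand-in class, and the route and app are invented for the demonstration:

```python
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from fastapi.testclient import TestClient


class LLMTimeoutError(Exception):
    """Stand-in for letta.errors.LLMTimeoutError."""


app = FastAPI()


@app.exception_handler(LLMTimeoutError)
async def llm_timeout_error_handler(request: Request, exc: LLMTimeoutError):
    # Same shape as the handlers above: typed error -> JSON error object + status code.
    return JSONResponse(
        status_code=504,
        content={"error": {"type": "llm_timeout", "message": "The LLM request timed out.", "detail": str(exc)}},
    )


@app.get("/boom")
async def boom():
    raise LLMTimeoutError("no tokens after 60s")


client = TestClient(app)
response = client.get("/boom")
print(response.status_code, response.json()["error"]["type"])   # 504 llm_timeout
```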
letta/server/rest_api/interface.py CHANGED

@@ -808,86 +808,33 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # If there was nothing in the name buffer, we can proceed to
             # output the arguments chunk as a ToolCallMessage
             else:
-                #
+                # use_assistant_message means we should emit only the value of "message"
                 if self.use_assistant_message and (
                     self.last_flushed_function_name is not None
                     and self.last_flushed_function_name == self.assistant_message_tool_name
                 ):
-                    #
-
-
-
-
-
-
-
-                    updates_main_json = None
-
-                    else:
-                        # Some hardcoding to strip off the trailing "}"
-                        if updates_main_json in ["}", '"}']:
-                            updates_main_json = None
-                        if updates_main_json and len(updates_main_json) > 0 and updates_main_json[-1:] == '"':
-                            updates_main_json = updates_main_json[:-1]
-
-                    if not updates_main_json:
-                        # early exit to turn into content mode
+                    # Feed any buffered prefix first to avoid missing the start of the value
+                    payload = (self.function_args_buffer or "") + (updates_main_json or "")
+                    self.function_args_buffer = None
+                    cleaned = self.streaming_chat_completion_json_reader.process_json_chunk(payload)
+                    from letta.streaming_utils import sanitize_streamed_message_content
+
+                    cleaned = sanitize_streamed_message_content(cleaned or "")
+                    if not cleaned:
                         return None
-
-
-
-
-
-
-
-
-
-
-
-
-
-                            name=name,
-                            otid=Message.generate_otid_from_id(message_id, message_index),
-                        )
-                        # Store the ID of the tool call so allow skipping the corresponding response
-                        if self.function_id_buffer:
-                            self.prev_assistant_message_id = self.function_id_buffer
-                        # clear buffer
-                        self.function_args_buffer = None
-                        self.function_id_buffer = None
-
-                    else:
-                        # If there's no buffer to clear, just output a new chunk with new data
-                        # TODO: THIS IS HORRIBLE
-                        # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
-                        # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-                        parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
-
-                        if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
-                            self.assistant_message_tool_kwarg
-                        ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
-                            new_content = parsed_args.get(self.assistant_message_tool_kwarg)
-                            prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
-                            # TODO: Assumes consistent state and that prev_content is subset of new_content
-                            diff = new_content.replace(prev_content, "", 1)
-                            self.current_json_parse_result = parsed_args
-                            if prev_message_type and prev_message_type != "assistant_message":
-                                message_index += 1
-                            processed_chunk = AssistantMessage(
-                                id=message_id,
-                                date=message_date,
-                                content=diff,
-                                name=name,
-                                otid=Message.generate_otid_from_id(message_id, message_index),
-                            )
-                        else:
-                            return None
-
-                        # Store the ID of the tool call so allow skipping the corresponding response
-                        if self.function_id_buffer:
-                            self.prev_assistant_message_id = self.function_id_buffer
-                        # clear buffers
-                        self.function_id_buffer = None
+                    if prev_message_type and prev_message_type != "assistant_message":
+                        message_index += 1
+                    processed_chunk = AssistantMessage(
+                        id=message_id,
+                        date=message_date,
+                        content=cleaned,
+                        name=name,
+                        otid=Message.generate_otid_from_id(message_id, message_index),
+                    )
+                    # Store the ID of the tool call so allow skipping the corresponding response
+                    if self.function_id_buffer:
+                        self.prev_assistant_message_id = self.function_id_buffer
+                    # Do not clear function_id_buffer here — we may still need it
                 else:
                     # There may be a buffer from a previous chunk, for example
                     # if the previous chunk had arguments but we needed to flush name
letta/server/rest_api/routers/v1/agents.py CHANGED

@@ -536,9 +536,7 @@ async def attach_source(

     if agent_state.enable_sleeptime:
         source = await server.source_manager.get_source_by_id(source_id=source_id)
-        safe_create_task(
-            server.sleeptime_document_ingest_async(agent_state, source, actor), logger=logger, label="sleeptime_document_ingest_async"
-        )
+        safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async")

     return agent_state

@@ -565,9 +563,7 @@ async def attach_folder_to_agent(

     if agent_state.enable_sleeptime:
         source = await server.source_manager.get_source_by_id(source_id=folder_id)
-        safe_create_task(
-            server.sleeptime_document_ingest_async(agent_state, source, actor), logger=logger, label="sleeptime_document_ingest_async"
-        )
+        safe_create_task(server.sleeptime_document_ingest_async(agent_state, source, actor), label="sleeptime_document_ingest_async")

     return agent_state

@@ -1320,15 +1316,55 @@ async def send_message_streaming(
     try:
         if agent_eligible and model_compatible:
             agent_loop = AgentLoop.load(agent_state=agent, actor=actor)
-
-
-
-
-
-
-
-
-
+
+            async def error_aware_stream():
+                """Stream that handles early LLM errors gracefully in streaming format."""
+                from letta.errors import LLMAuthenticationError, LLMError, LLMRateLimitError, LLMTimeoutError
+
+                try:
+                    stream = agent_loop.stream(
+                        input_messages=request.messages,
+                        max_steps=request.max_steps,
+                        stream_tokens=request.stream_tokens and model_compatible_token_streaming,
+                        run_id=run.id if run else None,
+                        use_assistant_message=request.use_assistant_message,
+                        request_start_timestamp_ns=request_start_timestamp_ns,
+                        include_return_message_types=request.include_return_message_types,
+                    )
+                    async for chunk in stream:
+                        yield chunk
+
+                except LLMTimeoutError as e:
+                    error_data = {
+                        "error": {"type": "llm_timeout", "message": "The LLM request timed out. Please try again.", "detail": str(e)}
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 504)
+                except LLMRateLimitError as e:
+                    error_data = {
+                        "error": {
+                            "type": "llm_rate_limit",
+                            "message": "Rate limit exceeded for LLM model provider. Please wait before making another request.",
+                            "detail": str(e),
+                        }
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 429)
+                except LLMAuthenticationError as e:
+                    error_data = {
+                        "error": {
+                            "type": "llm_authentication",
+                            "message": "Authentication failed with the LLM model provider.",
+                            "detail": str(e),
+                        }
+                    }
+                    yield (f"data: {json.dumps(error_data)}\n\n", 401)
+                except LLMError as e:
+                    error_data = {"error": {"type": "llm_error", "message": "An error occurred with the LLM request.", "detail": str(e)}}
+                    yield (f"data: {json.dumps(error_data)}\n\n", 502)
+                except Exception as e:
+                    error_data = {"error": {"type": "internal_error", "message": "An internal server error occurred.", "detail": str(e)}}
+                    yield (f"data: {json.dumps(error_data)}\n\n", 500)
+
+            raw_stream = error_aware_stream()

     from letta.server.rest_api.streaming_response import StreamingResponseWithStatusCode, add_keepalive_to_stream
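error_aware_stream above converts provider errors raised mid-stream into a final SSE data line paired with the HTTP status the response should carry (consumed downstream by StreamingResponseWithStatusCode). A self-contained sketch of that wrapping pattern; the source generator and error class here are stand-ins, not letta code:

```python
import asyncio
import json


class LLMRateLimitError(Exception):
    """Stand-in for letta.errors.LLMRateLimitError."""


async def error_aware_stream(source):
    """Re-yield chunks from an async generator; on failure, emit one last
    (sse_line, status_code) tuple instead of letting the exception escape."""
    try:
        async for chunk in source:
            yield chunk
    except LLMRateLimitError as exc:
        payload = {"error": {"type": "llm_rate_limit", "detail": str(exc)}}
        yield (f"data: {json.dumps(payload)}\n\n", 429)


async def flaky_source():
    yield 'data: {"message": "hello"}\n\n'
    raise LLMRateLimitError("quota exceeded")


async def main():
    async for item in error_aware_stream(flaky_source()):
        print(item)   # first the normal chunk, then the (error line, 429) tuple


asyncio.run(main())
```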
letta/services/job_manager.py CHANGED

@@ -218,8 +218,17 @@ class JobManager:
         """
         try:
             job_update_builder = partial(JobUpdate, status=new_status)
+
+            # If metadata is provided, merge it with existing metadata
             if metadata:
-
+                # Get the current job to access existing metadata
+                current_job = await self.get_job_by_id_async(job_id=job_id, actor=actor)
+                merged_metadata = {}
+                if current_job.metadata:
+                    merged_metadata.update(current_job.metadata)
+                merged_metadata.update(metadata)
+                job_update_builder = partial(job_update_builder, metadata=merged_metadata)
+
             if new_status.is_terminal:
                 job_update_builder = partial(job_update_builder, completed_at=get_utc_time())
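The job_manager change merges caller-supplied metadata into the job's stored metadata instead of replacing it wholesale. The same merge order in isolation, as a small stand-alone helper rather than the letta method:

```python
def merge_job_metadata(existing: dict | None, update: dict | None) -> dict:
    """Start from the stored metadata, then let the caller-supplied values win
    on key conflicts, matching the update order in the hunk above."""
    merged = {}
    if existing:
        merged.update(existing)
    if update:
        merged.update(update)
    return merged


print(merge_job_metadata({"source": "api", "attempt": 1}, {"attempt": 2}))
# {'source': 'api', 'attempt': 2}
```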
letta/streaming_utils.py CHANGED

@@ -264,39 +264,100 @@ class FunctionArgumentsStreamHandler:

     def process_json_chunk(self, chunk: str) -> Optional[str]:
         """Process a chunk from the function arguments and return the plaintext version"""
-
-
-
-        if
-
-
-
-            return None
+        clean_chunk = chunk.strip()
+        # Not in message yet: accumulate until we see '<json_key>': (robust to split fragments)
+        if not self.in_message:
+            if clean_chunk == "{":
+                self.key_buffer = ""
+                self.accumulating = True
+                return None
             self.key_buffer += clean_chunk
+            if self.json_key in self.key_buffer and ":" in clean_chunk:
+                # Enter value mode; attempt to extract inline content if it exists in this same chunk
+                self.in_message = True
+                self.accumulating = False
+                # Try to find the first quote after the colon within the original (unstripped) chunk
+                s = chunk
+                colon_idx = s.find(":")
+                if colon_idx != -1:
+                    q_idx = s.find('"', colon_idx + 1)
+                    if q_idx != -1:
+                        self.message_started = True
+                        rem = s[q_idx + 1 :]
+                        # Check if this same chunk also contains the terminating quote (and optional delimiter)
+                        j = len(rem) - 1
+                        while j >= 0 and rem[j] in " \t\r\n":
+                            j -= 1
+                        if j >= 1 and rem[j - 1] == '"' and rem[j] in ",}]":
+                            out = rem[: j - 1]
+                            self.in_message = False
+                            self.message_started = False
+                            return out
+                        if j >= 0 and rem[j] == '"':
+                            out = rem[:j]
+                            self.in_message = False
+                            self.message_started = False
+                            return out
+                        # No terminator yet; emit remainder as content
+                        return rem
+                return None
+            if clean_chunk == "}":
+                self.in_message = False
+                self.message_started = False
+                self.key_buffer = ""
             return None

+        # Inside message value
         if self.in_message:
-
+            # Bare opening/closing quote tokens
+            if clean_chunk == '"' and self.message_started:
                 self.in_message = False
                 self.message_started = False
                 return None
-            if not self.message_started and
+            if not self.message_started and clean_chunk == '"':
                 self.message_started = True
                 return None
             if self.message_started:
-
+                # Detect closing patterns: '"', '",', '"}' (with optional whitespace)
+                i = len(chunk) - 1
+                while i >= 0 and chunk[i] in " \t\r\n":
+                    i -= 1
+                if i >= 1 and chunk[i - 1] == '"' and chunk[i] in ",}]":
+                    out = chunk[: i - 1]
                     self.in_message = False
-
+                    self.message_started = False
+                    return out
+                if i >= 0 and chunk[i] == '"':
+                    out = chunk[:i]
+                    self.in_message = False
+                    self.message_started = False
+                    return out
+                # Otherwise, still mid-string
                 return chunk

-        if
-            self.key_buffer = ""
-            self.accumulating = True
-            return None
-
-        if chunk.strip() == "}":
+        if clean_chunk == "}":
             self.in_message = False
             self.message_started = False
+            self.key_buffer = ""
             return None

         return None
+
+
+def sanitize_streamed_message_content(text: str) -> str:
+    """Remove trailing JSON delimiters that can leak into assistant text.
+
+    Specifically handles cases where a message string is immediately followed
+    by a JSON delimiter in the stream (e.g., '"', '",', '"}', '" ]').
+    Internal commas inside the message are preserved.
+    """
+    if not text:
+        return text
+    t = text.rstrip()
+    # strip trailing quote + delimiter
+    if len(t) >= 2 and t[-2] == '"' and t[-1] in ",}]":
+        return t[:-2]
+    # strip lone trailing quote
+    if t.endswith('"'):
+        return t[:-1]
+    return t
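Since sanitize_streamed_message_content is added in full above, its behavior on a few streamed fragments can be shown directly. This assumes the helper is imported from letta.streaming_utils, as the new import block earlier in this diff does:

```python
from letta.streaming_utils import sanitize_streamed_message_content

# Trailing JSON punctuation is stripped; punctuation inside the text is preserved.
print(sanitize_streamed_message_content('Sounds good"'))    # Sounds good
print(sanitize_streamed_message_content('Sounds good"}'))   # Sounds good
print(sanitize_streamed_message_content("Sure, on it"))     # Sure, on it (unchanged)
```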
letta_nightly-0.11.7.dev20250912104045.dist-info/RECORD → letta_nightly-0.11.7.dev20250914103918.dist-info/RECORD CHANGED

@@ -11,12 +11,12 @@ letta/memory.py,sha256=l5iNhLAR_xzgTb0GBlQx4SVgH8kuZh8siJdC_CFPKEs,4278
 letta/pytest.ini,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/settings.py,sha256=QEjNUwRXGBgsQpQAs2kksQmGN5CbxKlxPPydrklx_Ms,15011
 letta/streaming_interface.py,sha256=rPMfwUcjqITWk2tVqFQm1hmP99tU2IOHg9gU2dgPSo8,16400
-letta/streaming_utils.py,sha256=
+letta/streaming_utils.py,sha256=ZRFGFpQqn9ujCEbgZdLM7yTjiuNNvqQ47sNhV8ix-yQ,16553
 letta/system.py,sha256=kHF7n3Viq7gV5UIUEXixod2gWa2jroUgztpEzMC1Sew,8925
 letta/utils.py,sha256=bSq3St7MUw9gN1g0ICdOhNNaUFYBC3EfJLG6qsRLSFA,43290
 letta/adapters/letta_llm_adapter.py,sha256=11wkOkEQfPXUuJoJxbK22wCa-8gnWiDAb3UOXOxLt5U,3427
 letta/adapters/letta_llm_request_adapter.py,sha256=wJhK5M_qOhRPAhgMmYI7EJcM8Op19tClnXe0kJ29a3Q,4831
-letta/adapters/letta_llm_stream_adapter.py,sha256=
+letta/adapters/letta_llm_stream_adapter.py,sha256=G8IqtXor0LUuW-dKtGJWsUt6DfJreVCn5h6W2lHEPBg,7658
 letta/agents/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/agents/agent_loop.py,sha256=cTSlGt1g9aZWG5vIMYtzdeJG1UcrqfjpLGmZU6j89zU,854
 letta/agents/base_agent.py,sha256=rUAcPxWmTnmi50AWOXwrWc-v5sPIod0W_xXaPQShjcE,8540

@@ -27,7 +27,7 @@ letta/agents/exceptions.py,sha256=BQY4D4w32OYHM63CM19ko7dPwZiAzUs3NbKvzmCTcJg,31
 letta/agents/helpers.py,sha256=eCHsvZEkTe0L_uZHYkfNAztsEJW0FTnKZMgVbqlI0Yg,11618
 letta/agents/letta_agent.py,sha256=6nRTh5kzUpqK7eNMk4DlcgEoPmDxFmRb5ysoVHa-vh8,99488
 letta/agents/letta_agent_batch.py,sha256=17RpYVXpGh9dlKxdMOLMCOHWFsi6N5S9FJHxooxkJCI,27998
-letta/agents/letta_agent_v2.py,sha256=
+letta/agents/letta_agent_v2.py,sha256=vMzVZL6Px5XBODEh3BHbbADtBSDIuNJrjsHqMGQfnwg,59930
 letta/agents/voice_agent.py,sha256=y-n6qadfKsswvGODzXH02pLIQQ44wnaDSE6oUgKHVkA,23381
 letta/agents/voice_sleeptime_agent.py,sha256=_JzCbWBOKrmo1cTaqZFTrQudpJEapwAyrXYtAHUILGo,8675
 letta/cli/cli.py,sha256=tKtghlX36Rp0_HbkMosvlAapL07JXhA0vKLGTNKnxSQ,1615

@@ -85,7 +85,7 @@ letta/humans/examples/cs_phd.txt,sha256=9C9ZAV_VuG7GB31ksy3-_NAyk8rjE6YtVOkhp08k
 letta/interfaces/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/interfaces/anthropic_streaming_interface.py,sha256=0VyK8kTRgCLNDLQN6vX1gJ0dfJhqguL_NL1GYgFr6fU,25614
 letta/interfaces/openai_chat_completions_streaming_interface.py,sha256=3xHXh8cW79EkiMUTYfvcH_s92nkLjxXfvtVOVC3bfLo,5050
-letta/interfaces/openai_streaming_interface.py,sha256=
+letta/interfaces/openai_streaming_interface.py,sha256=YLArar2ypOEaVt7suJxpg1QZr0ErwEmPSEVhzaP6JWc,24166
 letta/interfaces/utils.py,sha256=c6jvO0dBYHh8DQnlN-B0qeNC64d3CSunhfqlFA4pJTY,278
 letta/jobs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/jobs/helpers.py,sha256=kO4aj954xsQ1RAmkjY6LQQ7JEIGuhaxB1e9pzrYKHAY,914

@@ -93,13 +93,13 @@ letta/jobs/llm_batch_job_polling.py,sha256=HUCTa1lTOiLAB_8m95RUfeNJa4lxlF8paGdCV
 letta/jobs/scheduler.py,sha256=Ub5VTCA8P5C9Y-0mPK2YIPJSEzKbSd2l5Sp0sOWctD8,8697
 letta/jobs/types.py,sha256=K8GKEnqEgAT6Kq4F2hUrBC4ZAFM9OkfOjVMStzxKuXQ,742
 letta/llm_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/llm_api/anthropic_client.py,sha256=
-letta/llm_api/azure_client.py,sha256=
+letta/llm_api/anthropic_client.py,sha256=L8M4i08bHfNh1uS_M2_bDf3yeEuHpr5pungyu0pqo60,37380
+letta/llm_api/azure_client.py,sha256=BeChGsH4brrSgZBbCf8UE5RkW-3ZughpKnsBY2VYxwI,3841
 letta/llm_api/bedrock_client.py,sha256=gNKSFGCbrrLMPvtBItAOz1nme4K_opgkZdFa3cUzp7M,3434
 letta/llm_api/deepseek_client.py,sha256=di6ApSQu1DewXw0_JIP7AK4IHvXQHd0e32tQfFf5F34,16975
 letta/llm_api/google_ai_client.py,sha256=JweTUHZXvK6kcZBGXA7XEU53KP4vM7_zdD7AorCtsdI,8166
 letta/llm_api/google_constants.py,sha256=eOjOv-FImyJ4b4QGIaod-mEROMtrBFz0yhuYHqOEkwY,797
-letta/llm_api/google_vertex_client.py,sha256=
+letta/llm_api/google_vertex_client.py,sha256=p6MNUFHhkzFkGfWgldjVJC6SIvSMriMeCoenNYynU6E,34970
 letta/llm_api/groq_client.py,sha256=nNeWSgDVOLn3iFiicDKyhHj7f73JxrB9-7_M2Pv2e1I,3192
 letta/llm_api/helpers.py,sha256=GXV_SuaU7uSCDj6bxDcCCF7CUjuZQCVWd5qZ3OsHVNk,17587
 letta/llm_api/llm_api_tools.py,sha256=lsZ6OeIHesyOfbNQi5CVw5hn1lTQP5gJyforp-D0nk8,12294

@@ -107,7 +107,7 @@ letta/llm_api/llm_client.py,sha256=iXiPbrhluP2DBczv9nkFlAXdwWGOkg0lNDA9LzLrG4o,3
 letta/llm_api/llm_client_base.py,sha256=RFo8H4ILxVyzB3DeF4rJoJJYjRF8ScVO4yyDrhuN0DY,10052
 letta/llm_api/mistral.py,sha256=ruOTBt07Uzx7S30_eXhedVWngtpjtlzG6Ox1Iw0_mQs,662
 letta/llm_api/openai.py,sha256=56cwdS9l-75cMTtY9df6Dbb1M9crH8YQsSdF3Pm3Rpg,27393
-letta/llm_api/openai_client.py,sha256=
+letta/llm_api/openai_client.py,sha256=QDIRIG-4MVA-Jug8qx0HUkhg3qtUfHGvE6QCbSYGK-c,22597
 letta/llm_api/together_client.py,sha256=HeDMDDa525yfDTKciODDfX_t93QBfFmX0n2P-FT1QTU,2284
 letta/llm_api/xai_client.py,sha256=3mpSQ9OoWyjqo2VhNM_m0EPBzS69r4p-OEwL7UWc9oY,3772
 letta/llm_api/sample_response_jsons/aws_bedrock.json,sha256=RS3VqyxPB9hQQCPm42hWoga0bisKv_0e8ZF-c3Ag1FA,930

@@ -202,7 +202,7 @@ letta/otel/events.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/otel/metric_registry.py,sha256=TdRBJrwDuyZV2Uretnq0lYIoYKA2JUqWkENGqLhOCBc,9344
 letta/otel/metrics.py,sha256=GlIt8XLkP-igTXptah8UBonpHF7nEtSqTONSkAEERAs,4740
 letta/otel/resource.py,sha256=kqvEywP2LTmuxv2Or3Irtm2zwic863j1DWUvBC0IONc,735
-letta/otel/sqlalchemy_instrumentation.py,sha256=
+letta/otel/sqlalchemy_instrumentation.py,sha256=yiZvHjDA8Sd5j5RGbokiaOgRwCIE5hkvhWavVSOXs7U,18892
 letta/otel/sqlalchemy_instrumentation_integration.py,sha256=CwGPd5mb4PasBDnSlulSqfaupN-kB8Wz6EBHWBDNuuo,3902
 letta/otel/tracing.py,sha256=kyLsl00Zka3z3uEnOZqgantHya_bsmpvulABYHvsUo8,10422
 letta/personas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0

@@ -270,7 +270,7 @@ letta/schemas/letta_message_content.py,sha256=7FuholmKauP5Z-FJdsCH_-4IUGl_8jVqi5
 letta/schemas/letta_ping.py,sha256=9JphoKhWZ63JqsakIx4aaj8dYMtYVa7HxSkT5cMh5cI,863
 letta/schemas/letta_request.py,sha256=ll0QTt-tzaJ3zxpPyaifz7mtWcPy6QmvPUDOzngbxfQ,4526
 letta/schemas/letta_response.py,sha256=e6FcAhRX3heB0FoWAAozB3RJboMwi_JpelTdc5JupVA,8188
-letta/schemas/letta_stop_reason.py,sha256=
+letta/schemas/letta_stop_reason.py,sha256=6vF66Dsyzog3X_d2PjfQxJLyiDarlqJ-hG7NMJpxbuc,2349
 letta/schemas/llm_batch_job.py,sha256=xr7RmMc9ItmL344vcIn1MJaT2nOf0F7qEHrsXkQNFQI,3136
 letta/schemas/llm_config.py,sha256=8nyi9r3o3feh_hUy6pdRWp3E6M612xZhvV3gkFB4aqE,13642
 letta/schemas/llm_config_overrides.py,sha256=E6qJuVA8TwAAy3VjGitJ5jSQo5PbN-6VPcZOF5qhP9A,1815

@@ -337,10 +337,10 @@ letta/server/server.py,sha256=KFFbyl7Djn8CS0aPxz3jL8RwmXPr9nKY3wDu3ymUWjI,109265
 letta/server/startup.sh,sha256=z-Fea-7LiuS_aG1tJqS8JAsDQaamwC_kuDhv9D3PPPY,2698
 letta/server/utils.py,sha256=rRvW6L1lzau4u9boamiyZH54lf5tQ91ypXzUW9cfSPA,1667
 letta/server/rest_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/server/rest_api/app.py,sha256=
+letta/server/rest_api/app.py,sha256=T3LLveXRJmfWqR0uEzoaLY8LXwYrwCQGb80XMbSCDUo,21172
 letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
 letta/server/rest_api/chat_completions_interface.py,sha256=-7wO7pNBWXMqblVkJpuZ8JPJ-LjudLTtT6BJu-q_XAM,11138
-letta/server/rest_api/interface.py,sha256=
+letta/server/rest_api/interface.py,sha256=_GQfKYUp9w4Wo2HSE_8Ff7QU16t1blspLaqmukpER9s,67099
 letta/server/rest_api/json_parser.py,sha256=yoakaCkSMdf0Y_pyILoFKZlvzXeqF-E1KNeHzatLMDc,9157
 letta/server/rest_api/redis_stream_manager.py,sha256=hz85CigFWdLkK1FWUmF-i6ObgoKkuoEgkiwshZ6QPKI,10764
 letta/server/rest_api/static_files.py,sha256=NG8sN4Z5EJ8JVQdj19tkFa9iQ1kBPTab9f_CUxd_u4Q,3143

@@ -355,7 +355,7 @@ letta/server/rest_api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
 letta/server/rest_api/routers/openai/chat_completions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/routers/openai/chat_completions/chat_completions.py,sha256=ohM1i8BsNxTiw8duuRT5X_0tSUzBwctQM4fJ5DXURic,5157
 letta/server/rest_api/routers/v1/__init__.py,sha256=9MnEA7CgtIxyU_dDNG0jm-Ziqu1somBml-e5gKjgd9I,1997
-letta/server/rest_api/routers/v1/agents.py,sha256=
+letta/server/rest_api/routers/v1/agents.py,sha256=2lGLtfgB2ZtAa0EgaiaDlNV0GJhAi_kQQy7XqXB2DG0,77771
 letta/server/rest_api/routers/v1/blocks.py,sha256=ykI77xnmIxPLqdAy5kzGyGw0w0ZRyVXn-O5Xcdj6-70,7690
 letta/server/rest_api/routers/v1/embeddings.py,sha256=PRaQlrmEXPiIdWsTbadrFsv3Afyv5oEFUdhgHA8FTi8,989
 letta/server/rest_api/routers/v1/folders.py,sha256=8Yb-bw2JdXBxMfrJNIZQk9_FKN2fet9Ccp8T83_c2sc,23539

@@ -397,7 +397,7 @@ letta/services/file_manager.py,sha256=d4uX8RblmqNGk1MsfeGzQ5uDWKVFP-AH63Jz5xOkj2
 letta/services/files_agents_manager.py,sha256=QJrJTgDn3RXUjZIGiIw4GQ5k2iKj-Wvzs-WQetpQ154,30059
 letta/services/group_manager.py,sha256=dD4DDHjOptMrtbWqw1ErlhpBqChw2ubLJdILjeLTY8I,29183
 letta/services/identity_manager.py,sha256=JI9Xc7EsBagSwDS2na4rFNhoO_LuaxlkVO_1oIK_ITQ,11841
-letta/services/job_manager.py,sha256=
+letta/services/job_manager.py,sha256=E-w9_4BMErMuqVf2dFlTPTobrvBKhPyyEDfuqLnbACI,35970
 letta/services/llm_batch_manager.py,sha256=iDzLFfmgpQooGY4zpN_w8q1SZ27fr2Cv6Ks3ltZErL8,20929
 letta/services/mcp_manager.py,sha256=QuvKQnwxMXrhiCaYlF50GZwXmbSU7PxmcOZ85sQ3t7I,47848
 letta/services/message_manager.py,sha256=tomsZidPT-I95sJsEsls-vj3qglehV7XNTs-m2zF8Bg,60629

@@ -470,8 +470,8 @@ letta/templates/sandbox_code_file_async.py.j2,sha256=lb7nh_P2W9VZHzU_9TxSCEMUod7
 letta/templates/summary_request_text.j2,sha256=ZttQwXonW2lk4pJLYzLK0pmo4EO4EtUUIXjgXKiizuc,842
 letta/templates/template_helper.py,sha256=HkG3zwRc5NVGmSTQu5PUTpz7LevK43bzXVaQuN8urf0,1634
 letta/types/__init__.py,sha256=hokKjCVFGEfR7SLMrtZsRsBfsC7yTIbgKPLdGg4K1eY,147
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
-letta_nightly-0.11.7.
+letta_nightly-0.11.7.dev20250914103918.dist-info/METADATA,sha256=znAgbibaDvvLthC_McJ-W-HokPJdRIUijKN7KtgqoE0,24424
+letta_nightly-0.11.7.dev20250914103918.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+letta_nightly-0.11.7.dev20250914103918.dist-info/entry_points.txt,sha256=m-94Paj-kxiR6Ktu0us0_2qfhn29DzF2oVzqBE6cu8w,41
+letta_nightly-0.11.7.dev20250914103918.dist-info/licenses/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
+letta_nightly-0.11.7.dev20250914103918.dist-info/RECORD,,

File without changes: the remaining dist-info files (WHEEL, entry_points.txt, licenses/LICENSE) carry +0 -0 in the summary above.