PyPI - letta-nightly - Versions diffs - 0.6.23.dev20250211104055__py3-none-any.whl → 0.6.24.dev20250212104045__py3-none-any.whl - Mend

letta-nightly 0.6.23.dev20250211104055__py3-none-any.whl → 0.6.24.dev20250212104045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of letta-nightly might be problematic. Click here for more details.

Files changed (23) hide show

letta/__init__.py +1 -2
letta/agent.py +11 -7
letta/cli/cli.py +3 -2
letta/client/streaming.py +39 -44
letta/constants.py +0 -3
letta/embeddings.py +30 -0
letta/llm_api/anthropic.py +73 -8
letta/llm_api/llm_api_tools.py +17 -11
letta/llm_api/openai.py +6 -5
letta/schemas/llm_config.py +5 -0
letta/schemas/message.py +20 -4
letta/schemas/providers.py +10 -0
letta/server/rest_api/chat_completions_interface.py +39 -30
letta/server/rest_api/interface.py +51 -28
letta/server/rest_api/routers/v1/health.py +2 -2
letta/server/rest_api/utils.py +13 -2
letta/services/agent_manager.py +6 -6
letta/settings.py +1 -1
{letta_nightly-0.6.23.dev20250211104055.dist-info → letta_nightly-0.6.24.dev20250212104045.dist-info}/METADATA +2 -1
{letta_nightly-0.6.23.dev20250211104055.dist-info → letta_nightly-0.6.24.dev20250212104045.dist-info}/RECORD +23 -23
{letta_nightly-0.6.23.dev20250211104055.dist-info → letta_nightly-0.6.24.dev20250212104045.dist-info}/LICENSE +0 -0
{letta_nightly-0.6.23.dev20250211104055.dist-info → letta_nightly-0.6.24.dev20250212104045.dist-info}/WHEEL +0 -0
{letta_nightly-0.6.23.dev20250211104055.dist-info → letta_nightly-0.6.24.dev20250212104045.dist-info}/entry_points.txt +0 -0

letta/__init__.py CHANGED Viewed

@@ -1,5 +1,4 @@
-__version__ = "0.6.23"
+__version__ = "0.6.24"
 # import clients
 from letta.client.client import LocalClient, RESTClient, create_client

letta/agent.py CHANGED Viewed

@@ -260,6 +260,7 @@ class Agent(BaseAgent):
         error_msg: str,
         tool_call_id: str,
         function_name: str,
+        function_args: dict,
         function_response: str,
         messages: List[Message],
         include_function_failed_message: bool = False,
@@ -394,6 +395,7 @@ class Agent(BaseAgent):
         messages = []  # append these to the history when done
         function_name = None
+        function_args = {}
         # Step 2: check if LLM wanted to call a function
         if response_message.function_call or (response_message.tool_calls is not None and len(response_message.tool_calls) > 0):
@@ -445,8 +447,6 @@ class Agent(BaseAgent):
             function_call = (
                 response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
             )
-            # Get the name of the function
             function_name = function_call.name
             self.logger.info(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
@@ -459,7 +459,9 @@ class Agent(BaseAgent):
             if not target_letta_tool:
                 error_msg = f"No function named {function_name}"
                 function_response = "None"  # more like "never ran?"
-                messages = self._handle_function_error_response(error_msg, tool_call_id, function_name, function_response, messages)
+                messages = self._handle_function_error_response(
+                    error_msg, tool_call_id, function_name, function_args, function_response, messages
+                )
                 return messages, False, True  # force a heartbeat to allow agent to handle error
             # Failure case 2: function name is OK, but function args are bad JSON
@@ -469,7 +471,9 @@ class Agent(BaseAgent):
             except Exception:
                 error_msg = f"Error parsing JSON for function '{function_name}' arguments: {function_call.arguments}"
                 function_response = "None"  # more like "never ran?"
-                messages = self._handle_function_error_response(error_msg, tool_call_id, function_name, function_response, messages)
+                messages = self._handle_function_error_response(
+                    error_msg, tool_call_id, function_name, function_args, function_response, messages
+                )
                 return messages, False, True  # force a heartbeat to allow agent to handle error
             # Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)
@@ -506,7 +510,7 @@ class Agent(BaseAgent):
                 if sandbox_run_result and sandbox_run_result.status == "error":
                     messages = self._handle_function_error_response(
-                        function_response, tool_call_id, function_name, function_response, messages
+                        function_response, tool_call_id, function_name, function_args, function_response, messages
                     )
                     return messages, False, True  # force a heartbeat to allow agent to handle error
@@ -535,7 +539,7 @@ class Agent(BaseAgent):
                 error_msg_user = f"{error_msg}\n{traceback.format_exc()}"
                 self.logger.error(error_msg_user)
                 messages = self._handle_function_error_response(
-                    error_msg, tool_call_id, function_name, function_response, messages, include_function_failed_message=True
+                    error_msg, tool_call_id, function_name, function_args, function_response, messages, include_function_failed_message=True
                 )
                 return messages, False, True  # force a heartbeat to allow agent to handle error
@@ -543,7 +547,7 @@ class Agent(BaseAgent):
             if function_response_string.startswith(ERROR_MESSAGE_PREFIX):
                 error_msg = function_response_string
                 messages = self._handle_function_error_response(
-                    error_msg, tool_call_id, function_name, function_response, messages, include_function_failed_message=True
+                    error_msg, tool_call_id, function_name, function_args, function_response, messages, include_function_failed_message=True
                 )
                 return messages, False, True  # force a heartbeat to allow agent to handle error

letta/cli/cli.py CHANGED Viewed

@@ -15,7 +15,6 @@ from letta.local_llm.constants import ASSISTANT_MESSAGE_CLI_SYMBOL
 from letta.log import get_logger
 from letta.schemas.enums import OptionState
 from letta.schemas.memory import ChatMemory, Memory
-from letta.server.server import logger as server_logger
 # from letta.interface import CLIInterface as interface  # for printing to terminal
 from letta.streaming_interface import StreamingRefreshCLIInterface as interface  # for printing to terminal
@@ -119,6 +118,8 @@ def run(
     utils.DEBUG = debug
     # TODO: add logging command line options for runtime log level
+    from letta.server.server import logger as server_logger
     if debug:
         logger.setLevel(logging.DEBUG)
         server_logger.setLevel(logging.DEBUG)
@@ -360,4 +361,4 @@ def delete_agent(
 def version() -> str:
     import letta
-    return letta.__version__
+    print(letta.__version__)

letta/client/streaming.py CHANGED Viewed

@@ -17,48 +17,45 @@ logger = get_logger(__name__)
 def _sse_post(url: str, data: dict, headers: dict) -> Generator[Union[LettaStreamingResponse, ChatCompletionChunk], None, None]:
-    with httpx.Client() as client:
+    """
+    Sends an SSE POST request and yields parsed response chunks.
+    """
+    # TODO: Please note this is a very generous timeout for e2b reasons
+    with httpx.Client(timeout=httpx.Timeout(5 * 60.0, read=5 * 60.0)) as client:
         with connect_sse(client, method="POST", url=url, json=data, headers=headers) as event_source:
-            # Inspect for errors before iterating (see https://github.com/florimondmanca/httpx-sse/pull/12)
+            # Check for immediate HTTP errors before processing the SSE stream
             if not event_source.response.is_success:
-                # handle errors
-                pass
-                logger.warning("Caught error before iterating SSE request:", vars(event_source.response))
-                logger.warning(event_source.response.read().decode("utf-8"))
+                response_bytes = event_source.response.read()
+                logger.warning(f"SSE request error: {vars(event_source.response)}")
+                logger.warning(response_bytes.decode("utf-8"))
                 try:
-                    response_bytes = event_source.response.read()
                     response_dict = json.loads(response_bytes.decode("utf-8"))
-                    # e.g.: This model's maximum context length is 8192 tokens. However, your messages resulted in 8198 tokens (7450 in the messages, 748 in the functions). Please reduce the length of the messages or functions.
-                    if (
-                        "error" in response_dict
-                        and "message" in response_dict["error"]
-                        and OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in response_dict["error"]["message"]
-                    ):
-                        logger.error(response_dict["error"]["message"])
-                        raise LLMError(response_dict["error"]["message"])
+                    error_message = response_dict.get("error", {}).get("message", "")
+                    if OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in error_message:
+                        logger.error(error_message)
+                        raise LLMError(error_message)
                 except LLMError:
                     raise
-                except:
-                    logger.error(f"Failed to parse SSE message, throwing SSE HTTP error up the stack")
+                except Exception:
+                    logger.error("Failed to parse SSE message, raising HTTP error")
                     event_source.response.raise_for_status()
             try:
                 for sse in event_source.iter_sse():
-                    # if sse.data == OPENAI_SSE_DONE:
-                    # print("finished")
-                    # break
-                    if sse.data in [status.value for status in MessageStreamStatus]:
-                        # break
+                    if sse.data in {status.value for status in MessageStreamStatus}:
                         yield MessageStreamStatus(sse.data)
+                        if sse.data == MessageStreamStatus.done.value:
+                            # We received the [DONE], so stop reading the stream.
+                            break
                     else:
                         chunk_data = json.loads(sse.data)
                         if "reasoning" in chunk_data:
                             yield ReasoningMessage(**chunk_data)
-                        elif "message_type" in chunk_data and chunk_data["message_type"] == "assistant_message":
+                        elif chunk_data.get("message_type") == "assistant_message":
                             yield AssistantMessage(**chunk_data)
                         elif "tool_call" in chunk_data:
                             yield ToolCallMessage(**chunk_data)
@@ -67,33 +64,31 @@ def _sse_post(url: str, data: dict, headers: dict) -> Generator[Union[LettaStrea
                         elif "step_count" in chunk_data:
                             yield LettaUsageStatistics(**chunk_data)
                         elif chunk_data.get("object") == get_args(ChatCompletionChunk.__annotations__["object"])[0]:
-                            yield ChatCompletionChunk(**chunk_data)  # Add your processing logic for chat chunks here
+                            yield ChatCompletionChunk(**chunk_data)
                         else:
                             raise ValueError(f"Unknown message type in chunk_data: {chunk_data}")
             except SSEError as e:
-                logger.error("Caught an error while iterating the SSE stream:", str(e))
-                if "application/json" in str(e):  # Check if the error is because of JSON response
-                    # TODO figure out a better way to catch the error other than re-trying with a POST
-                    response = client.post(url=url, json=data, headers=headers)  # Make the request again to get the JSON response
-                    if response.headers["Content-Type"].startswith("application/json"):
-                        error_details = response.json()  # Parse the JSON to get the error message
-                        logger.error("Request:", vars(response.request))
-                        logger.error("POST Error:", error_details)
-                        logger.error("Original SSE Error:", str(e))
+                logger.error(f"SSE stream error: {e}")
+                if "application/json" in str(e):
+                    response = client.post(url=url, json=data, headers=headers)
+                    if response.headers.get("Content-Type", "").startswith("application/json"):
+                        error_details = response.json()
+                        logger.error(f"POST Error: {error_details}")
                     else:
                         logger.error("Failed to retrieve JSON error message via retry.")
-                else:
-                    logger.error("SSEError not related to 'application/json' content type.")
-                # Optionally re-raise the exception if you need to propagate it
                 raise e
             except Exception as e:
-                if event_source.response.request is not None:
-                    logger.error("HTTP Request:", vars(event_source.response.request))
-                if event_source.response is not None:
-                    logger.error("HTTP Status:", event_source.response.status_code)
-                    logger.error("HTTP Headers:", event_source.response.headers)
-                logger.error("Exception message:", str(e))
+                logger.error(f"Unexpected exception: {e}")
+                if event_source.response.request:
+                    logger.error(f"HTTP Request: {vars(event_source.response.request)}")
+                if event_source.response:
+                    logger.error(f"HTTP Status: {event_source.response.status_code}")
+                    logger.error(f"HTTP Headers: {event_source.response.headers}")
                 raise e

letta/constants.py CHANGED Viewed

@@ -51,9 +51,6 @@ BASE_TOOLS = ["send_message", "conversation_search", "archival_memory_insert", "
 BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
 # Multi agent tools
 MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_all_tags", "send_message_to_agent_async"]
-MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES = 3
-MULTI_AGENT_SEND_MESSAGE_TIMEOUT = 20 * 60
-MULTI_AGENT_CONCURRENT_SENDS = 15
 # The name of the tool used to send message to the user
 # May not be relevant in cases where the agent has multiple ways to message to user (send_imessage, send_discord_mesasge, ...)

letta/embeddings.py CHANGED Viewed

@@ -167,6 +167,27 @@ class OllamaEmbeddings:
         return response_json["embedding"]
+class GoogleEmbeddings:
+    def __init__(self, api_key: str, model: str, base_url: str):
+        self.api_key = api_key
+        self.model = model
+        self.base_url = base_url  # Expected to be "https://generativelanguage.googleapis.com"
+    def get_text_embedding(self, text: str):
+        import httpx
+        headers = {"Content-Type": "application/json"}
+        # Build the URL based on the provided base_url, model, and API key.
+        url = f"{self.base_url}/v1beta/models/{self.model}:embedContent?key={self.api_key}"
+        payload = {"model": self.model, "content": {"parts": [{"text": text}]}}
+        with httpx.Client() as client:
+            response = client.post(url, headers=headers, json=payload)
+        # Raise an error for non-success HTTP status codes.
+        response.raise_for_status()
+        response_json = response.json()
+        return response_json["embedding"]["values"]
 def query_embedding(embedding_model, query_text: str):
     """Generate padded embedding for querying database"""
     query_vec = embedding_model.get_text_embedding(query_text)
@@ -237,5 +258,14 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
         )
         return model
+    elif endpoint_type == "google_ai":
+        assert all([model_settings.gemini_api_key is not None, model_settings.gemini_base_url is not None])
+        model = GoogleEmbeddings(
+            model=config.embedding_model,
+            api_key=model_settings.gemini_api_key,
+            base_url=model_settings.gemini_base_url,
+        )
+        return model
     else:
         raise ValueError(f"Unknown endpoint type {endpoint_type}")

letta/llm_api/anthropic.py CHANGED Viewed

@@ -19,6 +19,8 @@ from anthropic.types.beta import (
 from letta.errors import BedrockError, BedrockPermissionError
 from letta.llm_api.aws_bedrock import get_bedrock_client
+from letta.llm_api.helpers import add_inner_thoughts_to_functions
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
@@ -513,9 +515,23 @@ def convert_anthropic_stream_event_to_chatcompletion(
 def _prepare_anthropic_request(
     data: ChatCompletionRequest,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
+    # if true, prefix fill the generation with the thinking tag
+    prefix_fill: bool = True,
+    # if true, put COT inside the tool calls instead of inside the content
+    put_inner_thoughts_in_kwargs: bool = False,
 ) -> dict:
     """Prepare the request data for Anthropic API format."""
-    # convert the tools
+    # if needed, put inner thoughts as a kwarg for all tools
+    if data.tools and put_inner_thoughts_in_kwargs:
+        functions = add_inner_thoughts_to_functions(
+            functions=[t.function.model_dump() for t in data.tools],
+            inner_thoughts_key=INNER_THOUGHTS_KWARG,
+            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+        )
+        data.tools = [Tool(function=f) for f in functions]
+    # convert the tools to Anthropic's payload format
     anthropic_tools = None if data.tools is None else convert_tools_to_anthropic_format(data.tools)
     # pydantic -> dict
@@ -529,11 +545,25 @@ def _prepare_anthropic_request(
         data.pop("tools")
         data.pop("tool_choice", None)
     elif anthropic_tools is not None:
+        # TODO eventually enable parallel tool use
         data["tools"] = anthropic_tools
-        if len(anthropic_tools) == 1:
+        # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
+        if put_inner_thoughts_in_kwargs:
+            if len(anthropic_tools) == 1:
+                data["tool_choice"] = {
+                    "type": "tool",
+                    "name": anthropic_tools[0]["name"],
+                    "disable_parallel_tool_use": True,
+                }
+            else:
+                data["tool_choice"] = {
+                    "type": "any",
+                    "disable_parallel_tool_use": True,
+                }
+        else:
             data["tool_choice"] = {
-                "type": "tool",
-                "name": anthropic_tools[0]["name"],
+                "type": "auto",
                 "disable_parallel_tool_use": True,
             }
@@ -548,8 +578,21 @@ def _prepare_anthropic_request(
             message["content"] = None
     # Convert to Anthropic format
-    msg_objs = [_Message.dict_to_message(user_id=None, agent_id=None, openai_message_dict=m) for m in data["messages"]]
-    data["messages"] = [m.to_anthropic_dict(inner_thoughts_xml_tag=inner_thoughts_xml_tag) for m in msg_objs]
+    msg_objs = [
+        _Message.dict_to_message(
+            user_id=None,
+            agent_id=None,
+            openai_message_dict=m,
+        )
+        for m in data["messages"]
+    ]
+    data["messages"] = [
+        m.to_anthropic_dict(
+            inner_thoughts_xml_tag=inner_thoughts_xml_tag,
+            put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+        )
+        for m in msg_objs
+    ]
     # Ensure first message is user
     if data["messages"][0]["role"] != "user":
@@ -558,6 +601,16 @@ def _prepare_anthropic_request(
     # Handle alternating messages
     data["messages"] = merge_tool_results_into_user_messages(data["messages"])
+    # Handle prefix fill (not compatible with inner-thoughts-in-kwargs)
+    # https://docs.anthropic.com/en/api/messages#body-messages
+    # NOTE: cannot prefill with tools for opus:
+    # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
+    if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
+        data["messages"].append(
+            # Start the thinking process for the assistant
+            {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
+        )
     # Validate max_tokens
     assert "max_tokens" in data, data
@@ -571,6 +624,7 @@ def _prepare_anthropic_request(
 def anthropic_chat_completions_request(
     data: ChatCompletionRequest,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
+    put_inner_thoughts_in_kwargs: bool = False,
     betas: List[str] = ["tools-2024-04-04"],
 ) -> ChatCompletionResponse:
     """https://docs.anthropic.com/claude/docs/tool-use"""
@@ -580,7 +634,11 @@ def anthropic_chat_completions_request(
         anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
     elif model_settings.anthropic_api_key:
         anthropic_client = anthropic.Anthropic()
-    data = _prepare_anthropic_request(data, inner_thoughts_xml_tag)
+    data = _prepare_anthropic_request(
+        data=data,
+        inner_thoughts_xml_tag=inner_thoughts_xml_tag,
+        put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+    )
     response = anthropic_client.beta.messages.create(
         **data,
         betas=betas,
@@ -611,6 +669,7 @@ def anthropic_bedrock_chat_completions_request(
 def anthropic_chat_completions_request_stream(
     data: ChatCompletionRequest,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
+    put_inner_thoughts_in_kwargs: bool = False,
     betas: List[str] = ["tools-2024-04-04"],
 ) -> Generator[ChatCompletionChunkResponse, None, None]:
     """Stream chat completions from Anthropic API.
@@ -618,7 +677,11 @@ def anthropic_chat_completions_request_stream(
     Similar to OpenAI's streaming, but using Anthropic's native streaming support.
     See: https://docs.anthropic.com/claude/reference/messages-streaming
     """
-    data = _prepare_anthropic_request(data, inner_thoughts_xml_tag)
+    data = _prepare_anthropic_request(
+        data=data,
+        inner_thoughts_xml_tag=inner_thoughts_xml_tag,
+        put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+    )
     anthropic_override_key = ProviderManager().get_anthropic_override_key()
     if anthropic_override_key:
@@ -666,6 +729,7 @@ def anthropic_chat_completions_process_stream(
     chat_completion_request: ChatCompletionRequest,
     stream_interface: Optional[Union[AgentChunkStreamingInterface, AgentRefreshStreamingInterface]] = None,
     inner_thoughts_xml_tag: Optional[str] = "thinking",
+    put_inner_thoughts_in_kwargs: bool = False,
     create_message_id: bool = True,
     create_message_datetime: bool = True,
     betas: List[str] = ["tools-2024-04-04"],
@@ -743,6 +807,7 @@ def anthropic_chat_completions_process_stream(
             anthropic_chat_completions_request_stream(
                 data=chat_completion_request,
                 inner_thoughts_xml_tag=inner_thoughts_xml_tag,
+                put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
                 betas=betas,
             )
         ):

letta/llm_api/llm_api_tools.py CHANGED Viewed

@@ -111,7 +111,6 @@ def create(
     # streaming?
     stream: bool = False,
     stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
-    max_tokens: Optional[int] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
@@ -157,7 +156,7 @@ def create(
             else:
                 function_call = "required"
-        data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens)
+        data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming)
         if stream:  # Client requested token streaming
             data.stream = True
             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
@@ -212,7 +211,7 @@ def create(
         # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
         llm_config.model_endpoint = model_settings.azure_base_url
         chat_completion_request = build_openai_chat_completions_request(
-            llm_config, messages, user_id, functions, function_call, use_tool_naming, max_tokens
+            llm_config, messages, user_id, functions, function_call, use_tool_naming
         )
         response = azure_openai_chat_completions_request(
@@ -248,7 +247,7 @@ def create(
             data=dict(
                 contents=[m.to_google_ai_dict() for m in messages],
                 tools=tools,
-                generation_config={"temperature": llm_config.temperature},
+                generation_config={"temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens},
             ),
             inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
         )
@@ -268,7 +267,7 @@ def create(
             messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
             tools=([{"type": "function", "function": f} for f in functions] if functions else None),
             tool_choice=tool_call,
-            max_tokens=1024,  # TODO make dynamic
+            max_tokens=llm_config.max_tokens,  # Note: max_tokens is required for Anthropic API
             temperature=llm_config.temperature,
             stream=stream,
         )
@@ -279,14 +278,21 @@ def create(
             response = anthropic_chat_completions_process_stream(
                 chat_completion_request=chat_completion_request,
+                put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
                 stream_interface=stream_interface,
             )
-            return response
-        # Client did not request token streaming (expect a blocking backend response)
-        return anthropic_chat_completions_request(
-            data=chat_completion_request,
-        )
+        else:
+            # Client did not request token streaming (expect a blocking backend response)
+            response = anthropic_chat_completions_request(
+                data=chat_completion_request,
+                put_inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
+            )
+        if llm_config.put_inner_thoughts_in_kwargs:
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+        return response
     # elif llm_config.model_endpoint_type == "cohere":
     #     if stream:
@@ -416,7 +422,7 @@ def create(
                 tool_choice=tool_call,
                 # user=str(user_id),
                 # NOTE: max_tokens is required for Anthropic API
-                max_tokens=1024,  # TODO make dynamic
+                max_tokens=llm_config.max_tokens,
             ),
         )

letta/llm_api/openai.py CHANGED Viewed

@@ -7,6 +7,7 @@ from openai import OpenAI
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+from letta.log import get_logger
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
@@ -26,7 +27,7 @@ from letta.schemas.openai.embedding_response import EmbeddingResponse
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
 from letta.utils import get_tool_call_id, smart_urljoin
-OPENAI_SSE_DONE = "[DONE]"
+logger = get_logger(__name__)
 def openai_get_model_list(
@@ -93,7 +94,6 @@ def build_openai_chat_completions_request(
     functions: Optional[list],
     function_call: Optional[str],
     use_tool_naming: bool,
-    max_tokens: Optional[int],
 ) -> ChatCompletionRequest:
     if functions and llm_config.put_inner_thoughts_in_kwargs:
         # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
@@ -130,7 +130,7 @@ def build_openai_chat_completions_request(
             tools=[Tool(type="function", function=f) for f in functions] if functions else None,
             tool_choice=tool_choice,
             user=str(user_id),
-            max_completion_tokens=max_tokens,
+            max_completion_tokens=llm_config.max_tokens,
             temperature=llm_config.temperature,
         )
     else:
@@ -140,7 +140,7 @@ def build_openai_chat_completions_request(
             functions=functions,
             function_call=function_call,
             user=str(user_id),
-            max_completion_tokens=max_tokens,
+            max_completion_tokens=llm_config.max_tokens,
             temperature=llm_config.temperature,
         )
         # https://platform.openai.com/docs/guides/text-generation/json-mode
@@ -354,9 +354,10 @@ def openai_chat_completions_process_stream(
     except Exception as e:
         if stream_interface:
             stream_interface.stream_end()
-        print(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
+        logger.error(f"Parsing ChatCompletion stream failed with error:\n{str(e)}")
         raise e
     finally:
+        logger.info(f"Finally ending streaming interface.")
         if stream_interface:
             stream_interface.stream_end()

letta/schemas/llm_config.py CHANGED Viewed

@@ -15,6 +15,7 @@ class LLMConfig(BaseModel):
         context_window (int): The context window size for the model.
         put_inner_thoughts_in_kwargs (bool): Puts `inner_thoughts` as a kwarg in the function call if this is set to True. This helps with function calling performance and also the generation of inner thoughts.
         temperature (float): The temperature to use when generating text with the model. A higher temperature will result in more random text.
+        max_tokens (int): The maximum number of tokens to generate.
     """
     # TODO: 🤮 don't default to a vendor! bug city!
@@ -51,6 +52,10 @@ class LLMConfig(BaseModel):
         0.7,
         description="The temperature to use when generating text with the model. A higher temperature will result in more random text.",
     )
+    max_tokens: Optional[int] = Field(
+        1024,
+        description="The maximum number of tokens to generate. If not set, the model will use its default value.",
+    )
     # FIXME hack to silence pydantic protected namespace warning
     model_config = ConfigDict(protected_namespaces=())

letta/schemas/message.py CHANGED Viewed

@@ -542,7 +542,11 @@ class Message(BaseMessage):
         return openai_message
-    def to_anthropic_dict(self, inner_thoughts_xml_tag="thinking") -> dict:
+    def to_anthropic_dict(
+        self,
+        inner_thoughts_xml_tag="thinking",
+        put_inner_thoughts_in_kwargs: bool = False,
+    ) -> dict:
         """
         Convert to an Anthropic message dictionary
@@ -586,26 +590,38 @@ class Message(BaseMessage):
                 "role": self.role,
             }
             content = []
-            if self.text is not None:
+            # COT / reasoning / thinking
+            if self.text is not None and not put_inner_thoughts_in_kwargs:
                 content.append(
                     {
                         "type": "text",
                         "text": add_xml_tag(string=self.text, xml_tag=inner_thoughts_xml_tag),
                     }
                 )
+            # Tool calling
             if self.tool_calls is not None:
                 for tool_call in self.tool_calls:
+                    if put_inner_thoughts_in_kwargs:
+                        tool_call_input = add_inner_thoughts_to_tool_call(
+                            tool_call,
+                            inner_thoughts=self.text,
+                            inner_thoughts_key=INNER_THOUGHTS_KWARG,
+                        ).model_dump()
+                    else:
+                        tool_call_input = json.loads(tool_call.function.arguments)
                     content.append(
                         {
                             "type": "tool_use",
                             "id": tool_call.id,
                             "name": tool_call.function.name,
-                            "input": json.loads(tool_call.function.arguments),
+                            "input": tool_call_input,
                         }
                     )
             # If the only content was text, unpack it back into a singleton
-            # TODO
+            # TODO support multi-modal
             anthropic_message["content"] = content
             # Optional fields, do not include if null

letta/schemas/providers.py CHANGED Viewed

@@ -347,6 +347,15 @@ class AnthropicProvider(Provider):
         configs = []
         for model in models:
+            # We set this to false by default, because Anthropic can
+            # natively support <thinking> tags inside of content fields
+            # However, putting COT inside of tool calls can make it more
+            # reliable for tool calling (no chance of a non-tool call step)
+            # Since tool_choice_type 'any' doesn't work with in-content COT
+            # NOTE For Haiku, it can be flaky if we don't enable this by default
+            inner_thoughts_in_kwargs = True if "haiku" in model["name"] else False
             configs.append(
                 LLMConfig(
                     model=model["name"],
@@ -354,6 +363,7 @@ class AnthropicProvider(Provider):
                     model_endpoint=self.base_url,
                     context_window=model["context_window"],
                     handle=self.get_handle(model["name"]),
+                    put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
                 )
             )
         return configs

letta/server/rest_api/chat_completions_interface.py CHANGED Viewed

@@ -41,7 +41,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
     def __init__(
         self,
         multi_step: bool = True,
-        timeout: int = 150,
+        timeout: int = 3 * 60,
         # The following are placeholders for potential expansions; they
         # remain if you need to differentiate between actual "assistant messages"
         # vs. tool calls. By default, they are set for the "send_message" tool usage.
@@ -55,6 +55,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         # Parsing state for incremental function-call data
         self.current_function_name = ""
         self.current_function_arguments = []
+        self.current_json_parse_result = {}
         # Internal chunk buffer and event for async notification
         self._chunks = deque()
@@ -85,6 +86,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
             try:
                 await asyncio.wait_for(self._event.wait(), timeout=self.timeout)
             except asyncio.TimeoutError:
+                logger.warning("Chat completions interface timed out! Please check that this is intended.")
                 break
             while self._chunks:
@@ -105,7 +107,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         self,
         item: ChatCompletionChunk,
     ):
-        """
+        """m
         Add an item (a LettaMessage, status marker, or partial chunk)
         to the queue and signal waiting consumers.
         """
@@ -156,6 +158,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         Called externally with a ChatCompletionChunkResponse. Transforms
         it if necessary, then enqueues partial messages for streaming back.
         """
+        # print("RECEIVED CHUNK...")
         processed_chunk = self._process_chunk_to_openai_style(chunk)
         if processed_chunk is not None:
             self._push_to_buffer(processed_chunk)
@@ -216,37 +219,43 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
                 combined_args = "".join(self.current_function_arguments)
                 parsed_args = OptimisticJSONParser().parse(combined_args)
-                # If we can see a "message" field, return it as partial content
-                if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
-                    return ChatCompletionChunk(
-                        id=chunk.id,
-                        object=chunk.object,
-                        created=chunk.created.timestamp(),
-                        model=chunk.model,
-                        choices=[
-                            Choice(
-                                index=choice.index,
-                                delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
-                                finish_reason=None,
-                            )
-                        ],
-                    )
+                # If the parsed result is different
+                # This is an edge case we need to consider. E.g. if the last streamed token is '}', we shouldn't stream that out
+                if parsed_args != self.current_json_parse_result:
+                    self.current_json_parse_result = parsed_args
+                    # If we can see a "message" field, return it as partial content
+                    if self.assistant_message_tool_kwarg in parsed_args and parsed_args[self.assistant_message_tool_kwarg]:
+                        return ChatCompletionChunk(
+                            id=chunk.id,
+                            object=chunk.object,
+                            created=chunk.created.timestamp(),
+                            model=chunk.model,
+                            choices=[
+                                Choice(
+                                    index=choice.index,
+                                    delta=ChoiceDelta(content=self.current_function_arguments[-1], role=self.ASSISTANT_STR),
+                                    finish_reason=None,
+                                )
+                            ],
+                        )
         # If there's a finish reason, pass that along
         if choice.finish_reason is not None:
-            return ChatCompletionChunk(
-                id=chunk.id,
-                object=chunk.object,
-                created=chunk.created.timestamp(),
-                model=chunk.model,
-                choices=[
-                    Choice(
-                        index=choice.index,
-                        delta=ChoiceDelta(),
-                        finish_reason=self.FINISH_REASON_STR,
-                    )
-                ],
-            )
+            # only emit a final chunk if finish_reason == "stop"
+            if choice.finish_reason == "stop":
+                return ChatCompletionChunk(
+                    id=chunk.id,
+                    object=chunk.object,
+                    created=chunk.created.timestamp(),
+                    model=chunk.model,
+                    choices=[
+                        Choice(
+                            index=choice.index,
+                            delta=ChoiceDelta(),  # no partial text here
+                            finish_reason="stop",
+                        )
+                    ],
+                )
         return None

letta/server/rest_api/interface.py CHANGED Viewed

@@ -436,11 +436,15 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # inner thoughts
         if message_delta.content is not None:
-            processed_chunk = ReasoningMessage(
-                id=message_id,
-                date=message_date,
-                reasoning=message_delta.content,
-            )
+            if message_delta.content == "":
+                print("skipping empty content")
+                processed_chunk = None
+            else:
+                processed_chunk = ReasoningMessage(
+                    id=message_id,
+                    date=message_date,
+                    reasoning=message_delta.content,
+                )
         # tool calls
         elif message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
@@ -496,15 +500,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         if tool_call.function.name:
                             tool_call_delta["name"] = tool_call.function.name
-                    processed_chunk = ToolCallMessage(
-                        id=message_id,
-                        date=message_date,
-                        tool_call=ToolCallDelta(
-                            name=tool_call_delta.get("name"),
-                            arguments=tool_call_delta.get("arguments"),
-                            tool_call_id=tool_call_delta.get("id"),
-                        ),
-                    )
+                    # We might end up with a no-op, in which case we should omit
+                    if (
+                        tool_call_delta.get("name") is None
+                        and tool_call_delta.get("arguments") in [None, ""]
+                        and tool_call_delta.get("id") is None
+                    ):
+                        processed_chunk = None
+                        print("skipping empty chunk...")
+                    else:
+                        processed_chunk = ToolCallMessage(
+                            id=message_id,
+                            date=message_date,
+                            tool_call=ToolCallDelta(
+                                name=tool_call_delta.get("name"),
+                                arguments=tool_call_delta.get("arguments"),
+                                tool_call_id=tool_call_delta.get("id"),
+                            ),
+                        )
             elif self.inner_thoughts_in_kwargs and tool_call.function:
                 processed_chunk = None
@@ -525,11 +538,12 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         self.function_id_buffer += tool_call.id
                 if tool_call.function.arguments:
-                    if chunk.model.startswith("claude-"):
-                        updates_main_json = tool_call.function.arguments
-                        updates_inner_thoughts = ""
-                    else:  # OpenAI
-                        updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
+                    # if chunk.model.startswith("claude-"):
+                    # updates_main_json = tool_call.function.arguments
+                    # updates_inner_thoughts = ""
+                    # else:  # OpenAI
+                    # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
+                    updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
                     # If we have inner thoughts, we should output them as a chunk
                     if updates_inner_thoughts:
@@ -787,15 +801,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     if tool_call.function.name:
                         tool_call_delta["name"] = tool_call.function.name
-                processed_chunk = ToolCallMessage(
-                    id=message_id,
-                    date=message_date,
-                    tool_call=ToolCallDelta(
-                        name=tool_call_delta.get("name"),
-                        arguments=tool_call_delta.get("arguments"),
-                        tool_call_id=tool_call_delta.get("id"),
-                    ),
-                )
+                # We might end up with a no-op, in which case we should omit
+                if (
+                    tool_call_delta.get("name") is None
+                    and tool_call_delta.get("arguments") in [None, ""]
+                    and tool_call_delta.get("id") is None
+                ):
+                    processed_chunk = None
+                    print("skipping empty chunk...")
+                else:
+                    processed_chunk = ToolCallMessage(
+                        id=message_id,
+                        date=message_date,
+                        tool_call=ToolCallDelta(
+                            name=tool_call_delta.get("name"),
+                            arguments=tool_call_delta.get("arguments"),
+                            tool_call_id=tool_call_delta.get("id"),
+                        ),
+                    )
         elif choice.finish_reason is not None:
             # skip if there's a finish

letta/server/rest_api/routers/v1/health.py CHANGED Viewed

@@ -2,7 +2,7 @@ from typing import TYPE_CHECKING
 from fastapi import APIRouter
-from letta.cli.cli import version
+from letta import __version__
 from letta.schemas.health import Health
 if TYPE_CHECKING:
@@ -15,6 +15,6 @@ router = APIRouter(prefix="/health", tags=["health"])
 @router.get("/", response_model=Health, operation_id="health_check")
 def health_check():
     return Health(
-        version=version(),
+        version=__version__,
         status="ok",
     )

letta/server/rest_api/utils.py CHANGED Viewed

@@ -9,6 +9,7 @@ from fastapi import Header
 from pydantic import BaseModel
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
+from letta.log import get_logger
 from letta.schemas.usage import LettaUsageStatistics
 from letta.server.rest_api.interface import StreamingServerInterface
@@ -24,10 +25,14 @@ SSE_FINISH_MSG = "[DONE]"  # mimic openai
 SSE_ARTIFICIAL_DELAY = 0.1
+logger = get_logger(__name__)
 def sse_formatter(data: Union[dict, str]) -> str:
     """Prefix with 'data: ', and always include double newlines"""
     assert type(data) in [dict, str], f"Expected type dict or str, got type {type(data)}"
     data_str = json.dumps(data, separators=(",", ":")) if isinstance(data, dict) else data
+    # print(f"data: {data_str}\n\n")
     return f"data: {data_str}\n\n"
@@ -62,23 +67,29 @@ async def sse_async_generator(
                 usage = await usage_task
                 # Double-check the type
                 if not isinstance(usage, LettaUsageStatistics):
-                    raise ValueError(f"Expected LettaUsageStatistics, got {type(usage)}")
+                    err_msg = f"Expected LettaUsageStatistics, got {type(usage)}"
+                    logger.error(err_msg)
+                    raise ValueError(err_msg)
                 yield sse_formatter(usage.model_dump())
             except ContextWindowExceededError as e:
                 log_error_to_sentry(e)
+                logger.error(f"ContextWindowExceededError error: {e}")
                 yield sse_formatter({"error": f"Stream failed: {e}", "code": str(e.code.value) if e.code else None})
             except RateLimitExceededError as e:
                 log_error_to_sentry(e)
+                logger.error(f"RateLimitExceededError error: {e}")
                 yield sse_formatter({"error": f"Stream failed: {e}", "code": str(e.code.value) if e.code else None})
             except Exception as e:
                 log_error_to_sentry(e)
-                yield sse_formatter({"error": f"Stream failed (internal error occured)"})
+                logger.error(f"Caught unexpected Exception: {e}")
+                yield sse_formatter({"error": f"Stream failed (internal error occurred)"})
     except Exception as e:
         log_error_to_sentry(e)
+        logger.error(f"Caught unexpected Exception: {e}")
         yield sse_formatter({"error": "Stream failed (decoder encountered an error)"})
     finally:

letta/services/agent_manager.py CHANGED Viewed

@@ -477,39 +477,39 @@ class AgentManager:
             )
             message = self.message_manager.create_message(message, actor=actor)
             message_ids = [message.id] + agent_state.message_ids[1:]  # swap index 0 (system)
-            return self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
+            return self._set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
         else:
             return agent_state
     @enforce_types
-    def set_in_context_messages(self, agent_id: str, message_ids: List[str], actor: PydanticUser) -> PydanticAgentState:
+    def _set_in_context_messages(self, agent_id: str, message_ids: List[str], actor: PydanticUser) -> PydanticAgentState:
         return self.update_agent(agent_id=agent_id, agent_update=UpdateAgent(message_ids=message_ids), actor=actor)
     @enforce_types
     def trim_older_in_context_messages(self, num: int, agent_id: str, actor: PydanticUser) -> PydanticAgentState:
         message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
         new_messages = [message_ids[0]] + message_ids[num:]  # 0 is system message
-        return self.set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
+        return self._set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
     @enforce_types
     def trim_all_in_context_messages_except_system(self, agent_id: str, actor: PydanticUser) -> PydanticAgentState:
         message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
         new_messages = [message_ids[0]]  # 0 is system message
-        return self.set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
+        return self._set_in_context_messages(agent_id=agent_id, message_ids=new_messages, actor=actor)
     @enforce_types
     def prepend_to_in_context_messages(self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser) -> PydanticAgentState:
         message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids
         new_messages = self.message_manager.create_many_messages(messages, actor=actor)
         message_ids = [message_ids[0]] + [m.id for m in new_messages] + message_ids[1:]
-        return self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
+        return self._set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
     @enforce_types
     def append_to_in_context_messages(self, messages: List[PydanticMessage], agent_id: str, actor: PydanticUser) -> PydanticAgentState:
         messages = self.message_manager.create_many_messages(messages, actor=actor)
         message_ids = self.get_agent_by_id(agent_id=agent_id, actor=actor).message_ids or []
         message_ids += [m.id for m in messages]
-        return self.set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
+        return self._set_in_context_messages(agent_id=agent_id, message_ids=message_ids, actor=actor)
     @enforce_types
     def reset_messages(self, agent_id: str, actor: PydanticUser, add_default_initial_messages: bool = False) -> PydanticAgentState:

letta/settings.py CHANGED Viewed

@@ -85,7 +85,7 @@ class ModelSettings(BaseSettings):
     # google ai
     gemini_api_key: Optional[str] = None
+    gemini_base_url: str = "https://generativelanguage.googleapis.com/"
     # together
     together_api_key: Optional[str] = None

{letta_nightly-0.6.23.dev20250211104055.dist-info → letta_nightly-0.6.24.dev20250212104045.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: letta-nightly
-Version: 0.6.23.dev20250211104055
+Version: 0.6.24.dev20250212104045
 Summary: Create LLM agents with long-term memory and custom tools
 License: Apache License
 Author: Letta Team
@@ -34,6 +34,7 @@ Requires-Dist: docker (>=7.1.0,<8.0.0) ; extra == "external-tools" or extra == "
 Requires-Dist: docstring-parser (>=0.16,<0.17)
 Requires-Dist: docx2txt (>=0.8,<0.9)
 Requires-Dist: e2b-code-interpreter (>=1.0.3,<2.0.0) ; extra == "cloud-tool-sandbox"
+Requires-Dist: faker (>=36.1.0,<37.0.0)
 Requires-Dist: fastapi (>=0.115.6,<0.116.0) ; extra == "server" or extra == "all"
 Requires-Dist: grpcio (>=1.68.1,<2.0.0)
 Requires-Dist: grpcio-tools (>=1.68.1,<2.0.0)

{letta_nightly-0.6.23.dev20250211104055.dist-info → letta_nightly-0.6.24.dev20250212104045.dist-info}/RECORD RENAMED Viewed

@@ -1,21 +1,21 @@
-letta/__init__.py,sha256=7Jj4qnODNSOECEFr6e6vMUw-7kqA-Y65s6SWaFBmr44,919
+letta/__init__.py,sha256=uwg8T6rRtfO8a2xRXUlXfp0rIsNyRkmGFAGne7CYEXM,918
 letta/__main__.py,sha256=6Hs2PV7EYc5Tid4g4OtcLXhqVHiNYTGzSBdoOnW2HXA,29
-letta/agent.py,sha256=xOhzNF-DMxBzCkADyw1-OILsxfy2gMBkV0CoQ3XfW_I,56980
+letta/agent.py,sha256=KHpTmZnyNu7VE9vjZ6cP6vG1RzTnFZN_FG3_RAwYeQY,57143
 letta/benchmark/benchmark.py,sha256=ebvnwfp3yezaXOQyGXkYCDYpsmre-b9hvNtnyx4xkG0,3701
 letta/benchmark/constants.py,sha256=aXc5gdpMGJT327VuxsT5FngbCK2J41PQYeICBO7g_RE,536
 letta/chat_only_agent.py,sha256=71Lf-df8y3nsE9IFKpEigaZaWHoWnXnhVChkp1L-83I,4760
-letta/cli/cli.py,sha256=_uGKM-RvGLGf7y8iWjkLgLTxIw7uWrdCdL5ETUOCkUs,16472
+letta/cli/cli.py,sha256=zJz78-qDUz-depb7VQWkg87RBKiETQU4h9DI6ukQBa8,16477
 letta/cli/cli_config.py,sha256=2oo4vui1GXQarAD6Ru4SRzPvcW4eX2mCXOBusfYGvJw,8533
 letta/cli/cli_load.py,sha256=xFw-CuzjChcIptaqQ1XpDROENt0JSjyPeiQ0nmEeO1k,2706
 letta/client/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/client/client.py,sha256=ZgJEt5F1yB6Q_z9Qi0FJ7Vmlb-YK41tymSKFB7NWy38,138311
-letta/client/streaming.py,sha256=DzE86XJTg_0j9eC45Hrpy9vPt-Wfo1F-sIv_B7iNV6I,5509
+letta/client/streaming.py,sha256=lN9vamc07sfQlRbFif327GvURLUPhx-4AC_oUOPvs6w,4543
 letta/client/utils.py,sha256=VCGV-op5ZSmurd4yw7Vhf93XDQ0BkyBT8qsuV7EqfiU,2859
 letta/config.py,sha256=JFGY4TWW0Wm5fTbZamOwWqk5G8Nn-TXyhgByGoAqy2c,12375
-letta/constants.py,sha256=ZyPGoe68NfBCteTQI6hX9aFhszuBvy10xakb2FFKV9M,7276
+letta/constants.py,sha256=1MG3QTRgKdcEI5vqoJUkxkiM2aDIQR4xd0BOh0mcCgw,7158
 letta/data_sources/connectors.py,sha256=R2AssXpqS7wN6VI8AfxvqaZs5S1ZACc4E_FewmR9iZI,7022
 letta/data_sources/connectors_helper.py,sha256=2TQjCt74fCgT5sw1AP8PalDEk06jPBbhrPG4HVr-WLs,3371
-letta/embeddings.py,sha256=VgqbUqYL6oTuLOKGOd_8swTRMYIpRTIWJbBthjT8eR8,8838
+letta/embeddings.py,sha256=WwnIul-4po2jAgOPqZ36gAjhEBLa7hDcb3lNXpahBAw,10110
 letta/errors.py,sha256=6fQXg2unP-2fo3R7db0ayKKWlD2XMusOPNi9TgJplCg,5558
 letta/functions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/functions/ast_parsers.py,sha256=MEFfGxpflUsw34JiY9zdunkpbczAYxte8t4rDPOmXfQ,3620
@@ -33,16 +33,16 @@ letta/humans/examples/basic.txt,sha256=Lcp8YESTWvOJgO4Yf_yyQmgo5bKakeB1nIVrwEGG6
 letta/humans/examples/cs_phd.txt,sha256=9C9ZAV_VuG7GB31ksy3-_NAyk8rjE6YtVOkhp08k1xw,297
 letta/interface.py,sha256=JszHyhIK34dpV0h5KL0CD1W4svh4eijaHGgfOYyZOhg,12755
 letta/llm_api/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/llm_api/anthropic.py,sha256=ZPPjpYZh8hleSjHkbRF27EEwSp-pg23JlSo-b1wbWBY,33602
+letta/llm_api/anthropic.py,sha256=OcQ60GTXtz6-BcbaMyIFnLGvZSNZvA9Ixqjp3x_drNY,36235
 letta/llm_api/aws_bedrock.py,sha256=-ms9tdROu8DLrEZJ9XgL-IyIOU_0UJKuhfRbjLs0_Gc,3838
 letta/llm_api/azure_openai.py,sha256=Y1HKPog1XzM_f7ujUK_Gv2zQkoy5pU-1bKiUnvSxSrs,6297
 letta/llm_api/azure_openai_constants.py,sha256=_f7NKjKBPxGPFQPfP1e0umHk4Jmf56qNjyecI0PqWqU,267
 letta/llm_api/cohere.py,sha256=H5kzYH_aQAnGNq7lip7XyKGLEOKC318Iw0_tiTP6kc4,14772
 letta/llm_api/google_ai.py,sha256=MIX4nmyC6448AvyPPSE8JZ_tzSpKJTArkZSfQGGoy0M,17920
 letta/llm_api/helpers.py,sha256=ov9WHsLSvkceIpSNJ3PUgCvufD862Bcrum-bWrUVJko,16193
-letta/llm_api/llm_api_tools.py,sha256=UXm1t_DPyJVhBtzBGP8wv1LPTKyfsng31X0yfIAEusI,20292
+letta/llm_api/llm_api_tools.py,sha256=rPqMHgKWMQ9sgQCWh48TavTcedhHg1rfAZlN5TUXlxk,20693
 letta/llm_api/mistral.py,sha256=fHdfD9ug-rQIk2qn8tRKay1U6w9maF11ryhKi91FfXM,1593
-letta/llm_api/openai.py,sha256=gE2RTYsyATYjicgE4VwATUAwTD38B74ZVqy8oVemzdQ,20277
+letta/llm_api/openai.py,sha256=T69e4oveJw1IdzuONIaK4t1aRqXggTdfQ6n6eW0Uh8Q,20371
 letta/local_llm/README.md,sha256=hFJyw5B0TU2jrh9nb0zGZMgdH-Ei1dSRfhvPQG_NSoU,168
 letta/local_llm/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/local_llm/chat_completion_proxy.py,sha256=ElYR0M5SY2zL4NQzInye21MxqtiP3AUXX9Ia0KbkD4Y,12948
@@ -153,10 +153,10 @@ letta/schemas/letta_base.py,sha256=HTnSHJ2YSyhEdpY-vg9Y7ywqS1zzTjb9j5iVPYsuVSk,3
 letta/schemas/letta_message.py,sha256=QHzIEwnEJEkE02biCwyQo5IvL2fVq_whBRQD3vPYO48,9837
 letta/schemas/letta_request.py,sha256=dzy3kwb5j2QLaSV0sDlwISEMt2xxH3IiK-vR9xJV65k,1123
 letta/schemas/letta_response.py,sha256=yL0w-cdUazgEqg6_F4LJz2tugKNAZsB83Gr5jfXwa5U,7124
-letta/schemas/llm_config.py,sha256=lycAmLNvAm6D35jlLBN333x3tpzdk2Fwkx6yJl3pXjQ,5273
+letta/schemas/llm_config.py,sha256=CsWQ7M70KXJdgCSGE66zA5Bb13XdBeWWFydaKnNvuf4,5515
 letta/schemas/llm_config_overrides.py,sha256=-oRglCTcajF6UAK3RAa0FLWVuKODPI1v403fDIWMAtA,1815
 letta/schemas/memory.py,sha256=GOYDfPKzbWftUWO9Hv4KW7xAi1EIQmC8zpP7qvEkVHw,10245
-letta/schemas/message.py,sha256=4L0-B2gCA2krF34AkKi_G4L2X5aALQnH997MowIcQgs,37457
+letta/schemas/message.py,sha256=jTC1Z_gpCFlodfeystPq1WeNCE9Ccjqlr_HBlOwO0hQ,38072
 letta/schemas/openai/chat_completion_request.py,sha256=AOIwgbN3CZKVqkuXeMHeSa53u4h0wVq69t3T_LJ0vIE,3389
 letta/schemas/openai/chat_completion_response.py,sha256=Kaz9T0_ZvhWdVgGcouBuUuAG8-Nl3kC3dRymBQlONZ4,3980
 letta/schemas/openai/chat_completions.py,sha256=l0e9sT9boTD5VBU5YtJ0s7qUtCfFGB2K-gQLeEZ2LHU,3599
@@ -164,7 +164,7 @@ letta/schemas/openai/embedding_response.py,sha256=WKIZpXab1Av7v6sxKG8feW3ZtpQUNo
 letta/schemas/openai/openai.py,sha256=Hilo5BiLAGabzxCwnwfzK5QrWqwYD8epaEKFa4Pwndk,7970
 letta/schemas/organization.py,sha256=WWbUWVSp_VQRFwWN4fdHg1yObiV6x9rZnvIY8x5BPs0,746
 letta/schemas/passage.py,sha256=pdCLZgOn0gWK1gB6aFHLS0gfdWCBqLaiHDA0iQ12Zd8,3704
-letta/schemas/providers.py,sha256=1Sc7gWI6n9RkR4kOY4g3xGLVo6VCSwpiJySp3Pm3MQw,34903
+letta/schemas/providers.py,sha256=il--tOeW2rXjRS0d9L9-UScu4PWrzMIWLwJBmVA65-Y,35510
 letta/schemas/run.py,sha256=SRqPRziINIiPunjOhE_NlbnQYgxTvqmbauni_yfBQRA,2085
 letta/schemas/sandbox_config.py,sha256=Nz8K5brqe6jpf66KnTJ0-E7ZeFdPoBFGN-XOI35OeaY,5926
 letta/schemas/source.py,sha256=-BQVolcXA2ziCu2ztR6cbTdGUc8G7vGJy7rvpdf1hpg,2880
@@ -181,8 +181,8 @@ letta/server/rest_api/app.py,sha256=9cf9H6vZhN-iBJqkqjBdFWjA3PlKfok-q48ltI71qls,
 letta/server/rest_api/auth/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/auth/index.py,sha256=fQBGyVylGSRfEMLQ17cZzrHd5Y1xiVylvPqH5Rl-lXQ,1378
 letta/server/rest_api/auth_token.py,sha256=725EFEIiNj4dh70hrSd94UysmFD8vcJLrTRfNHkzxDo,774
-letta/server/rest_api/chat_completions_interface.py,sha256=i9tfb9oSh14QNY-1ghWYtdgP7_RiyPHD5NcA2FKF3Dw,10195
-letta/server/rest_api/interface.py,sha256=ZGTJ5WIRNsWgKO0yoCD-yUESxq838qNrK3N1SzRZl40,51886
+letta/server/rest_api/chat_completions_interface.py,sha256=XAMbQ-f0KDUeMAtnazwbjcrUn8ZpF8vBBtYw_kEPZ-8,10932
+letta/server/rest_api/interface.py,sha256=UpmXS-srzM3t0SKJvrFIFhsIyjufLpdjursO8IPKbjs,53138
 letta/server/rest_api/optimistic_json_parser.py,sha256=1z4d9unmxMb0ou7owJ62uUQoNjNYf21FmaNdg0ZcqUU,6567
 letta/server/rest_api/routers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 letta/server/rest_api/routers/openai/chat_completions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -190,7 +190,7 @@ letta/server/rest_api/routers/openai/chat_completions/chat_completions.py,sha256
 letta/server/rest_api/routers/v1/__init__.py,sha256=tzD8Oh6ynPkg8ULcITWcwalLL81SIh6eztPqV9l7VGk,1162
 letta/server/rest_api/routers/v1/agents.py,sha256=AkoJWxn-cJEURGXBarJtasTJsldPe6vv9TFOiGpJRlY,25473
 letta/server/rest_api/routers/v1/blocks.py,sha256=oJYOpGUTd4AhKwVolVlZPIXO2EoOrBHkyi2PdrmbtmA,3888
-letta/server/rest_api/routers/v1/health.py,sha256=pKCuVESlVOhGIb4VC4K-H82eZqfghmT6kvj2iOkkKuc,401
+letta/server/rest_api/routers/v1/health.py,sha256=MoOjkydhGcJXTiuJrKIB0etVXiRMdTa51S8RQ8-50DQ,399
 letta/server/rest_api/routers/v1/jobs.py,sha256=pKihW12hQdFwt6tHQXs94yOMv6xotlhBB3Vl7Q5ASKQ,2738
 letta/server/rest_api/routers/v1/llms.py,sha256=lYp5URXtZk1yu_Pe-p1Wq1uQ0qeb6aWtx78rXSB7N_E,881
 letta/server/rest_api/routers/v1/organizations.py,sha256=8n-kA9LHtKImdY2xL-v7m6nYAbFWqH1vjBCJhQbv7Is,2077
@@ -203,7 +203,7 @@ letta/server/rest_api/routers/v1/tags.py,sha256=45G0cmcP-ER0OO5OanT_fGtGQfl9ZjRK
 letta/server/rest_api/routers/v1/tools.py,sha256=Ft1wnS7RJT3TOfwSGMJ0_gfTpXnVArZUtPCXT3osI-0,12615
 letta/server/rest_api/routers/v1/users.py,sha256=G5DBHSkPfBgVHN2Wkm-rVYiLQAudwQczIq2Z3YLdbVo,2277
 letta/server/rest_api/static_files.py,sha256=NG8sN4Z5EJ8JVQdj19tkFa9iQ1kBPTab9f_CUxd_u4Q,3143
-letta/server/rest_api/utils.py,sha256=dsjkZzgo9Rk3fjUf1ajjiiql1eeO5DAzmXprttI7bJU,3993
+letta/server/rest_api/utils.py,sha256=X7D6PsSVOAH6_irY0bgxdnS2yZ5ux-OA4eAiYqVgAPE,4438
 letta/server/server.py,sha256=8tRXPLta26ARQSThMDnKDAxTGx39j8Zw-41kgEgSpoQ,59850
 letta/server/startup.sh,sha256=qEi6dQHJRzEzDIgnIODj-RYp-O1XstfFpc6cFLkUzVs,1576
 letta/server/static_files/assets/index-048c9598.js,sha256=mR16XppvselwKCcNgONs4L7kZEVa4OEERm4lNZYtLSk,146819
@@ -218,7 +218,7 @@ letta/server/ws_api/interface.py,sha256=TWl9vkcMCnLsUtgsuENZ-ku2oMDA-OUTzLh_yNRo
 letta/server/ws_api/protocol.py,sha256=M_-gM5iuDBwa1cuN2IGNCG5GxMJwU2d3XW93XALv9s8,1821
 letta/server/ws_api/server.py,sha256=cBSzf-V4zT1bL_0i54OTI3cMXhTIIxqjSRF8pYjk7fg,5835
 letta/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-letta/services/agent_manager.py,sha256=FLb6Y3_aZSNGTIfUrKJbRTXTMHJBlf8OGH-Ahf0moY0,50958
+letta/services/agent_manager.py,sha256=S-WBNNg4H84eabplDkFHdOD4tjKkPwIosDI3yM8aN4E,50964
 letta/services/block_manager.py,sha256=u56TXG46QDMbQZadDGCO7fY1vreJ69Xr_0MUF53xw4k,5519
 letta/services/helpers/agent_manager_helper.py,sha256=RH0MXLZASkP2LVbVNUfSYHrcBYZnVxFd9ejGjRK90Hw,11283
 letta/services/helpers/tool_execution_helper.py,sha256=q8uSiQcX6VH_iNg5VNloZgC2JkH9lIOXBKCXYPx2Yac,6097
@@ -234,13 +234,13 @@ letta/services/step_manager.py,sha256=_PJUgaXyUHKCdlwt9CAmKhdeCNzKE_0_8-SRdUzpZa
 letta/services/tool_execution_sandbox.py,sha256=4XBYkCEBLG6GqijxgqeLIQQJ9zRbsJa8vZ4dZG04Pq8,22080
 letta/services/tool_manager.py,sha256=9Y15q0GqnADk-tnUeWDFFsDOt_ZjwsPU2oteDVtHAF4,9572
 letta/services/user_manager.py,sha256=1U8BQ_-MBkEW2wnSFV_OsTwBmRAZLN8uHLFjnDjK3hA,4308
-letta/settings.py,sha256=TnWJKZxeu571rKZQPGZj9fSupDwHUj1Pn5yzer6TlMY,6263
+letta/settings.py,sha256=gO5X4miD884jvFVsGoL_1kBE1fGlrce6haZ9N_v7CCs,6334
 letta/streaming_interface.py,sha256=lo2VAQRUJOdWTijwnXuKOC9uejqr2siUAEmZiQUXkj8,15710
 letta/streaming_utils.py,sha256=jLqFTVhUL76FeOuYk8TaRQHmPTf3HSRc2EoJwxJNK6U,11946
 letta/system.py,sha256=S_0cod77iEttkFd1bSh2wenLCKA8YL487AuVenIDUng,8425
 letta/utils.py,sha256=lgBDWKmrQrmJGPxcgamFC2aJyi6I0dX7bzLBt3YC6j0,34051
-letta_nightly-0.6.23.dev20250211104055.dist-info/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
-letta_nightly-0.6.23.dev20250211104055.dist-info/METADATA,sha256=vbZOnhlQslB1TJ-Y9r96eSCTYx2pH380fdjigSmBqxM,22156
-letta_nightly-0.6.23.dev20250211104055.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
-letta_nightly-0.6.23.dev20250211104055.dist-info/entry_points.txt,sha256=2zdiyGNEZGV5oYBuS-y2nAAgjDgcC9yM_mHJBFSRt5U,40
-letta_nightly-0.6.23.dev20250211104055.dist-info/RECORD,,
+letta_nightly-0.6.24.dev20250212104045.dist-info/LICENSE,sha256=mExtuZ_GYJgDEI38GWdiEYZizZS4KkVt2SF1g_GPNhI,10759
+letta_nightly-0.6.24.dev20250212104045.dist-info/METADATA,sha256=USLZaRA7JEPRVOe9nB7sZQrIKjcWHFv52yhq7B67tNk,22196
+letta_nightly-0.6.24.dev20250212104045.dist-info/WHEEL,sha256=FMvqSimYX_P7y0a7UY-_Mc83r5zkBZsCYPm7Lr0Bsq4,88
+letta_nightly-0.6.24.dev20250212104045.dist-info/entry_points.txt,sha256=2zdiyGNEZGV5oYBuS-y2nAAgjDgcC9yM_mHJBFSRt5U,40
+letta_nightly-0.6.24.dev20250212104045.dist-info/RECORD,,

{letta_nightly-0.6.23.dev20250211104055.dist-info → letta_nightly-0.6.24.dev20250212104045.dist-info}/LICENSE RENAMED Viewed

File without changes

{letta_nightly-0.6.23.dev20250211104055.dist-info → letta_nightly-0.6.24.dev20250212104045.dist-info}/WHEEL RENAMED Viewed

File without changes

{letta_nightly-0.6.23.dev20250211104055.dist-info → letta_nightly-0.6.24.dev20250212104045.dist-info}/entry_points.txt RENAMED Viewed

File without changes

letta-nightly 0.6.23.dev20250211104055__py3-none-any.whl → 0.6.24.dev20250212104045__py3-none-any.whl

Potentially problematic release.

letta-nightly 0.6.23.dev20250211104055py3-none-any.whl → 0.6.24.dev20250212104045py3-none-any.whl