letta-nightly 0.6.45.dev20250328104141__py3-none-any.whl → 0.6.46.dev20250330050944__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (48)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +25 -8
  3. letta/agents/base_agent.py +6 -5
  4. letta/agents/letta_agent.py +323 -0
  5. letta/agents/voice_agent.py +4 -3
  6. letta/client/client.py +2 -0
  7. letta/dynamic_multi_agent.py +5 -5
  8. letta/errors.py +20 -0
  9. letta/helpers/tool_execution_helper.py +1 -1
  10. letta/helpers/tool_rule_solver.py +1 -1
  11. letta/llm_api/anthropic.py +2 -0
  12. letta/llm_api/anthropic_client.py +153 -167
  13. letta/llm_api/google_ai_client.py +112 -29
  14. letta/llm_api/llm_api_tools.py +5 -0
  15. letta/llm_api/llm_client.py +6 -7
  16. letta/llm_api/llm_client_base.py +38 -17
  17. letta/llm_api/openai.py +2 -0
  18. letta/orm/group.py +2 -5
  19. letta/round_robin_multi_agent.py +18 -7
  20. letta/schemas/group.py +6 -0
  21. letta/schemas/message.py +23 -14
  22. letta/schemas/openai/chat_completion_request.py +6 -1
  23. letta/schemas/providers.py +3 -3
  24. letta/serialize_schemas/marshmallow_agent.py +34 -10
  25. letta/serialize_schemas/pydantic_agent_schema.py +23 -3
  26. letta/server/rest_api/app.py +9 -0
  27. letta/server/rest_api/interface.py +25 -2
  28. letta/server/rest_api/optimistic_json_parser.py +1 -1
  29. letta/server/rest_api/routers/v1/agents.py +57 -23
  30. letta/server/rest_api/routers/v1/groups.py +72 -49
  31. letta/server/rest_api/routers/v1/sources.py +1 -0
  32. letta/server/rest_api/utils.py +0 -1
  33. letta/server/server.py +73 -80
  34. letta/server/startup.sh +1 -1
  35. letta/services/agent_manager.py +7 -0
  36. letta/services/group_manager.py +87 -29
  37. letta/services/message_manager.py +5 -0
  38. letta/services/tool_executor/async_tool_execution_sandbox.py +397 -0
  39. letta/services/tool_executor/tool_execution_manager.py +27 -0
  40. letta/services/{tool_execution_sandbox.py → tool_executor/tool_execution_sandbox.py} +40 -12
  41. letta/services/tool_executor/tool_executor.py +23 -6
  42. letta/settings.py +17 -1
  43. letta/supervisor_multi_agent.py +3 -1
  44. {letta_nightly-0.6.45.dev20250328104141.dist-info → letta_nightly-0.6.46.dev20250330050944.dist-info}/METADATA +1 -1
  45. {letta_nightly-0.6.45.dev20250328104141.dist-info → letta_nightly-0.6.46.dev20250330050944.dist-info}/RECORD +48 -46
  46. {letta_nightly-0.6.45.dev20250328104141.dist-info → letta_nightly-0.6.46.dev20250330050944.dist-info}/LICENSE +0 -0
  47. {letta_nightly-0.6.45.dev20250328104141.dist-info → letta_nightly-0.6.46.dev20250330050944.dist-info}/WHEEL +0 -0
  48. {letta_nightly-0.6.45.dev20250328104141.dist-info → letta_nightly-0.6.46.dev20250330050944.dist-info}/entry_points.txt +0 -0

letta/llm_api/anthropic_client.py
@@ -7,12 +7,11 @@ from anthropic.types import Message as AnthropicMessage
 
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
-from letta.llm_api.llm_api_tools import cast_message_to_subtype
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
 from letta.schemas.message import Message as PydanticMessage
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
+from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
 from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
@@ -26,20 +25,14 @@ logger = get_logger(__name__)
 class AnthropicClient(LLMClientBase):
 
     def request(self, request_data: dict) -> dict:
-        try:
-            client = self._get_anthropic_client(async_client=False)
-            response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
-            return response.model_dump()
-        except Exception as e:
-            self._handle_anthropic_error(e)
+        client = self._get_anthropic_client(async_client=False)
+        response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
+        return response.model_dump()
 
     async def request_async(self, request_data: dict) -> dict:
-        try:
-            client = self._get_anthropic_client(async_client=True)
-            response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
-            return response.model_dump()
-        except Exception as e:
-            self._handle_anthropic_error(e)
+        client = self._get_anthropic_client(async_client=True)
+        response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
+        return response.model_dump()
 
     def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
         override_key = ProviderManager().get_anthropic_override_key()
@@ -47,15 +40,6 @@ class AnthropicClient(LLMClientBase):
             return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
         return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()
 
-    def _handle_anthropic_error(self, e: Exception):
-        if isinstance(e, anthropic.APIConnectionError):
-            logger.warning(f"[Anthropic] API connection error: {e.__cause__}")
-        elif isinstance(e, anthropic.RateLimitError):
-            logger.warning("[Anthropic] Rate limited (429). Consider backoff.")
-        elif isinstance(e, anthropic.APIStatusError):
-            logger.warning(f"[Anthropic] API status error: {e.status_code}, {e.response}")
-        raise e
-
     def build_request_data(
         self,
         messages: List[PydanticMessage],
@@ -63,43 +47,157 @@ class AnthropicClient(LLMClientBase):
         tool_call: Optional[str],
         force_tool_call: Optional[str] = None,
     ) -> dict:
+        prefix_fill = True
         if not self.use_tool_naming:
            raise NotImplementedError("Only tool calling supported on Anthropic API requests")
 
-        if tools is None:
-            # Special case for summarization path
-            available_tools = None
-            tool_choice = None
-        elif force_tool_call is not None:
-            assert tools is not None
-            tool_choice = {"type": "tool", "name": force_tool_call}
-            available_tools = [{"type": "function", "function": f} for f in tools if f["name"] == force_tool_call]
-
-            # need to have this setting to be able to put inner thoughts in kwargs
-            self.llm_config.put_inner_thoughts_in_kwargs = True
-        else:
-            if self.llm_config.put_inner_thoughts_in_kwargs:
-                # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
-                tool_choice = {"type": "any", "disable_parallel_tool_use": True}
-            else:
-                tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            available_tools = [{"type": "function", "function": f} for f in tools]
-
-        chat_completion_request = ChatCompletionRequest(
-            model=self.llm_config.model,
-            messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
-            tools=available_tools,
-            tool_choice=tool_choice,
-            max_tokens=self.llm_config.max_tokens,  # Note: max_tokens is required for Anthropic API
-            temperature=self.llm_config.temperature,
-        )
+        if not self.llm_config.max_tokens:
+            raise ValueError("Max tokens must be set for anthropic")
+
+        data = {
+            "model": self.llm_config.model,
+            "max_tokens": self.llm_config.max_tokens,
+            "temperature": self.llm_config.temperature,
+        }
+
+        # Extended Thinking
+        if self.llm_config.enable_reasoner:
+            assert (
+                self.llm_config.max_reasoning_tokens is not None and self.llm_config.max_reasoning_tokens < self.llm_config.max_tokens
+            ), "max tokens must be greater than thinking budget"
+            assert not self.llm_config.put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
 
-        return _prepare_anthropic_request(
-            data=chat_completion_request,
-            put_inner_thoughts_in_kwargs=self.llm_config.put_inner_thoughts_in_kwargs,
-            extended_thinking=self.llm_config.enable_reasoner,
-            max_reasoning_tokens=self.llm_config.max_reasoning_tokens,
+            data["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": self.llm_config.max_reasoning_tokens,
+            }
+            # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
+            data["temperature"] = 1.0
+
+            # Silently disable prefix_fill for now
+            prefix_fill = False
+
+        # Tools
+        tools_for_request = (
+            [Tool(function=f) for f in tools if f["name"] == force_tool_call]
+            if force_tool_call is not None
+            else [Tool(function=f) for f in tools]
         )
+        if force_tool_call is not None:
+            self.llm_config.put_inner_thoughts_in_kwargs = True  # why do we do this ?
+
+        # Add inner thoughts kwarg
+        if len(tools_for_request) > 0 and self.llm_config.put_inner_thoughts_in_kwargs:
+            tools_with_inner_thoughts = add_inner_thoughts_to_functions(
+                functions=[t.function.model_dump() for t in tools_for_request],
+                inner_thoughts_key=INNER_THOUGHTS_KWARG,
+                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+            )
+            tools_for_request = [Tool(function=f) for f in tools_with_inner_thoughts]
+
+        if len(tools_for_request) > 0:
+            # TODO eventually enable parallel tool use
+            data["tools"] = convert_tools_to_anthropic_format(tools_for_request)
+
+        # Messages
+        inner_thoughts_xml_tag = "thinking"
+        data["messages"] = [
+            m.to_anthropic_dict(
+                inner_thoughts_xml_tag=inner_thoughts_xml_tag,
+                put_inner_thoughts_in_kwargs=self.llm_config.put_inner_thoughts_in_kwargs,
+            )
+            for m in messages
+        ]
+
+        # Move 'system' to the top level
+        if data["messages"][0]["role"] != "system":
+            raise RuntimeError(f"First message is not a system message, instead has role {data['messages'][0]['role']}")
+
+        data["system"] = data["messages"][0]["content"]
+        data["messages"] = data["messages"][1:]
+
+        # Ensure first message is user
+        if data["messages"][0]["role"] != "user":
+            data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]
+
+        # Handle alternating messages
+        data["messages"] = merge_tool_results_into_user_messages(data["messages"])
+
+        # Prefix fill
+        # https://docs.anthropic.com/en/api/messages#body-messages
+        # NOTE: cannot prefill with tools for opus:
+        # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
+        if prefix_fill and not self.llm_config.put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
+            data["messages"].append(
+                # Start the thinking process for the assistant
+                {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
+            )
+
+        return data
+
+    def handle_llm_error(self, e: Exception) -> Exception:
+        if isinstance(e, anthropic.APIConnectionError):
+            logger.warning(f"[Anthropic] API connection error: {e.__cause__}")
+            return LLMConnectionError(
+                message=f"Failed to connect to Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={"cause": str(e.__cause__) if e.__cause__ else None},
+            )
+
+        if isinstance(e, anthropic.RateLimitError):
+            logger.warning("[Anthropic] Rate limited (429). Consider backoff.")
+            return LLMRateLimitError(
+                message=f"Rate limited by Anthropic: {str(e)}",
+                code=ErrorCode.RATE_LIMIT_EXCEEDED,
+            )
+
+        if isinstance(e, anthropic.BadRequestError):
+            logger.warning(f"[Anthropic] Bad request: {str(e)}")
+            return LLMBadRequestError(
+                message=f"Bad request to Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.AuthenticationError):
+            logger.warning(f"[Anthropic] Authentication error: {str(e)}")
+            return LLMAuthenticationError(
+                message=f"Authentication failed with Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.PermissionDeniedError):
+            logger.warning(f"[Anthropic] Permission denied: {str(e)}")
+            return LLMPermissionDeniedError(
+                message=f"Permission denied by Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.NotFoundError):
+            logger.warning(f"[Anthropic] Resource not found: {str(e)}")
+            return LLMNotFoundError(
+                message=f"Resource not found in Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.UnprocessableEntityError):
+            logger.warning(f"[Anthropic] Unprocessable entity: {str(e)}")
+            return LLMUnprocessableEntityError(
+                message=f"Invalid request content for Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.APIStatusError):
+            logger.warning(f"[Anthropic] API status error: {str(e)}")
+            return LLMServerError(
+                message=f"Anthropic API error: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={
+                    "status_code": e.status_code if hasattr(e, "status_code") else None,
+                    "response": str(e.response) if hasattr(e, "response") else None,
+                },
+            )
+
+        return super().handle_llm_error(e)
 
     def convert_response_to_chat_completion(
         self,
@@ -208,118 +306,6 @@ class AnthropicClient(LLMClientBase):
         return chat_completion_response
 
 
-def _prepare_anthropic_request(
-    data: ChatCompletionRequest,
-    inner_thoughts_xml_tag: Optional[str] = "thinking",
-    # if true, prefix fill the generation with the thinking tag
-    prefix_fill: bool = True,
-    # if true, put COT inside the tool calls instead of inside the content
-    put_inner_thoughts_in_kwargs: bool = False,
-    bedrock: bool = False,
-    # extended thinking related fields
-    # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
-    extended_thinking: bool = False,
-    max_reasoning_tokens: Optional[int] = None,
-) -> dict:
-    """Prepare the request data for Anthropic API format."""
-    if extended_thinking:
-        assert (
-            max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
-        ), "max tokens must be greater than thinking budget"
-        assert not put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
-        # assert not prefix_fill, "extended thinking not compatible with prefix_fill"
-        # Silently disable prefix_fill for now
-        prefix_fill = False
-
-    # if needed, put inner thoughts as a kwarg for all tools
-    if data.tools and put_inner_thoughts_in_kwargs:
-        functions = add_inner_thoughts_to_functions(
-            functions=[t.function.model_dump() for t in data.tools],
-            inner_thoughts_key=INNER_THOUGHTS_KWARG,
-            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
-        )
-        data.tools = [Tool(function=f) for f in functions]
-
-    # convert the tools to Anthropic's payload format
-    anthropic_tools = None if data.tools is None else convert_tools_to_anthropic_format(data.tools)
-
-    # pydantic -> dict
-    data = data.model_dump(exclude_none=True)
-
-    if extended_thinking:
-        data["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": max_reasoning_tokens,
-        }
-        # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
-        data["temperature"] = 1.0
-
-    if "functions" in data:
-        raise ValueError(f"'functions' unexpected in Anthropic API payload")
-
-    # Handle tools
-    if "tools" in data and data["tools"] is None:
-        data.pop("tools")
-        data.pop("tool_choice", None)
-    elif anthropic_tools is not None:
-        # TODO eventually enable parallel tool use
-        data["tools"] = anthropic_tools
-
-    # Move 'system' to the top level
-    assert data["messages"][0]["role"] == "system", f"Expected 'system' role in messages[0]:\n{data['messages'][0]}"
-    data["system"] = data["messages"][0]["content"]
-    data["messages"] = data["messages"][1:]
-
-    # Process messages
-    for message in data["messages"]:
-        if "content" not in message:
-            message["content"] = None
-
-    # Convert to Anthropic format
-    msg_objs = [
-        PydanticMessage.dict_to_message(
-            user_id=None,
-            agent_id=None,
-            openai_message_dict=m,
-        )
-        for m in data["messages"]
-    ]
-    data["messages"] = [
-        m.to_anthropic_dict(
-            inner_thoughts_xml_tag=inner_thoughts_xml_tag,
-            put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
-        )
-        for m in msg_objs
-    ]
-
-    # Ensure first message is user
-    if data["messages"][0]["role"] != "user":
-        data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]
-
-    # Handle alternating messages
-    data["messages"] = merge_tool_results_into_user_messages(data["messages"])
-
-    # Handle prefix fill (not compatible with inner-thouguhts-in-kwargs)
-    # https://docs.anthropic.com/en/api/messages#body-messages
-    # NOTE: cannot prefill with tools for opus:
-    # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
-    if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
-        if not bedrock:  # not support for bedrock
-            data["messages"].append(
-                # Start the thinking process for the assistant
-                {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
-            )
-
-    # Validate max_tokens
-    assert "max_tokens" in data, data
-
-    # Remove OpenAI-specific fields
-    for field in ["frequency_penalty", "logprobs", "n", "top_p", "presence_penalty", "user", "stream"]:
-        data.pop(field, None)
-
-    return data
-
-
 def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use
 
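Note on the change above: the request-building logic that previously lived in the module-level _prepare_anthropic_request helper is now inlined in AnthropicClient.build_request_data, and SDK exceptions are no longer logged and re-raised inside request()/request_async(); instead the new handle_llm_error override translates them into Letta's typed errors. As a rough illustration only (the concrete values below are placeholders, not taken from this diff), the payload the method now assembles for an extended-thinking request has roughly this shape:

# Illustration only -- approximate shape of the dict returned by build_request_data()
# when extended thinking is enabled; values are placeholders.
request_data = {
    "model": "claude-3-7-sonnet-20250219",  # self.llm_config.model
    "max_tokens": 4096,  # required; build_request_data raises ValueError if unset
    "temperature": 1.0,  # forced to 1.0 whenever extended thinking is enabled
    "thinking": {"type": "enabled", "budget_tokens": 1024},
    "system": "<system prompt text>",  # hoisted out of messages[0]
    "messages": [{"role": "user", "content": "<first user message>"}],
    "tools": [],  # convert_tools_to_anthropic_format(...) output, when tools are present
}
# request()/request_async() pass this straight through to
# client.beta.messages.create(**request_data, betas=["tools-2024-04-04"]).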
letta/llm_api/google_ai_client.py
@@ -1,6 +1,8 @@
 import uuid
 from typing import List, Optional, Tuple
 
+import requests
+
 from letta.constants import NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.helpers.json_helpers import json_dumps
@@ -21,7 +23,13 @@ class GoogleAIClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
-        url, headers = self.get_gemini_endpoint_and_headers(generate_content=True)
+        url, headers = get_gemini_endpoint_and_headers(
+            base_url=str(self.llm_config.model_endpoint),
+            model=self.llm_config.model,
+            api_key=str(model_settings.gemini_api_key),
+            key_in_header=True,
+            generate_content=True,
+        )
         return make_post_request(url, headers, request_data)
 
     def build_request_data(
@@ -208,34 +216,6 @@ class GoogleAIClient(LLMClientBase):
         except KeyError as e:
             raise e
 
-    def get_gemini_endpoint_and_headers(
-        self,
-        key_in_header: bool = True,
-        generate_content: bool = False,
-    ) -> Tuple[str, dict]:
-        """
-        Dynamically generate the model endpoint and headers.
-        """
-
-        url = f"{self.llm_config.model_endpoint}/v1beta/models"
-
-        # Add the model
-        url += f"/{self.llm_config.model}"
-
-        # Add extension for generating content if we're hitting the LM
-        if generate_content:
-            url += ":generateContent"
-
-        # Decide if api key should be in header or not
-        # Two ways to pass the key: https://ai.google.dev/tutorials/setup
-        if key_in_header:
-            headers = {"Content-Type": "application/json", "x-goog-api-key": model_settings.gemini_api_key}
-        else:
-            url += f"?key={model_settings.gemini_api_key}"
-            headers = {"Content-Type": "application/json"}
-
-        return url, headers
-
     def convert_tools_to_google_ai_format(self, tools: List[Tool]) -> List[dict]:
         """
         OpenAI style:
@@ -330,3 +310,106 @@ class GoogleAIClient(LLMClientBase):
             messages_with_padding.append(dummy_yield_message)
 
         return messages_with_padding
+
+
+def get_gemini_endpoint_and_headers(
+    base_url: str, model: Optional[str], api_key: str, key_in_header: bool = True, generate_content: bool = False
+) -> Tuple[str, dict]:
+    """
+    Dynamically generate the model endpoint and headers.
+    """
+    url = f"{base_url}/v1beta/models"
+
+    # Add the model
+    if model is not None:
+        url += f"/{model}"
+
+    # Add extension for generating content if we're hitting the LM
+    if generate_content:
+        url += ":generateContent"
+
+    # Decide if api key should be in header or not
+    # Two ways to pass the key: https://ai.google.dev/tutorials/setup
+    if key_in_header:
+        headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
+    else:
+        url += f"?key={api_key}"
+        headers = {"Content-Type": "application/json"}
+
+    return url, headers
+
+
+def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
+    from letta.utils import printd
+
+    url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)
+
+    try:
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+        response = response.json()  # convert to dict from string
+
+        # Grab the models out
+        model_list = response["models"]
+        return model_list
+
+    except requests.exceptions.HTTPError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        printd(f"Got HTTPError, exception={http_err}")
+        # Print the HTTP status code
+        print(f"HTTP Error: {http_err.response.status_code}")
+        # Print the response content (error message from server)
+        print(f"Message: {http_err.response.text}")
+        raise http_err
+
+    except requests.exceptions.RequestException as req_err:
+        # Handle other requests-related errors (e.g., connection error)
+        printd(f"Got RequestException, exception={req_err}")
+        raise req_err
+
+    except Exception as e:
+        # Handle other potential errors
+        printd(f"Got unknown Exception, exception={e}")
+        raise e
+
+
+def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]:
+    from letta.utils import printd
+
+    url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)
+
+    try:
+        response = requests.get(url, headers=headers)
+        printd(f"response = {response}")
+        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+        response = response.json()  # convert to dict from string
+        printd(f"response.json = {response}")
+
+        # Grab the models out
+        return response
+
+    except requests.exceptions.HTTPError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        printd(f"Got HTTPError, exception={http_err}")
+        # Print the HTTP status code
+        print(f"HTTP Error: {http_err.response.status_code}")
+        # Print the response content (error message from server)
+        print(f"Message: {http_err.response.text}")
+        raise http_err
+
+    except requests.exceptions.RequestException as req_err:
+        # Handle other requests-related errors (e.g., connection error)
+        printd(f"Got RequestException, exception={req_err}")
+        raise req_err
+
+    except Exception as e:
+        # Handle other potential errors
+        printd(f"Got unknown Exception, exception={e}")
+        raise e
+
+
+def google_ai_get_model_context_window(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
+    model_details = google_ai_get_model_details(base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header)
+    # TODO should this be:
+    # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"]
+    return int(model_details["inputTokenLimit"])
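
Note on the change above: the Gemini endpoint helper is now a module-level function that takes the base URL, model, and API key explicitly (previously it read them off self and model_settings), which is what lets the new google_ai_get_model_list / google_ai_get_model_details helpers reuse it. A small sketch of the URLs it produces, using a placeholder API key and model name:

# Sketch of the relocated helper's output; key and model name are placeholders.
from letta.llm_api.google_ai_client import get_gemini_endpoint_and_headers

url, headers = get_gemini_endpoint_and_headers(
    base_url="https://generativelanguage.googleapis.com",
    model="gemini-1.5-pro",
    api_key="GEMINI_API_KEY",
    key_in_header=True,
    generate_content=True,
)
# url     == "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent"
# headers == {"Content-Type": "application/json", "x-goog-api-key": "GEMINI_API_KEY"}

# With model=None the model path segment is skipped, and with key_in_header=False
# the key is appended as a query parameter instead:
url, headers = get_gemini_endpoint_and_headers(
    base_url="https://generativelanguage.googleapis.com",
    model=None,
    api_key="GEMINI_API_KEY",
    key_in_header=False,
)
# url     == "https://generativelanguage.googleapis.com/v1beta/models?key=GEMINI_API_KEY"
# headers == {"Content-Type": "application/json"}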
letta/llm_api/llm_api_tools.py
@@ -140,6 +140,7 @@ def create(
     stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
     put_inner_thoughts_first: bool = True,
+    name: Optional[str] = None,
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
     from letta.utils import printd
@@ -206,6 +207,7 @@ def create(
                 api_key=api_key,
                 chat_completion_request=data,
                 stream_interface=stream_interface,
+                name=name,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
@@ -255,6 +257,7 @@ def create(
                 api_key=api_key,
                 chat_completion_request=data,
                 stream_interface=stream_interface,
+                name=name,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
@@ -359,6 +362,7 @@ def create(
                 stream_interface=stream_interface,
                 extended_thinking=llm_config.enable_reasoner,
                 max_reasoning_tokens=llm_config.max_reasoning_tokens,
+                name=name,
             )
 
         else:
@@ -531,6 +535,7 @@ def create(
                 api_key=model_settings.deepseek_api_key,
                 chat_completion_request=data,
                 stream_interface=stream_interface,
+                name=name,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
letta/llm_api/llm_client.py
@@ -9,21 +9,17 @@ class LLMClient:
 
     @staticmethod
     def create(
-        agent_id: str,
         llm_config: LLMConfig,
         put_inner_thoughts_first: bool = True,
-        actor_id: Optional[str] = None,
     ) -> Optional[LLMClientBase]:
         """
         Create an LLM client based on the model endpoint type.
 
         Args:
-            agent_id: Unique identifier for the agent
             llm_config: Configuration for the LLM model
             put_inner_thoughts_first: Whether to put inner thoughts first in the response
             use_structured_output: Whether to use structured output
             use_tool_naming: Whether to use tool naming
-            actor_id: Optional actor identifier
 
         Returns:
             An instance of LLMClientBase subclass
@@ -36,19 +32,22 @@ class LLMClient:
                 from letta.llm_api.google_ai_client import GoogleAIClient
 
                 return GoogleAIClient(
-                    agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
                 )
             case "google_vertex":
                 from letta.llm_api.google_vertex_client import GoogleVertexClient
 
                 return GoogleVertexClient(
-                    agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
                 )
             case "anthropic":
                 from letta.llm_api.anthropic_client import AnthropicClient
 
                 return AnthropicClient(
-                    agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
                 )
            case _:
                 return None
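
Note on the change above: with agent_id and actor_id dropped from the factory, constructing a provider client now only needs the LLM configuration. A minimal sketch of the new call (the LLMConfig import path and field values are assumptions for illustration, not taken from this diff):

# Minimal sketch of the slimmed-down factory call.
from letta.llm_api.llm_client import LLMClient
from letta.schemas.llm_config import LLMConfig  # assumed import path

llm_config = LLMConfig(
    model="claude-3-5-sonnet-20241022",
    model_endpoint_type="anthropic",
    model_endpoint="https://api.anthropic.com/v1",
    context_window=200000,
)

# agent_id / actor_id are no longer part of the signature.
client = LLMClient.create(llm_config=llm_config, put_inner_thoughts_first=True)
if client is None:
    # Endpoint types without a dedicated client implementation fall through to None (the `case _` branch).
    raise ValueError(f"No native client for endpoint type {llm_config.model_endpoint_type}")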