letta-nightly 0.7.10.dev20250506104245__py3-none-any.whl → 0.7.11.dev20250507230415__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +8 -4
  3. letta/agents/letta_agent.py +3 -5
  4. letta/agents/letta_agent_batch.py +2 -4
  5. letta/client/client.py +2 -2
  6. letta/functions/async_composio_toolset.py +106 -0
  7. letta/functions/composio_helpers.py +20 -24
  8. letta/llm_api/anthropic.py +16 -5
  9. letta/llm_api/anthropic_client.py +10 -8
  10. letta/llm_api/google_ai_client.py +12 -10
  11. letta/llm_api/google_vertex_client.py +107 -27
  12. letta/llm_api/llm_api_tools.py +9 -3
  13. letta/llm_api/llm_client.py +9 -11
  14. letta/llm_api/llm_client_base.py +6 -5
  15. letta/llm_api/openai_client.py +6 -6
  16. letta/local_llm/constants.py +1 -0
  17. letta/memory.py +8 -5
  18. letta/orm/provider.py +1 -0
  19. letta/schemas/enums.py +5 -0
  20. letta/schemas/llm_config.py +2 -0
  21. letta/schemas/message.py +3 -3
  22. letta/schemas/providers.py +33 -1
  23. letta/server/rest_api/routers/v1/agents.py +10 -5
  24. letta/server/rest_api/routers/v1/llms.py +16 -6
  25. letta/server/rest_api/routers/v1/providers.py +3 -1
  26. letta/server/rest_api/routers/v1/sources.py +1 -0
  27. letta/server/server.py +58 -24
  28. letta/services/provider_manager.py +11 -8
  29. letta/settings.py +2 -0
  30. {letta_nightly-0.7.10.dev20250506104245.dist-info → letta_nightly-0.7.11.dev20250507230415.dist-info}/METADATA +1 -1
  31. {letta_nightly-0.7.10.dev20250506104245.dist-info → letta_nightly-0.7.11.dev20250507230415.dist-info}/RECORD +34 -33
  32. {letta_nightly-0.7.10.dev20250506104245.dist-info → letta_nightly-0.7.11.dev20250507230415.dist-info}/LICENSE +0 -0
  33. {letta_nightly-0.7.10.dev20250506104245.dist-info → letta_nightly-0.7.11.dev20250507230415.dist-info}/WHEEL +0 -0
  34. {letta_nightly-0.7.10.dev20250506104245.dist-info → letta_nightly-0.7.11.dev20250507230415.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_vertex_client.py CHANGED
@@ -5,16 +5,19 @@ from google import genai
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig
 
 from letta.helpers.datetime_helpers import get_utc_time_int
-from letta.helpers.json_helpers import json_dumps
+from letta.helpers.json_helpers import json_dumps, json_loads
 from letta.llm_api.google_ai_client import GoogleAIClient
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
+from letta.log import get_logger
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
-from letta.settings import model_settings
+from letta.settings import model_settings, settings
 from letta.utils import get_tool_call_id
 
+logger = get_logger(__name__)
+
 
 class GoogleVertexClient(GoogleAIClient):
 
@@ -35,6 +38,23 @@ class GoogleVertexClient(GoogleAIClient):
         )
         return response.model_dump()
 
+    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying request to llm and returns raw response.
+        """
+        client = genai.Client(
+            vertexai=True,
+            project=model_settings.google_cloud_project,
+            location=model_settings.google_cloud_location,
+            http_options={"api_version": "v1"},
+        )
+        response = await client.aio.models.generate_content(
+            model=llm_config.model,
+            contents=request_data["contents"],
+            config=request_data["config"],
+        )
+        return response.model_dump()
+
     def build_request_data(
         self,
         messages: List[PydanticMessage],
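
The async path reuses the request shape produced by build_request_data and calls the google-genai async surface (client.aio). A minimal standalone sketch of the same call pattern, with placeholder project/location strings rather than Letta's model_settings:

    import asyncio

    from google import genai


    async def generate(contents: list) -> dict:
        # Same client construction as request_async above; the project and
        # location values here are placeholders, not Letta configuration.
        client = genai.Client(
            vertexai=True,
            project="my-gcp-project",
            location="us-central1",
            http_options={"api_version": "v1"},
        )
        # client.aio mirrors the synchronous client.models API asynchronously.
        response = await client.aio.models.generate_content(
            model="gemini-2.0-flash-001",
            contents=contents,
        )
        return response.model_dump()


    if __name__ == "__main__":
        result = asyncio.run(generate(["Say hello in one word."]))
        print(result["candidates"][0]["content"])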
@@ -49,16 +69,21 @@ class GoogleVertexClient(GoogleAIClient):
         request_data["config"] = request_data.pop("generation_config")
         request_data["config"]["tools"] = request_data.pop("tools")
 
-        tool_names = [t["name"] for t in tools]
-        tool_config = ToolConfig(
-            function_calling_config=FunctionCallingConfig(
-                # ANY mode forces the model to predict only function calls
-                mode=FunctionCallingConfigMode.ANY,
-                # Provide the list of tools (though empty should also work, it seems not to)
-                allowed_function_names=tool_names,
+        tool_names = [t["name"] for t in tools] if tools else []
+        if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
+            request_data["config"]["response_mime_type"] = "application/json"
+            request_data["config"]["response_schema"] = self.get_function_call_response_schema(tools[0])
+            del request_data["config"]["tools"]
+        else:
+            tool_config = ToolConfig(
+                function_calling_config=FunctionCallingConfig(
+                    # ANY mode forces the model to predict only function calls
+                    mode=FunctionCallingConfigMode.ANY,
+                    # Provide the list of tools (though empty should also work, it seems not to)
+                    allowed_function_names=tool_names,
+                )
             )
-        )
-        request_data["config"]["tool_config"] = tool_config.model_dump()
+            request_data["config"]["tool_config"] = tool_config.model_dump()
 
         # Add thinking_config
         # If enable_reasoner is False, set thinking_budget to 0
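
When exactly one tool is present and use_vertex_structured_outputs_experimental is enabled, the branch above swaps the tool declaration for a constrained JSON response. The resulting config has roughly this shape (a hand-written illustration, not captured output; the send_message tool is hypothetical):

    # Shape of request_data["config"] in the structured-outputs branch.
    config = {
        "response_mime_type": "application/json",
        "response_schema": {
            "type": "OBJECT",
            "properties": {
                "name": {"type": "STRING", "enum": ["send_message"]},
                "args": {"type": "OBJECT"},  # the tool's parameter schema, elided here
            },
            "required": ["name", "args"],
        },
        # No "tools" or "tool_config" keys in this branch; the schema itself
        # forces the model to emit a single well-formed function call.
    }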
@@ -110,12 +135,16 @@ class GoogleVertexClient(GoogleAIClient):
         for candidate in response.candidates:
             content = candidate.content
 
-            # if "role" not in content or not content["role"]:
-            #     # This means the response is malformed like MALFORMED_FUNCTION_CALL
-            #     # NOTE: must be a ValueError to trigger a retry
-            #     raise ValueError(f"Error in response data from LLM: {response_data}")
-            # role = content["role"]
-            # assert role == "model", f"Unknown role in response: {role}"
+            if content.role is None or content.parts is None:
+                # This means the response is malformed like MALFORMED_FUNCTION_CALL
+                # NOTE: must be a ValueError to trigger a retry
+                if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
+                    raise ValueError(f"Error in response data from LLM: {candidate.finish_message[:350]}...")
+                else:
+                    raise ValueError(f"Error in response data from LLM: {response_data}")
+
+            role = content.role
+            assert role == "model", f"Unknown role in response: {role}"
 
             parts = content.parts
 
@@ -142,10 +171,12 @@ class GoogleVertexClient(GoogleAIClient):
 
                 # NOTE: this also involves stripping the inner monologue out of the function
                 if llm_config.put_inner_thoughts_in_kwargs:
-                    from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+                    from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX
 
-                    assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
-                    inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                    assert (
+                        INNER_THOUGHTS_KWARG_VERTEX in function_args
+                    ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+                    inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                     assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                 else:
                     inner_thoughts = None
@@ -167,15 +198,50 @@ class GoogleVertexClient(GoogleAIClient):
                 )
 
             else:
+                try:
+                    # Structured output tool call
+                    function_call = json_loads(response_message.text)
+                    function_name = function_call["name"]
+                    function_args = function_call["args"]
+                    assert isinstance(function_args, dict), function_args
 
-                # Inner thoughts are the content by default
-                inner_thoughts = response_message.text
+                    # NOTE: this also involves stripping the inner monologue out of the function
+                    if llm_config.put_inner_thoughts_in_kwargs:
+                        from letta.local_llm.constants import INNER_THOUGHTS_KWARG
 
-                # Google AI API doesn't generate tool call IDs
-                openai_response_message = Message(
-                    role="assistant",  # NOTE: "model" -> "assistant"
-                    content=inner_thoughts,
-                )
+                        assert (
+                            INNER_THOUGHTS_KWARG in function_args
+                        ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+                        inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                        assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                    else:
+                        inner_thoughts = None
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                        tool_calls=[
+                            ToolCall(
+                                id=get_tool_call_id(),
+                                type="function",
+                                function=FunctionCall(
+                                    name=function_name,
+                                    arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                ),
+                            )
+                        ],
+                    )
+
+                except:
+                    # Inner thoughts are the content by default
+                    inner_thoughts = response_message.text
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant",  # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                    )
 
             # Google AI API uses different finish reason strings than OpenAI
             # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
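
In the structured-outputs branch the response text is itself the serialized function call, so the try above parses it and rebuilds an OpenAI-style tool call; any parse or key failure falls through to the old treat-text-as-content behavior (the bare except in the shipped code is broad by design). A self-contained sketch of that parse, with a made-up payload:

    import json

    # Illustrative structured-output text; the tool name and args are made up.
    raw_text = '{"name": "send_message", "args": {"inner_thoughts": "Greet the user.", "message": "Hi!"}}'

    function_call = json.loads(raw_text)
    function_name = function_call["name"]
    function_args = function_call["args"]
    assert isinstance(function_args, dict)

    # The inner monologue rides along inside args and is stripped back out.
    inner_thoughts = function_args.pop("inner_thoughts", None)
    print(function_name, inner_thoughts, function_args)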
@@ -244,3 +310,17 @@ class GoogleVertexClient(GoogleAIClient):
             )
         except KeyError as e:
             raise e
+
+    def get_function_call_response_schema(self, tool: dict) -> dict:
+        return {
+            "type": "OBJECT",
+            "properties": {
+                "name": {"type": "STRING", "enum": [tool["name"]]},
+                "args": {
+                    "type": "OBJECT",
+                    "properties": tool["parameters"]["properties"],
+                    "required": tool["parameters"]["required"],
+                },
+            },
+            "required": ["name", "args"],
+        }
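
Given a tool in the OpenAI-style function format, the schema pins "name" to a single-value enum and nests the tool's own parameter schema under "args". For a hypothetical send_message tool:

    # Hypothetical input tool dict.
    tool = {
        "name": "send_message",
        "parameters": {
            "properties": {"message": {"type": "STRING"}},
            "required": ["message"],
        },
    }

    # What get_function_call_response_schema(tool) returns for it:
    expected = {
        "type": "OBJECT",
        "properties": {
            "name": {"type": "STRING", "enum": ["send_message"]},
            "args": {
                "type": "OBJECT",
                "properties": {"message": {"type": "STRING"}},
                "required": ["message"],
            },
        },
        "required": ["name", "args"],
    }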
letta/llm_api/llm_api_tools.py CHANGED
@@ -24,7 +24,7 @@ from letta.llm_api.openai import (
 from letta.local_llm.chat_completion_proxy import get_chat_completion
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
-from letta.schemas.enums import ProviderType
+from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
@@ -172,10 +172,12 @@ def create(
         if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])
-        elif llm_config.provider_name and llm_config.provider_name != ProviderType.openai.value:
+        elif llm_config.provider_category == ProviderCategory.byok:
             from letta.services.provider_manager import ProviderManager
+            from letta.services.user_manager import UserManager
 
-            api_key = ProviderManager().get_override_key(llm_config.provider_name)
+            actor = UserManager().get_user_or_default(user_id=user_id)
+            api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=actor)
         elif model_settings.openai_api_key is None:
             # the openai python client requires a dummy API key
             api_key = "DUMMY_API_KEY"
@@ -379,7 +381,9 @@ def create(
                 extended_thinking=llm_config.enable_reasoner,
                 max_reasoning_tokens=llm_config.max_reasoning_tokens,
                 provider_name=llm_config.provider_name,
+                provider_category=llm_config.provider_category,
                 name=name,
+                user_id=user_id,
             )
 
         else:
@@ -390,6 +394,8 @@ def create(
                 extended_thinking=llm_config.enable_reasoner,
                 max_reasoning_tokens=llm_config.max_reasoning_tokens,
                 provider_name=llm_config.provider_name,
+                provider_category=llm_config.provider_category,
+                user_id=user_id,
             )
 
         if llm_config.put_inner_thoughts_in_kwargs:
letta/llm_api/llm_client.py CHANGED
@@ -1,8 +1,11 @@
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.schemas.enums import ProviderType
 
+if TYPE_CHECKING:
+    from letta.orm import User
+
 
 class LLMClient:
     """Factory class for creating LLM clients based on the model endpoint type."""
@@ -10,9 +13,8 @@ class LLMClient:
     @staticmethod
     def create(
         provider_type: ProviderType,
-        provider_name: Optional[str] = None,
         put_inner_thoughts_first: bool = True,
-        actor_id: Optional[str] = None,
+        actor: Optional["User"] = None,
     ) -> Optional[LLMClientBase]:
         """
         Create an LLM client based on the model endpoint type.
@@ -32,33 +34,29 @@ class LLMClient:
                 from letta.llm_api.google_ai_client import GoogleAIClient
 
                 return GoogleAIClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    actor_id=actor_id,
+                    actor=actor,
                 )
             case ProviderType.google_vertex:
                 from letta.llm_api.google_vertex_client import GoogleVertexClient
 
                 return GoogleVertexClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    actor_id=actor_id,
+                    actor=actor,
                 )
             case ProviderType.anthropic:
                 from letta.llm_api.anthropic_client import AnthropicClient
 
                 return AnthropicClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    actor_id=actor_id,
+                    actor=actor,
                 )
             case ProviderType.openai:
                 from letta.llm_api.openai_client import OpenAIClient
 
                 return OpenAIClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    actor_id=actor_id,
+                    actor=actor,
                 )
             case _:
                 return None
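
Call sites now hand the factory a full User instead of a provider_name/actor_id pair; BYOK key lookups and the OpenAI user field are derived from the actor inside the client. A minimal usage sketch, resolving the actor the same way this diff does elsewhere:

    from letta.llm_api.llm_client import LLMClient
    from letta.schemas.enums import ProviderType
    from letta.services.user_manager import UserManager

    # Fall back to the default user when no explicit user id is available.
    actor = UserManager().get_user_or_default(user_id=None)

    client = LLMClient.create(
        provider_type=ProviderType.openai,
        put_inner_thoughts_first=True,
        actor=actor,
    )
    if client is None:
        raise ValueError("no LLM client implemented for this provider type")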
letta/llm_api/llm_client_base.py CHANGED
@@ -1,5 +1,5 @@
 from abc import abstractmethod
-from typing import Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
 from anthropic.types.beta.messages import BetaMessageBatch
 from openai import AsyncStream, Stream
@@ -11,6 +11,9 @@ from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.tracing import log_event
 
+if TYPE_CHECKING:
+    from letta.orm import User
+
 
 class LLMClientBase:
     """
@@ -20,13 +23,11 @@ class LLMClientBase:
 
     def __init__(
         self,
-        provider_name: Optional[str] = None,
        put_inner_thoughts_first: Optional[bool] = True,
        use_tool_naming: bool = True,
-        actor_id: Optional[str] = None,
+        actor: Optional["User"] = None,
     ):
-        self.actor_id = actor_id
-        self.provider_name = provider_name
+        self.actor = actor
         self.put_inner_thoughts_first = put_inner_thoughts_first
         self.use_tool_naming = use_tool_naming
 
letta/llm_api/openai_client.py CHANGED
@@ -22,7 +22,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.log import get_logger
-from letta.schemas.enums import ProviderType
+from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
@@ -78,10 +78,10 @@ def supports_parallel_tool_calling(model: str) -> bool:
 class OpenAIClient(LLMClientBase):
     def _prepare_client_kwargs(self, llm_config: LLMConfig) -> dict:
         api_key = None
-        if llm_config.provider_name and llm_config.provider_name != ProviderType.openai.value:
+        if llm_config.provider_category == ProviderCategory.byok:
             from letta.services.provider_manager import ProviderManager
 
-            api_key = ProviderManager().get_override_key(llm_config.provider_name)
+            api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
 
         if not api_key:
             api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
@@ -156,11 +156,11 @@ class OpenAIClient(LLMClientBase):
         )
 
         # always set user id for openai requests
-        if self.actor_id:
-            data.user = self.actor_id
+        if self.actor:
+            data.user = self.actor.id
 
         if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
-            if not self.actor_id:
+            if not self.actor:
                 # override user id for inference.letta.com
                 import uuid
 
letta/local_llm/constants.py CHANGED
@@ -26,6 +26,7 @@ DEFAULT_WRAPPER = ChatMLInnerMonologueWrapper
 DEFAULT_WRAPPER_NAME = "chatml"
 
 INNER_THOUGHTS_KWARG = "inner_thoughts"
+INNER_THOUGHTS_KWARG_VERTEX = "thinking"
 INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only."
 INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST = f"Deep inner monologue private to you only. Think before you act, so always generate arg '{INNER_THOUGHTS_KWARG}' first before any other arg."
 INNER_THOUGHTS_CLI_SYMBOL = "💭"
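
Vertex function calls now carry the inner monologue under "thinking" while other backends keep "inner_thoughts"; the parsers pop whichever constant matches their backend. A toy illustration of the two conventions (payloads made up):

    from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_VERTEX

    # Hypothetical function args as each backend would deliver them.
    openai_style_args = {"inner_thoughts": "Plan first.", "message": "Hi!"}
    vertex_style_args = {"thinking": "Plan first.", "message": "Hi!"}

    # Each parser pops the kwarg its backend uses.
    assert openai_style_args.pop(INNER_THOUGHTS_KWARG) == "Plan first."
    assert vertex_style_args.pop(INNER_THOUGHTS_KWARG_VERTEX) == "Plan first."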
letta/memory.py CHANGED
@@ -1,4 +1,4 @@
-from typing import Callable, Dict, List
+from typing import TYPE_CHECKING, Callable, Dict, List
 
 from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
 from letta.llm_api.llm_api_tools import create
@@ -13,6 +13,9 @@ from letta.settings import summarizer_settings
 from letta.tracing import trace_method
 from letta.utils import count_tokens, printd
 
+if TYPE_CHECKING:
+    from letta.orm import User
+
 
 def get_memory_functions(cls: Memory) -> Dict[str, Callable]:
     """Get memory functions for a memory class"""
@@ -51,6 +54,7 @@ def _format_summary_history(message_history: List[Message]):
 def summarize_messages(
     agent_state: AgentState,
     message_sequence_to_summarize: List[Message],
+    actor: "User",
 ):
     """Summarize a message sequence using GPT"""
     # we need the context_window
@@ -63,7 +67,7 @@ def summarize_messages(
         trunc_ratio = (summarizer_settings.memory_warning_threshold * context_window / summary_input_tkns) * 0.8  # For good measure...
         cutoff = int(len(message_sequence_to_summarize) * trunc_ratio)
         summary_input = str(
-            [summarize_messages(agent_state, message_sequence_to_summarize=message_sequence_to_summarize[:cutoff])]
+            [summarize_messages(agent_state, message_sequence_to_summarize=message_sequence_to_summarize[:cutoff], actor=actor)]
             + message_sequence_to_summarize[cutoff:]
         )
 
@@ -79,10 +83,9 @@ def summarize_messages(
     llm_config_no_inner_thoughts.put_inner_thoughts_in_kwargs = False
 
     llm_client = LLMClient.create(
-        provider_name=llm_config_no_inner_thoughts.provider_name,
-        provider_type=llm_config_no_inner_thoughts.model_endpoint_type,
+        provider_type=agent_state.llm_config.model_endpoint_type,
         put_inner_thoughts_first=False,
-        actor_id=agent_state.created_by_id,
+        actor=actor,
     )
     # try to use new client, otherwise fallback to old flow
     # TODO: we can just directly call the LLM here?
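
The truncation branch recurses with the same actor. The arithmetic it applies is easy to check by hand; a worked example with made-up numbers:

    # Worked example of the truncation math in summarize_messages (values made up).
    memory_warning_threshold = 0.75  # summarizer_settings.memory_warning_threshold
    context_window = 8000
    summary_input_tkns = 12000  # prompt exceeds threshold * context_window
    num_messages = 40

    trunc_ratio = (memory_warning_threshold * context_window / summary_input_tkns) * 0.8  # 0.4
    cutoff = int(num_messages * trunc_ratio)  # 16
    # messages[:cutoff] are recursively summarized; messages[cutoff:] pass through verbatim.
    print(trunc_ratio, cutoff)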
letta/orm/provider.py CHANGED
@@ -26,6 +26,7 @@ class Provider(SqlalchemyBase, OrganizationMixin):
 
     name: Mapped[str] = mapped_column(nullable=False, doc="The name of the provider")
     provider_type: Mapped[str] = mapped_column(nullable=True, doc="The type of the provider")
+    provider_category: Mapped[str] = mapped_column(nullable=True, doc="The category of the provider (base or byok)")
     api_key: Mapped[str] = mapped_column(nullable=True, doc="API key used for requests to the provider.")
     base_url: Mapped[str] = mapped_column(nullable=True, doc="Base URL for the provider.")
 
letta/schemas/enums.py CHANGED
@@ -19,6 +19,11 @@ class ProviderType(str, Enum):
     bedrock = "bedrock"
 
 
+class ProviderCategory(str, Enum):
+    base = "base"
+    byok = "byok"
+
+
 class MessageRole(str, Enum):
     assistant = "assistant"
     user = "user"
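
Because ProviderCategory subclasses str, members compare equal to their raw values and serialize cleanly through Pydantic, which is what the provider_category == ProviderCategory.byok checks elsewhere in this diff rely on:

    from letta.schemas.enums import ProviderCategory

    assert ProviderCategory.byok == "byok"  # str-valued enum compares to its value
    assert ProviderCategory("base") is ProviderCategory.base  # round-trips from raw strings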
letta/schemas/llm_config.py CHANGED
@@ -4,6 +4,7 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator
 
 from letta.constants import LETTA_MODEL_ENDPOINT
 from letta.log import get_logger
+from letta.schemas.enums import ProviderCategory
 
 logger = get_logger(__name__)
 
@@ -51,6 +52,7 @@ class LLMConfig(BaseModel):
     ] = Field(..., description="The endpoint type for the model.")
     model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
     provider_name: Optional[str] = Field(None, description="The provider name for the model.")
+    provider_category: Optional[ProviderCategory] = Field(None, description="The provider category for the model.")
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")
     put_inner_thoughts_in_kwargs: Optional[bool] = Field(
letta/schemas/message.py CHANGED
@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field, field_validator
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, TOOL_CALL_ID_MAX_LEN
 from letta.helpers.datetime_helpers import get_utc_time, is_utc_datetime
 from letta.helpers.json_helpers import json_dumps
-from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_VERTEX
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_base import OrmMetadataBase
 from letta.schemas.letta_message import (
@@ -914,9 +914,9 @@ class Message(BaseMessage):
                     function_args = {"args": function_args}
 
                 if put_inner_thoughts_in_kwargs and text_content is not None:
-                    assert "inner_thoughts" not in function_args, function_args
+                    assert INNER_THOUGHTS_KWARG not in function_args, function_args
                     assert len(self.tool_calls) == 1
-                    function_args[INNER_THOUGHTS_KWARG] = text_content
+                    function_args[INNER_THOUGHTS_KWARG_VERTEX] = text_content
 
                 parts.append(
                     {