letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agents/helpers.py +4 -0
- letta/agents/letta_agent.py +142 -5
- letta/constants.py +10 -7
- letta/data_sources/connectors.py +70 -53
- letta/embeddings.py +3 -240
- letta/errors.py +28 -0
- letta/functions/function_sets/base.py +4 -4
- letta/functions/functions.py +287 -32
- letta/functions/mcp_client/types.py +11 -0
- letta/functions/schema_validator.py +187 -0
- letta/functions/typescript_parser.py +196 -0
- letta/helpers/datetime_helpers.py +8 -4
- letta/helpers/tool_execution_helper.py +25 -2
- letta/llm_api/anthropic_client.py +23 -18
- letta/llm_api/azure_client.py +73 -0
- letta/llm_api/bedrock_client.py +8 -4
- letta/llm_api/google_vertex_client.py +14 -5
- letta/llm_api/llm_api_tools.py +2 -217
- letta/llm_api/llm_client.py +15 -1
- letta/llm_api/llm_client_base.py +32 -1
- letta/llm_api/openai.py +1 -0
- letta/llm_api/openai_client.py +18 -28
- letta/llm_api/together_client.py +55 -0
- letta/orm/provider.py +1 -0
- letta/orm/step_metrics.py +40 -1
- letta/otel/db_pool_monitoring.py +1 -1
- letta/schemas/agent.py +3 -4
- letta/schemas/agent_file.py +2 -0
- letta/schemas/block.py +11 -5
- letta/schemas/embedding_config.py +4 -5
- letta/schemas/enums.py +1 -1
- letta/schemas/job.py +2 -3
- letta/schemas/llm_config.py +79 -7
- letta/schemas/mcp.py +0 -24
- letta/schemas/message.py +0 -108
- letta/schemas/openai/chat_completion_request.py +1 -0
- letta/schemas/providers/__init__.py +0 -2
- letta/schemas/providers/anthropic.py +106 -8
- letta/schemas/providers/azure.py +102 -8
- letta/schemas/providers/base.py +10 -3
- letta/schemas/providers/bedrock.py +28 -16
- letta/schemas/providers/letta.py +3 -3
- letta/schemas/providers/ollama.py +2 -12
- letta/schemas/providers/openai.py +4 -4
- letta/schemas/providers/together.py +14 -2
- letta/schemas/sandbox_config.py +2 -1
- letta/schemas/tool.py +46 -22
- letta/server/rest_api/routers/v1/agents.py +179 -38
- letta/server/rest_api/routers/v1/folders.py +13 -8
- letta/server/rest_api/routers/v1/providers.py +10 -3
- letta/server/rest_api/routers/v1/sources.py +14 -8
- letta/server/rest_api/routers/v1/steps.py +17 -1
- letta/server/rest_api/routers/v1/tools.py +96 -5
- letta/server/rest_api/streaming_response.py +91 -45
- letta/server/server.py +27 -38
- letta/services/agent_manager.py +92 -20
- letta/services/agent_serialization_manager.py +11 -7
- letta/services/context_window_calculator/context_window_calculator.py +40 -2
- letta/services/helpers/agent_manager_helper.py +73 -12
- letta/services/mcp_manager.py +109 -15
- letta/services/passage_manager.py +28 -109
- letta/services/provider_manager.py +24 -0
- letta/services/step_manager.py +68 -0
- letta/services/summarizer/summarizer.py +1 -4
- letta/services/tool_executor/core_tool_executor.py +1 -1
- letta/services/tool_executor/sandbox_tool_executor.py +26 -9
- letta/services/tool_manager.py +82 -5
- letta/services/tool_sandbox/base.py +3 -11
- letta/services/tool_sandbox/modal_constants.py +17 -0
- letta/services/tool_sandbox/modal_deployment_manager.py +242 -0
- letta/services/tool_sandbox/modal_sandbox.py +218 -3
- letta/services/tool_sandbox/modal_sandbox_v2.py +429 -0
- letta/services/tool_sandbox/modal_version_manager.py +273 -0
- letta/services/tool_sandbox/safe_pickle.py +193 -0
- letta/settings.py +5 -3
- letta/templates/sandbox_code_file.py.j2 +2 -4
- letta/templates/sandbox_code_file_async.py.j2 +2 -4
- letta/utils.py +1 -1
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/RECORD +84 -81
- letta/llm_api/anthropic.py +0 -1206
- letta/llm_api/aws_bedrock.py +0 -104
- letta/llm_api/azure_openai.py +0 -118
- letta/llm_api/azure_openai_constants.py +0 -11
- letta/llm_api/cohere.py +0 -391
- letta/schemas/providers/cohere.py +0 -18
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/LICENSE +0 -0
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/entry_points.txt +0 -0
letta/schemas/agent.py
CHANGED
@@ -2,7 +2,7 @@ from datetime import datetime
 from enum import Enum
 from typing import Dict, List, Optional
 
-from pydantic import BaseModel, Field, field_validator, model_validator
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
 from letta.constants import CORE_MEMORY_LINE_NUMBER_WARNING, DEFAULT_EMBEDDING_CHUNK_SIZE
 from letta.schemas.block import CreateBlock
@@ -211,7 +211,7 @@ class CreateAgent(BaseModel, validate_assignment=True):  #
     max_reasoning_tokens: Optional[int] = Field(
         None, description="The maximum number of tokens to generate for reasoning step. If not set, the model will use its default value."
     )
-    enable_reasoner: Optional[bool] = Field(
+    enable_reasoner: Optional[bool] = Field(True, description="Whether to enable internal extended thinking step for a reasoner model.")
     reasoning: Optional[bool] = Field(None, description="Whether to enable reasoning for this agent.")
     from_template: Optional[str] = Field(None, description="The template id used to configure the agent")
     template: bool = Field(False, description="Whether the agent is a template")
@@ -355,8 +355,7 @@ class UpdateAgent(BaseModel):
         description="If set to True, the agent will be hidden.",
     )
 
-
-    extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields
 
 
 class AgentStepResponse(BaseModel):
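Several schemas in this release (CreateAgent/UpdateAgent above, plus BaseBlock, BlockUpdate, and JobUpdate below) swap a bare `extra = "ignore"` class attribute for pydantic v2's `model_config = ConfigDict(extra="ignore")`. A minimal sketch (not letta code) of why this matters:

```python
# Minimal sketch, not letta code: in pydantic v2 a bare `extra = "ignore"` is an
# ordinary class attribute with no effect on validation, while
# `model_config = ConfigDict(...)` is the supported way to set model options.
from typing import Optional

from pydantic import BaseModel, ConfigDict


class UpdateSketch(BaseModel):
    model_config = ConfigDict(extra="ignore")  # unknown input fields are dropped

    name: Optional[str] = None


print(UpdateSketch(name="a", unknown_field=1))  # name='a'; unknown_field is ignored
```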
letta/schemas/agent_file.py
CHANGED
@@ -24,12 +24,14 @@ class ImportResult:
         success: bool,
         message: str = "",
         imported_count: int = 0,
+        imported_agent_ids: Optional[List[str]] = None,
         errors: Optional[List[str]] = None,
         id_mappings: Optional[Dict[str, str]] = None,
     ):
         self.success = success
         self.message = message
         self.imported_count = imported_count
+        self.imported_agent_ids = imported_agent_ids or []
         self.errors = errors or []
         self.id_mappings = id_mappings or {}
 
letta/schemas/block.py
CHANGED
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import Optional
 
-from pydantic import Field, model_validator
+from pydantic import ConfigDict, Field, model_validator
 from typing_extensions import Self
 
 from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT, DEFAULT_HUMAN_BLOCK_DESCRIPTION, DEFAULT_PERSONA_BLOCK_DESCRIPTION
@@ -38,8 +38,7 @@ class BaseBlock(LettaBase, validate_assignment=True):
     # def __len__(self):
     #     return len(self.value)
 
-
-    extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields
 
     @model_validator(mode="after")
     def verify_char_limit(self) -> Self:
@@ -115,8 +114,7 @@ class BlockUpdate(BaseBlock):
     value: Optional[str] = Field(None, description="Value of the block.")
     project_id: Optional[str] = Field(None, description="The associated project id.")
 
-
-    extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields
 
 
 class CreateBlock(BaseBlock):
@@ -131,6 +129,14 @@ class CreateBlock(BaseBlock):
     is_template: bool = False
     template_name: Optional[str] = Field(None, description="Name of the block if it is a template.", alias="name")
 
+    @model_validator(mode="before")
+    @classmethod
+    def ensure_value_is_string(cls, data):
+        """Convert None value to empty string"""
+        if data and isinstance(data, dict) and data.get("value") is None:
+            data["value"] = ""
+        return data
+
 
 class CreateHuman(CreateBlock):
     """Create a human block"""
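The new `ensure_value_is_string` hook on `CreateBlock` normalizes a `None` block value before field validation runs. A self-contained sketch of the behavior (the class below is a hypothetical stand-in, not letta's actual CreateBlock):

```python
# Stand-in model (hypothetical) showing the effect of the new mode="before"
# validator: value=None is coerced to "" before the `value: str` field is
# validated, so block creation no longer fails on a None value.
from pydantic import BaseModel, Field, model_validator


class CreateBlockSketch(BaseModel):
    value: str = Field(..., description="Value of the block.")

    @model_validator(mode="before")
    @classmethod
    def ensure_value_is_string(cls, data):
        """Convert None value to empty string"""
        if data and isinstance(data, dict) and data.get("value") is None:
            data["value"] = ""
        return data


assert CreateBlockSketch(value=None).value == ""  # would raise without the hook
```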
letta/schemas/embedding_config.py
CHANGED
@@ -12,7 +12,6 @@ class EmbeddingConfig(BaseModel):
         "openai",
         "anthropic",
         "bedrock",
-        "cohere",
         "google_ai",
         "google_vertex",
         "azure",
@@ -63,11 +62,11 @@ class EmbeddingConfig(BaseModel):
             )
         elif model_name == "letta":
             return cls(
-                embedding_endpoint="https://
-                embedding_model="
-                embedding_dim=
+                embedding_endpoint="https://embeddings.letta.com/",
+                embedding_model="letta-free",
+                embedding_dim=1536,
                 embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
-                embedding_endpoint_type="
+                embedding_endpoint_type="openai",
             )
         elif provider == "pinecone":
            # default config for pinecone with empty endpoint
letta/schemas/enums.py
CHANGED
@@ -18,7 +18,6 @@ class ProviderType(str, Enum):
     azure = "azure"
     vllm = "vllm"
     bedrock = "bedrock"
-    cohere = "cohere"
 
 
 class ProviderCategory(str, Enum):
@@ -155,6 +154,7 @@ class DuplicateFileHandling(str, Enum):
     SKIP = "skip"  # skip files with duplicate names
     ERROR = "error"  # error when duplicate names are encountered
     SUFFIX = "suffix"  # add numeric suffix to make names unique (default behavior)
+    REPLACE = "replace"  # replace the file with the duplicate name
 
 
 class SandboxType(str, Enum):
letta/schemas/job.py
CHANGED
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import List, Optional
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.schemas.enums import JobStatus, JobType
@@ -81,8 +81,7 @@ class BatchJob(JobBase):
 class JobUpdate(JobBase):
     status: Optional[JobStatus] = Field(None, description="The status of the job.")
 
-
-    extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields
 
 
 class LettaRequestConfig(BaseModel):
letta/schemas/llm_config.py
CHANGED
@@ -16,7 +16,6 @@ class LLMConfig(BaseModel):
     model_endpoint_type: Literal[
         "openai",
         "anthropic",
-        "cohere",
         "google_ai",
         "google_vertex",
         "azure",
@@ -56,7 +55,7 @@ class LLMConfig(BaseModel):
         description="The maximum number of tokens to generate. If not set, the model will use its default value.",
     )
     enable_reasoner: bool = Field(
-
+        True, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
     )
     reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
         None,
@@ -71,10 +70,50 @@ class LLMConfig(BaseModel):
         description="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.",
     )
     compatibility_type: Optional[Literal["gguf", "mlx"]] = Field(None, description="The framework compatibility type for the model.")
+    verbosity: Optional[Literal["low", "medium", "high"]] = Field(
+        "medium",
+        description="Soft control for how verbose model output should be, used for GPT-5 models.",
+    )
 
     # FIXME hack to silence pydantic protected namespace warning
     model_config = ConfigDict(protected_namespaces=())
 
+    @model_validator(mode="before")
+    @classmethod
+    def set_model_specific_defaults(cls, values):
+        """
+        Set model-specific default values for fields like max_tokens, context_window, etc.
+        This ensures the same defaults from default_config are applied automatically.
+        """
+        model = values.get("model")
+        if model is None:
+            return values
+
+        # Set max_tokens defaults based on model
+        if values.get("max_tokens") is None:
+            if model == "gpt-5":
+                values["max_tokens"] = 16384
+            elif model == "gpt-4.1":
+                values["max_tokens"] = 8192
+            # For other models, the field default of 4096 will be used
+
+        # Set context_window defaults if not provided
+        if values.get("context_window") is None:
+            if model == "gpt-5":
+                values["context_window"] = 128000
+            elif model == "gpt-4.1":
+                values["context_window"] = 256000
+            elif model == "gpt-4o" or model == "gpt-4o-mini":
+                values["context_window"] = 128000
+            elif model == "gpt-4":
+                values["context_window"] = 8192
+
+        # Set verbosity defaults for GPT-5 models
+        if model == "gpt-5" and values.get("verbosity") is None:
+            values["verbosity"] = "medium"
+
+        return values
+
     @model_validator(mode="before")
     @classmethod
     def set_default_enable_reasoner(cls, values):
@@ -159,6 +198,16 @@ class LLMConfig(BaseModel):
                 context_window=256000,
                 max_tokens=8192,
             )
+        elif model_name == "gpt-5":
+            return cls(
+                model="gpt-5",
+                model_endpoint_type="openai",
+                model_endpoint="https://api.openai.com/v1",
+                model_wrapper=None,
+                context_window=128000,
+                verbosity="medium",
+                max_tokens=16384,
+            )
         elif model_name == "letta":
             return cls(
                 model="memgpt-openai",
@@ -196,13 +245,36 @@ class LLMConfig(BaseModel):
             config.model.startswith("gemini-2.5-flash") or config.model.startswith("gemini-2.5-pro")
         )
 
+    @classmethod
+    def is_google_ai_reasoning_model(cls, config: "LLMConfig") -> bool:
+        return config.model_endpoint_type == "google_ai" and (
+            config.model.startswith("gemini-2.5-flash") or config.model.startswith("gemini-2.5-pro")
+        )
+
+    @classmethod
+    def supports_verbosity(cls, config: "LLMConfig") -> bool:
+        """Check if the model supports verbosity control."""
+        return config.model_endpoint_type == "openai" and config.model.startswith("gpt-5")
+
     @classmethod
     def apply_reasoning_setting_to_config(cls, config: "LLMConfig", reasoning: bool):
         if not reasoning:
-            if cls.is_openai_reasoning_model(config)
-
-
-
+            if cls.is_openai_reasoning_model(config):
+                logger.warning("Reasoning cannot be disabled for OpenAI o1/o3 models")
+                config.put_inner_thoughts_in_kwargs = False
+                config.enable_reasoner = True
+                if config.reasoning_effort is None:
+                    config.reasoning_effort = "medium"
+            elif config.model.startswith("gemini-2.5-pro"):
+                logger.warning("Reasoning cannot be disabled for Gemini 2.5 Pro model")
+                # Handle as non-reasoner until we support summary
+                config.put_inner_thoughts_in_kwargs = True
+                config.enable_reasoner = True
+                if config.max_reasoning_tokens == 0:
+                    config.max_reasoning_tokens = 1024
+            else:
+                config.put_inner_thoughts_in_kwargs = False
+                config.enable_reasoner = False
 
         else:
             config.enable_reasoner = True
@@ -210,7 +282,7 @@ class LLMConfig(BaseModel):
                 config.put_inner_thoughts_in_kwargs = False
                 if config.max_reasoning_tokens == 0:
                     config.max_reasoning_tokens = 1024
-            elif cls.is_google_vertex_reasoning_model(config):
+            elif cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config):
                 # Handle as non-reasoner until we support summary
                 config.put_inner_thoughts_in_kwargs = True
                 if config.max_reasoning_tokens == 0:
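With the new `set_model_specific_defaults` before-validator, model-specific defaults apply on ordinary construction, not only through `default_config`. A rough sketch of the expected behavior (the exact set of required LLMConfig fields is an assumption; the default values themselves come from the diff above):

```python
# Rough sketch under stated assumptions; requires the letta package.
from letta.schemas.llm_config import LLMConfig

config = LLMConfig(model="gpt-5", model_endpoint_type="openai")
assert config.max_tokens == 16384        # gpt-5 default from set_model_specific_defaults
assert config.context_window == 128000   # filled in before field validation
assert config.verbosity == "medium"      # GPT-5-only soft verbosity control
assert LLMConfig.supports_verbosity(config)
```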
letta/schemas/mcp.py
CHANGED
@@ -81,29 +81,6 @@ class MCPServer(BaseMCPServer):
             raise ValueError(f"Unsupported server type: {self.server_type}")
 
 
-class RegisterSSEMCPServer(LettaBase):
-    server_name: str = Field(..., description="The name of the server")
-    server_type: MCPServerType = MCPServerType.SSE
-    server_url: str = Field(..., description="The URL of the server (MCP SSE client will connect to this URL)")
-    token: Optional[str] = Field(None, description="The access token or API key for the MCP server used for authentication")
-    custom_headers: Optional[Dict[str, str]] = Field(None, description="Custom authentication headers as key-value pairs")
-
-
-class RegisterStdioMCPServer(LettaBase):
-    server_name: str = Field(..., description="The name of the server")
-    server_type: MCPServerType = MCPServerType.STDIO
-    stdio_config: StdioServerConfig = Field(..., description="The configuration for the server (MCP 'local' client will run this command)")
-
-
-class RegisterStreamableHTTPMCPServer(LettaBase):
-    server_name: str = Field(..., description="The name of the server")
-    server_type: MCPServerType = MCPServerType.STREAMABLE_HTTP
-    server_url: str = Field(..., description="The URL path for the streamable HTTP server (e.g., 'example/mcp')")
-    auth_header: Optional[str] = Field(None, description="The name of the authentication header (e.g., 'Authorization')")
-    auth_token: Optional[str] = Field(None, description="The authentication token or API key value")
-    custom_headers: Optional[Dict[str, str]] = Field(None, description="Custom authentication headers as key-value pairs")
-
-
 class UpdateSSEMCPServer(LettaBase):
     """Update an SSE MCP server"""
 
@@ -133,7 +110,6 @@ class UpdateStreamableHTTPMCPServer(LettaBase):
 
 
 UpdateMCPServer = Union[UpdateSSEMCPServer, UpdateStdioMCPServer, UpdateStreamableHTTPMCPServer]
-RegisterMCPServer = Union[RegisterSSEMCPServer, RegisterStdioMCPServer, RegisterStreamableHTTPMCPServer]
 
 
 # OAuth-related schemas
letta/schemas/message.py
CHANGED
@@ -1051,114 +1051,6 @@ class Message(BaseMessage):
 
         return google_ai_message
 
-    def to_cohere_dict(
-        self,
-        function_call_role: Optional[str] = "SYSTEM",
-        function_call_prefix: Optional[str] = "[CHATBOT called function]",
-        function_response_role: Optional[str] = "SYSTEM",
-        function_response_prefix: Optional[str] = "[CHATBOT function returned]",
-        inner_thoughts_as_kwarg: Optional[bool] = False,
-    ) -> List[dict]:
-        """
-        Cohere chat_history dicts only have 'role' and 'message' fields
-        """
-
-        # NOTE: returns a list of dicts so that we can convert:
-        #  assistant [cot]: "I'll send a message"
-        #  assistant [func]: send_message("hi")
-        #  tool: {'status': 'OK'}
-        # to:
-        #  CHATBOT.text: "I'll send a message"
-        #  SYSTEM.text: [CHATBOT called function] send_message("hi")
-        #  SYSTEM.text: [CHATBOT function returned] {'status': 'OK'}
-
-        # TODO: update this prompt style once guidance from Cohere on
-        # embedded function calls in multi-turn conversation become more clear
-        if self.content and len(self.content) == 1 and isinstance(self.content[0], TextContent):
-            text_content = self.content[0].text
-        elif self.content and len(self.content) == 1 and isinstance(self.content[0], ToolReturnContent):
-            text_content = self.content[0].content
-        elif self.content and len(self.content) == 1 and isinstance(self.content[0], ImageContent):
-            text_content = "[Image Here]"
-        else:
-            text_content = None
-        if self.role == "system":
-            """
-            The chat_history parameter should not be used for SYSTEM messages in most cases.
-            Instead, to add a SYSTEM role message at the beginning of a conversation, the preamble parameter should be used.
-            """
-            raise UserWarning(f"role 'system' messages should go in 'preamble' field for Cohere API")
-
-        elif self.role == "user":
-            assert all([v is not None for v in [text_content, self.role]]), vars(self)
-            cohere_message = [
-                {
-                    "role": "USER",
-                    "message": text_content,
-                }
-            ]
-
-        elif self.role == "assistant":
-            # NOTE: we may break this into two message - an inner thought and a function call
-            # Optionally, we could just make this a function call with the inner thought inside
-            assert self.tool_calls is not None or text_content is not None
-
-            if text_content and self.tool_calls:
-                if inner_thoughts_as_kwarg:
-                    raise NotImplementedError
-                cohere_message = [
-                    {
-                        "role": "CHATBOT",
-                        "message": text_content,
-                    },
-                ]
-                for tc in self.tool_calls:
-                    function_name = tc.function["name"]
-                    function_args = parse_json(tc.function["arguments"])
-                    function_args_str = ",".join([f"{k}={v}" for k, v in function_args.items()])
-                    function_call_text = f"{function_name}({function_args_str})"
-                    cohere_message.append(
-                        {
-                            "role": function_call_role,
-                            "message": f"{function_call_prefix} {function_call_text}",
-                        }
-                    )
-            elif not text_content and self.tool_calls:
-                cohere_message = []
-                for tc in self.tool_calls:
-                    # TODO better way to pack?
-                    function_call_text = json_dumps(tc.to_dict())
-                    cohere_message.append(
-                        {
-                            "role": function_call_role,
-                            "message": f"{function_call_prefix} {function_call_text}",
-                        }
-                    )
-            elif text_content and not self.tool_calls:
-                cohere_message = [
-                    {
-                        "role": "CHATBOT",
-                        "message": text_content,
-                    }
-                ]
-            else:
-                raise ValueError("Message does not have content nor tool_calls")
-
-        elif self.role == "tool":
-            assert all([v is not None for v in [self.role, self.tool_call_id]]), vars(self)
-            function_response_text = text_content
-            cohere_message = [
-                {
-                    "role": function_response_role,
-                    "message": f"{function_response_prefix} {function_response_text}",
-                }
-            ]
-
-        else:
-            raise ValueError(self.role)
-
-        return cohere_message
-
     @staticmethod
     def generate_otid_from_id(message_id: str, index: int) -> str:
         """
letta/schemas/openai/chat_completion_request.py
CHANGED
@@ -135,6 +135,7 @@ class ChatCompletionRequest(BaseModel):
     user: Optional[str] = None  # unique ID of the end-user (for monitoring)
     parallel_tool_calls: Optional[bool] = None
     instructions: Optional[str] = None
+    verbosity: Optional[Literal["low", "medium", "high"]] = None  # For verbosity control in GPT-5 models
 
     # function-calling related
     tools: Optional[List[Tool]] = None
letta/schemas/providers/__init__.py
CHANGED
@@ -5,7 +5,6 @@ from .azure import AzureProvider
 from .base import Provider, ProviderBase, ProviderCheck, ProviderCreate, ProviderUpdate
 from .bedrock import BedrockProvider
 from .cerebras import CerebrasProvider
-from .cohere import CohereProvider
 from .deepseek import DeepSeekProvider
 from .google_gemini import GoogleAIProvider
 from .google_vertex import GoogleVertexProvider
@@ -31,7 +30,6 @@ __all__ = [
     "AzureProvider",
     "BedrockProvider",
     "CerebrasProvider",  # NEW
-    "CohereProvider",
     "DeepSeekProvider",
     "GoogleAIProvider",
     "GoogleVertexProvider",
letta/schemas/providers/anthropic.py
CHANGED
@@ -1,12 +1,90 @@
 import warnings
 from typing import Literal
 
+import anthropic
 from pydantic import Field
 
 from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.providers.base import Provider
 
+# https://docs.anthropic.com/claude/docs/models-overview
+# Sadly hardcoded
+MODEL_LIST = [
+    ## Opus 4.1
+    {
+        "name": "claude-opus-4-1-20250805",
+        "context_window": 200000,
+    },
+    ## Opus 3
+    {
+        "name": "claude-3-opus-20240229",
+        "context_window": 200000,
+    },
+    # 3 latest
+    {
+        "name": "claude-3-opus-latest",
+        "context_window": 200000,
+    },
+    # 4
+    {
+        "name": "claude-opus-4-20250514",
+        "context_window": 200000,
+    },
+    ## Sonnet
+    # 3.0
+    {
+        "name": "claude-3-sonnet-20240229",
+        "context_window": 200000,
+    },
+    # 3.5
+    {
+        "name": "claude-3-5-sonnet-20240620",
+        "context_window": 200000,
+    },
+    # 3.5 new
+    {
+        "name": "claude-3-5-sonnet-20241022",
+        "context_window": 200000,
+    },
+    # 3.5 latest
+    {
+        "name": "claude-3-5-sonnet-latest",
+        "context_window": 200000,
+    },
+    # 3.7
+    {
+        "name": "claude-3-7-sonnet-20250219",
+        "context_window": 200000,
+    },
+    # 3.7 latest
+    {
+        "name": "claude-3-7-sonnet-latest",
+        "context_window": 200000,
+    },
+    # 4
+    {
+        "name": "claude-sonnet-4-20250514",
+        "context_window": 200000,
+    },
+    ## Haiku
+    # 3.0
+    {
+        "name": "claude-3-haiku-20240307",
+        "context_window": 200000,
+    },
+    # 3.5
+    {
+        "name": "claude-3-5-haiku-20241022",
+        "context_window": 200000,
+    },
+    # 3.5 latest
+    {
+        "name": "claude-3-5-haiku-latest",
+        "context_window": 200000,
+    },
+]
+
 
 class AnthropicProvider(Provider):
     provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.")
@@ -15,19 +93,39 @@ class AnthropicProvider(Provider):
     base_url: str = "https://api.anthropic.com/v1"
 
     async def check_api_key(self):
-
-
-
+        if self.api_key:
+            anthropic_client = anthropic.Anthropic(api_key=self.api_key)
+            try:
+                # just use a cheap model to count some tokens - as of 5/7/2025 this is faster than fetching the list of models
+                anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
+            except anthropic.AuthenticationError as e:
+                raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
+            except Exception as e:
+                raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
+        else:
+            raise ValueError("No API key provided")
 
     async def list_llm_models_async(self) -> list[LLMConfig]:
-
+        """
+        https://docs.anthropic.com/claude/docs/models-overview
 
-        models
-
+        NOTE: currently there is no GET /models, so we need to hardcode
+        """
+        if self.api_key:
+            anthropic_client = anthropic.AsyncAnthropic(api_key=self.api_key)
+        elif model_settings.anthropic_api_key:
+            anthropic_client = anthropic.AsyncAnthropic()
+        else:
+            raise ValueError("No API key provided")
 
-
-
+        models = await anthropic_client.models.list()
+        models_json = models.model_dump()
+        assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
+        models_data = models_json["data"]
 
+        return self._list_llm_models(models_data)
+
+    def _list_llm_models(self, models) -> list[LLMConfig]:
         configs = []
         for model in models:
             if any((model.get("type") != "model", "id" not in model, model.get("id").startswith("claude-2"))):
|