letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. letta/__init__.py +1 -1
  2. letta/agents/helpers.py +4 -0
  3. letta/agents/letta_agent.py +142 -5
  4. letta/constants.py +10 -7
  5. letta/data_sources/connectors.py +70 -53
  6. letta/embeddings.py +3 -240
  7. letta/errors.py +28 -0
  8. letta/functions/function_sets/base.py +4 -4
  9. letta/functions/functions.py +287 -32
  10. letta/functions/mcp_client/types.py +11 -0
  11. letta/functions/schema_validator.py +187 -0
  12. letta/functions/typescript_parser.py +196 -0
  13. letta/helpers/datetime_helpers.py +8 -4
  14. letta/helpers/tool_execution_helper.py +25 -2
  15. letta/llm_api/anthropic_client.py +23 -18
  16. letta/llm_api/azure_client.py +73 -0
  17. letta/llm_api/bedrock_client.py +8 -4
  18. letta/llm_api/google_vertex_client.py +14 -5
  19. letta/llm_api/llm_api_tools.py +2 -217
  20. letta/llm_api/llm_client.py +15 -1
  21. letta/llm_api/llm_client_base.py +32 -1
  22. letta/llm_api/openai.py +1 -0
  23. letta/llm_api/openai_client.py +18 -28
  24. letta/llm_api/together_client.py +55 -0
  25. letta/orm/provider.py +1 -0
  26. letta/orm/step_metrics.py +40 -1
  27. letta/otel/db_pool_monitoring.py +1 -1
  28. letta/schemas/agent.py +3 -4
  29. letta/schemas/agent_file.py +2 -0
  30. letta/schemas/block.py +11 -5
  31. letta/schemas/embedding_config.py +4 -5
  32. letta/schemas/enums.py +1 -1
  33. letta/schemas/job.py +2 -3
  34. letta/schemas/llm_config.py +79 -7
  35. letta/schemas/mcp.py +0 -24
  36. letta/schemas/message.py +0 -108
  37. letta/schemas/openai/chat_completion_request.py +1 -0
  38. letta/schemas/providers/__init__.py +0 -2
  39. letta/schemas/providers/anthropic.py +106 -8
  40. letta/schemas/providers/azure.py +102 -8
  41. letta/schemas/providers/base.py +10 -3
  42. letta/schemas/providers/bedrock.py +28 -16
  43. letta/schemas/providers/letta.py +3 -3
  44. letta/schemas/providers/ollama.py +2 -12
  45. letta/schemas/providers/openai.py +4 -4
  46. letta/schemas/providers/together.py +14 -2
  47. letta/schemas/sandbox_config.py +2 -1
  48. letta/schemas/tool.py +46 -22
  49. letta/server/rest_api/routers/v1/agents.py +179 -38
  50. letta/server/rest_api/routers/v1/folders.py +13 -8
  51. letta/server/rest_api/routers/v1/providers.py +10 -3
  52. letta/server/rest_api/routers/v1/sources.py +14 -8
  53. letta/server/rest_api/routers/v1/steps.py +17 -1
  54. letta/server/rest_api/routers/v1/tools.py +96 -5
  55. letta/server/rest_api/streaming_response.py +91 -45
  56. letta/server/server.py +27 -38
  57. letta/services/agent_manager.py +92 -20
  58. letta/services/agent_serialization_manager.py +11 -7
  59. letta/services/context_window_calculator/context_window_calculator.py +40 -2
  60. letta/services/helpers/agent_manager_helper.py +73 -12
  61. letta/services/mcp_manager.py +109 -15
  62. letta/services/passage_manager.py +28 -109
  63. letta/services/provider_manager.py +24 -0
  64. letta/services/step_manager.py +68 -0
  65. letta/services/summarizer/summarizer.py +1 -4
  66. letta/services/tool_executor/core_tool_executor.py +1 -1
  67. letta/services/tool_executor/sandbox_tool_executor.py +26 -9
  68. letta/services/tool_manager.py +82 -5
  69. letta/services/tool_sandbox/base.py +3 -11
  70. letta/services/tool_sandbox/modal_constants.py +17 -0
  71. letta/services/tool_sandbox/modal_deployment_manager.py +242 -0
  72. letta/services/tool_sandbox/modal_sandbox.py +218 -3
  73. letta/services/tool_sandbox/modal_sandbox_v2.py +429 -0
  74. letta/services/tool_sandbox/modal_version_manager.py +273 -0
  75. letta/services/tool_sandbox/safe_pickle.py +193 -0
  76. letta/settings.py +5 -3
  77. letta/templates/sandbox_code_file.py.j2 +2 -4
  78. letta/templates/sandbox_code_file_async.py.j2 +2 -4
  79. letta/utils.py +1 -1
  80. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/METADATA +2 -2
  81. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/RECORD +84 -81
  82. letta/llm_api/anthropic.py +0 -1206
  83. letta/llm_api/aws_bedrock.py +0 -104
  84. letta/llm_api/azure_openai.py +0 -118
  85. letta/llm_api/azure_openai_constants.py +0 -11
  86. letta/llm_api/cohere.py +0 -391
  87. letta/schemas/providers/cohere.py +0 -18
  88. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/LICENSE +0 -0
  89. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/WHEEL +0 -0
  90. {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/entry_points.txt +0 -0
letta/schemas/agent.py CHANGED
@@ -2,7 +2,7 @@ from datetime import datetime
 from enum import Enum
 from typing import Dict, List, Optional

-from pydantic import BaseModel, Field, field_validator, model_validator
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator

 from letta.constants import CORE_MEMORY_LINE_NUMBER_WARNING, DEFAULT_EMBEDDING_CHUNK_SIZE
 from letta.schemas.block import CreateBlock
@@ -211,7 +211,7 @@ class CreateAgent(BaseModel, validate_assignment=True): #
     max_reasoning_tokens: Optional[int] = Field(
         None, description="The maximum number of tokens to generate for reasoning step. If not set, the model will use its default value."
     )
-    enable_reasoner: Optional[bool] = Field(False, description="Whether to enable internal extended thinking step for a reasoner model.")
+    enable_reasoner: Optional[bool] = Field(True, description="Whether to enable internal extended thinking step for a reasoner model.")
     reasoning: Optional[bool] = Field(None, description="Whether to enable reasoning for this agent.")
     from_template: Optional[str] = Field(None, description="The template id used to configure the agent")
     template: bool = Field(False, description="Whether the agent is a template")
@@ -355,8 +355,7 @@ class UpdateAgent(BaseModel):
         description="If set to True, the agent will be hidden.",
     )

-    class Config:
-        extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields


 class AgentStepResponse(BaseModel):
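Note: the `class Config` → `model_config = ConfigDict(...)` hunk above (repeated in the block.py and job.py diffs below) is the standard Pydantic v2 migration. A minimal sketch of the idiom, with an illustrative field that is not taken from the diff:

    from typing import Optional
    from pydantic import BaseModel, ConfigDict

    class UpdateAgentSketch(BaseModel):
        # Pydantic v2: a class-level ConfigDict replaces the nested `class Config`
        model_config = ConfigDict(extra="ignore")

        name: Optional[str] = None  # hypothetical field, for illustration only

    # unknown keys are dropped instead of raising a ValidationError
    print(UpdateAgentSketch(name="demo", unexpected="ignored"))  # name='demo'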
letta/schemas/agent_file.py CHANGED
@@ -24,12 +24,14 @@ class ImportResult:
         success: bool,
         message: str = "",
         imported_count: int = 0,
+        imported_agent_ids: Optional[List[str]] = None,
         errors: Optional[List[str]] = None,
         id_mappings: Optional[Dict[str, str]] = None,
     ):
         self.success = success
         self.message = message
         self.imported_count = imported_count
+        self.imported_agent_ids = imported_agent_ids or []
         self.errors = errors or []
         self.id_mappings = id_mappings or {}

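A quick sketch of the new field's default, assuming the constructor shown above:

    result = ImportResult(success=True, message="imported 1 agent", imported_count=1)
    assert result.imported_agent_ids == []  # None coalesces to an empty list via `or []`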
letta/schemas/block.py CHANGED
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import Optional

-from pydantic import Field, model_validator
+from pydantic import ConfigDict, Field, model_validator
 from typing_extensions import Self

 from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT, DEFAULT_HUMAN_BLOCK_DESCRIPTION, DEFAULT_PERSONA_BLOCK_DESCRIPTION
@@ -38,8 +38,7 @@ class BaseBlock(LettaBase, validate_assignment=True):
     # def __len__(self):
     #     return len(self.value)

-    class Config:
-        extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields

     @model_validator(mode="after")
     def verify_char_limit(self) -> Self:
@@ -115,8 +114,7 @@ class BlockUpdate(BaseBlock):
     value: Optional[str] = Field(None, description="Value of the block.")
     project_id: Optional[str] = Field(None, description="The associated project id.")

-    class Config:
-        extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields


 class CreateBlock(BaseBlock):
@@ -131,6 +129,14 @@ class CreateBlock(BaseBlock):
     is_template: bool = False
     template_name: Optional[str] = Field(None, description="Name of the block if it is a template.", alias="name")

+    @model_validator(mode="before")
+    @classmethod
+    def ensure_value_is_string(cls, data):
+        """Convert None value to empty string"""
+        if data and isinstance(data, dict) and data.get("value") is None:
+            data["value"] = ""
+        return data
+

 class CreateHuman(CreateBlock):
     """Create a human block"""
letta/schemas/embedding_config.py CHANGED
@@ -12,7 +12,6 @@ class EmbeddingConfig(BaseModel):
         "openai",
         "anthropic",
         "bedrock",
-        "cohere",
         "google_ai",
         "google_vertex",
         "azure",
@@ -63,11 +62,11 @@ class EmbeddingConfig(BaseModel):
             )
         elif model_name == "letta":
             return cls(
-                embedding_endpoint="https://bun-function-production-e310.up.railway.app/v1",
-                embedding_model="BAAI/bge-large-en-v1.5",
-                embedding_dim=1024,
+                embedding_endpoint="https://embeddings.letta.com/",
+                embedding_model="letta-free",
+                embedding_dim=1536,
                 embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
-                embedding_endpoint_type="hugging-face",
+                embedding_endpoint_type="openai",
             )
         elif provider == "pinecone":
             # default config for pinecone with empty endpoint
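A hedged usage sketch; the `default_config` signature is inferred from the `model_name`/`provider` branches visible above:

    config = EmbeddingConfig.default_config(model_name="letta")
    assert config.embedding_model == "letta-free"
    assert config.embedding_dim == 1536 and config.embedding_endpoint_type == "openai"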
letta/schemas/enums.py CHANGED
@@ -18,7 +18,6 @@ class ProviderType(str, Enum):
    azure = "azure"
    vllm = "vllm"
    bedrock = "bedrock"
-   cohere = "cohere"


 class ProviderCategory(str, Enum):
@@ -155,6 +154,7 @@ class DuplicateFileHandling(str, Enum):
    SKIP = "skip"  # skip files with duplicate names
    ERROR = "error"  # error when duplicate names are encountered
    SUFFIX = "suffix"  # add numeric suffix to make names unique (default behavior)
+   REPLACE = "replace"  # replace the file with the duplicate name


 class SandboxType(str, Enum):
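Since DuplicateFileHandling subclasses str, the new member round-trips directly from raw request values; a minimal sketch mirroring the enum above:

    from enum import Enum

    class DuplicateFileHandling(str, Enum):
        SKIP = "skip"
        ERROR = "error"
        SUFFIX = "suffix"
        REPLACE = "replace"

    assert DuplicateFileHandling("replace") is DuplicateFileHandling.REPLACE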
letta/schemas/job.py CHANGED
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import List, Optional

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field

 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.schemas.enums import JobStatus, JobType
@@ -81,8 +81,7 @@ class BatchJob(JobBase):
 class JobUpdate(JobBase):
     status: Optional[JobStatus] = Field(None, description="The status of the job.")

-    class Config:
-        extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields


 class LettaRequestConfig(BaseModel):
letta/schemas/llm_config.py CHANGED
@@ -16,7 +16,6 @@ class LLMConfig(BaseModel):
     model_endpoint_type: Literal[
         "openai",
         "anthropic",
-        "cohere",
         "google_ai",
         "google_vertex",
         "azure",
@@ -56,7 +55,7 @@ class LLMConfig(BaseModel):
         description="The maximum number of tokens to generate. If not set, the model will use its default value.",
     )
     enable_reasoner: bool = Field(
-        False, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
+        True, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
     )
     reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
         None,
@@ -71,10 +70,50 @@ class LLMConfig(BaseModel):
         description="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.",
     )
     compatibility_type: Optional[Literal["gguf", "mlx"]] = Field(None, description="The framework compatibility type for the model.")
+    verbosity: Optional[Literal["low", "medium", "high"]] = Field(
+        "medium",
+        description="Soft control for how verbose model output should be, used for GPT-5 models.",
+    )

     # FIXME hack to silence pydantic protected namespace warning
     model_config = ConfigDict(protected_namespaces=())

+    @model_validator(mode="before")
+    @classmethod
+    def set_model_specific_defaults(cls, values):
+        """
+        Set model-specific default values for fields like max_tokens, context_window, etc.
+        This ensures the same defaults from default_config are applied automatically.
+        """
+        model = values.get("model")
+        if model is None:
+            return values
+
+        # Set max_tokens defaults based on model
+        if values.get("max_tokens") is None:
+            if model == "gpt-5":
+                values["max_tokens"] = 16384
+            elif model == "gpt-4.1":
+                values["max_tokens"] = 8192
+            # For other models, the field default of 4096 will be used
+
+        # Set context_window defaults if not provided
+        if values.get("context_window") is None:
+            if model == "gpt-5":
+                values["context_window"] = 128000
+            elif model == "gpt-4.1":
+                values["context_window"] = 256000
+            elif model == "gpt-4o" or model == "gpt-4o-mini":
+                values["context_window"] = 128000
+            elif model == "gpt-4":
+                values["context_window"] = 8192
+
+        # Set verbosity defaults for GPT-5 models
+        if model == "gpt-5" and values.get("verbosity") is None:
+            values["verbosity"] = "medium"
+
+        return values
+
     @model_validator(mode="before")
     @classmethod
     def set_default_enable_reasoner(cls, values):
@@ -159,6 +198,16 @@ class LLMConfig(BaseModel):
                 context_window=256000,
                 max_tokens=8192,
             )
+        elif model_name == "gpt-5":
+            return cls(
+                model="gpt-5",
+                model_endpoint_type="openai",
+                model_endpoint="https://api.openai.com/v1",
+                model_wrapper=None,
+                context_window=128000,
+                verbosity="medium",
+                max_tokens=16384,
+            )
         elif model_name == "letta":
             return cls(
                 model="memgpt-openai",
@@ -196,13 +245,36 @@ class LLMConfig(BaseModel):
             config.model.startswith("gemini-2.5-flash") or config.model.startswith("gemini-2.5-pro")
         )

+    @classmethod
+    def is_google_ai_reasoning_model(cls, config: "LLMConfig") -> bool:
+        return config.model_endpoint_type == "google_ai" and (
+            config.model.startswith("gemini-2.5-flash") or config.model.startswith("gemini-2.5-pro")
+        )
+
+    @classmethod
+    def supports_verbosity(cls, config: "LLMConfig") -> bool:
+        """Check if the model supports verbosity control."""
+        return config.model_endpoint_type == "openai" and config.model.startswith("gpt-5")
+
     @classmethod
     def apply_reasoning_setting_to_config(cls, config: "LLMConfig", reasoning: bool):
         if not reasoning:
-            if cls.is_openai_reasoning_model(config) or config.model.startswith("gemini-2.5-pro"):
-                raise ValueError("Reasoning cannot be disabled for OpenAI o1/o3 models")
-            config.put_inner_thoughts_in_kwargs = False
-            config.enable_reasoner = False
+            if cls.is_openai_reasoning_model(config):
+                logger.warning("Reasoning cannot be disabled for OpenAI o1/o3 models")
+                config.put_inner_thoughts_in_kwargs = False
+                config.enable_reasoner = True
+                if config.reasoning_effort is None:
+                    config.reasoning_effort = "medium"
+            elif config.model.startswith("gemini-2.5-pro"):
+                logger.warning("Reasoning cannot be disabled for Gemini 2.5 Pro model")
+                # Handle as non-reasoner until we support summary
+                config.put_inner_thoughts_in_kwargs = True
+                config.enable_reasoner = True
+                if config.max_reasoning_tokens == 0:
+                    config.max_reasoning_tokens = 1024
+            else:
+                config.put_inner_thoughts_in_kwargs = False
+                config.enable_reasoner = False

         else:
             config.enable_reasoner = True
@@ -210,7 +282,7 @@ class LLMConfig(BaseModel):
                 config.put_inner_thoughts_in_kwargs = False
             if config.max_reasoning_tokens == 0:
                 config.max_reasoning_tokens = 1024
-        elif cls.is_google_vertex_reasoning_model(config):
+        elif cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config):
             # Handle as non-reasoner until we support summary
             config.put_inner_thoughts_in_kwargs = True
             if config.max_reasoning_tokens == 0:
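Net effect of these hunks: constructing an LLMConfig for a known model now self-populates the values that previously required calling default_config(). A hedged sketch (the set of required fields is inferred from the constructor calls above):

    cfg = LLMConfig(model="gpt-5", model_endpoint_type="openai", model_endpoint="https://api.openai.com/v1")
    assert cfg.context_window == 128000  # filled in by set_model_specific_defaults
    assert cfg.max_tokens == 16384
    assert cfg.verbosity == "medium"     # GPT-5 only; LLMConfig.supports_verbosity(cfg) is True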
letta/schemas/mcp.py CHANGED
@@ -81,29 +81,6 @@ class MCPServer(BaseMCPServer):
            raise ValueError(f"Unsupported server type: {self.server_type}")


-class RegisterSSEMCPServer(LettaBase):
-    server_name: str = Field(..., description="The name of the server")
-    server_type: MCPServerType = MCPServerType.SSE
-    server_url: str = Field(..., description="The URL of the server (MCP SSE client will connect to this URL)")
-    token: Optional[str] = Field(None, description="The access token or API key for the MCP server used for authentication")
-    custom_headers: Optional[Dict[str, str]] = Field(None, description="Custom authentication headers as key-value pairs")
-
-
-class RegisterStdioMCPServer(LettaBase):
-    server_name: str = Field(..., description="The name of the server")
-    server_type: MCPServerType = MCPServerType.STDIO
-    stdio_config: StdioServerConfig = Field(..., description="The configuration for the server (MCP 'local' client will run this command)")
-
-
-class RegisterStreamableHTTPMCPServer(LettaBase):
-    server_name: str = Field(..., description="The name of the server")
-    server_type: MCPServerType = MCPServerType.STREAMABLE_HTTP
-    server_url: str = Field(..., description="The URL path for the streamable HTTP server (e.g., 'example/mcp')")
-    auth_header: Optional[str] = Field(None, description="The name of the authentication header (e.g., 'Authorization')")
-    auth_token: Optional[str] = Field(None, description="The authentication token or API key value")
-    custom_headers: Optional[Dict[str, str]] = Field(None, description="Custom authentication headers as key-value pairs")
-
-
 class UpdateSSEMCPServer(LettaBase):
     """Update an SSE MCP server"""

@@ -133,7 +110,6 @@ class UpdateStreamableHTTPMCPServer(LettaBase):


 UpdateMCPServer = Union[UpdateSSEMCPServer, UpdateStdioMCPServer, UpdateStreamableHTTPMCPServer]
-RegisterMCPServer = Union[RegisterSSEMCPServer, RegisterStdioMCPServer, RegisterStreamableHTTPMCPServer]


 # OAuth-related schemas
letta/schemas/message.py CHANGED
@@ -1051,114 +1051,6 @@ class Message(BaseMessage):

         return google_ai_message

-    def to_cohere_dict(
-        self,
-        function_call_role: Optional[str] = "SYSTEM",
-        function_call_prefix: Optional[str] = "[CHATBOT called function]",
-        function_response_role: Optional[str] = "SYSTEM",
-        function_response_prefix: Optional[str] = "[CHATBOT function returned]",
-        inner_thoughts_as_kwarg: Optional[bool] = False,
-    ) -> List[dict]:
-        """
-        Cohere chat_history dicts only have 'role' and 'message' fields
-        """
-
-        # NOTE: returns a list of dicts so that we can convert:
-        #  assistant [cot]: "I'll send a message"
-        #  assistant [func]: send_message("hi")
-        #  tool: {'status': 'OK'}
-        # to:
-        #  CHATBOT.text: "I'll send a message"
-        #  SYSTEM.text: [CHATBOT called function] send_message("hi")
-        #  SYSTEM.text: [CHATBOT function returned] {'status': 'OK'}
-
-        # TODO: update this prompt style once guidance from Cohere on
-        # embedded function calls in multi-turn conversation become more clear
-        if self.content and len(self.content) == 1 and isinstance(self.content[0], TextContent):
-            text_content = self.content[0].text
-        elif self.content and len(self.content) == 1 and isinstance(self.content[0], ToolReturnContent):
-            text_content = self.content[0].content
-        elif self.content and len(self.content) == 1 and isinstance(self.content[0], ImageContent):
-            text_content = "[Image Here]"
-        else:
-            text_content = None
-        if self.role == "system":
-            """
-            The chat_history parameter should not be used for SYSTEM messages in most cases.
-            Instead, to add a SYSTEM role message at the beginning of a conversation, the preamble parameter should be used.
-            """
-            raise UserWarning(f"role 'system' messages should go in 'preamble' field for Cohere API")
-
-        elif self.role == "user":
-            assert all([v is not None for v in [text_content, self.role]]), vars(self)
-            cohere_message = [
-                {
-                    "role": "USER",
-                    "message": text_content,
-                }
-            ]
-
-        elif self.role == "assistant":
-            # NOTE: we may break this into two message - an inner thought and a function call
-            # Optionally, we could just make this a function call with the inner thought inside
-            assert self.tool_calls is not None or text_content is not None
-
-            if text_content and self.tool_calls:
-                if inner_thoughts_as_kwarg:
-                    raise NotImplementedError
-                cohere_message = [
-                    {
-                        "role": "CHATBOT",
-                        "message": text_content,
-                    },
-                ]
-                for tc in self.tool_calls:
-                    function_name = tc.function["name"]
-                    function_args = parse_json(tc.function["arguments"])
-                    function_args_str = ",".join([f"{k}={v}" for k, v in function_args.items()])
-                    function_call_text = f"{function_name}({function_args_str})"
-                    cohere_message.append(
-                        {
-                            "role": function_call_role,
-                            "message": f"{function_call_prefix} {function_call_text}",
-                        }
-                    )
-            elif not text_content and self.tool_calls:
-                cohere_message = []
-                for tc in self.tool_calls:
-                    # TODO better way to pack?
-                    function_call_text = json_dumps(tc.to_dict())
-                    cohere_message.append(
-                        {
-                            "role": function_call_role,
-                            "message": f"{function_call_prefix} {function_call_text}",
-                        }
-                    )
-            elif text_content and not self.tool_calls:
-                cohere_message = [
-                    {
-                        "role": "CHATBOT",
-                        "message": text_content,
-                    }
-                ]
-            else:
-                raise ValueError("Message does not have content nor tool_calls")
-
-        elif self.role == "tool":
-            assert all([v is not None for v in [self.role, self.tool_call_id]]), vars(self)
-            function_response_text = text_content
-            cohere_message = [
-                {
-                    "role": function_response_role,
-                    "message": f"{function_response_prefix} {function_response_text}",
-                }
-            ]
-
-        else:
-            raise ValueError(self.role)
-
-        return cohere_message
-
     @staticmethod
     def generate_otid_from_id(message_id: str, index: int) -> str:
         """
letta/schemas/openai/chat_completion_request.py CHANGED
@@ -135,6 +135,7 @@ class ChatCompletionRequest(BaseModel):
     user: Optional[str] = None  # unique ID of the end-user (for monitoring)
     parallel_tool_calls: Optional[bool] = None
     instructions: Optional[str] = None
+    verbosity: Optional[Literal["low", "medium", "high"]] = None  # For verbosity control in GPT-5 models

     # function-calling related
     tools: Optional[List[Tool]] = None
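A hedged sketch of the new knob; the message shape is assumed to follow the OpenAI chat format used elsewhere in this schema:

    req = ChatCompletionRequest(
        model="gpt-5",
        messages=[{"role": "user", "content": "hi"}],
        verbosity="low",  # only meaningful for GPT-5-family models
    )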
letta/schemas/providers/__init__.py CHANGED
@@ -5,7 +5,6 @@ from .azure import AzureProvider
 from .base import Provider, ProviderBase, ProviderCheck, ProviderCreate, ProviderUpdate
 from .bedrock import BedrockProvider
 from .cerebras import CerebrasProvider
-from .cohere import CohereProvider
 from .deepseek import DeepSeekProvider
 from .google_gemini import GoogleAIProvider
 from .google_vertex import GoogleVertexProvider
@@ -31,7 +30,6 @@ __all__ = [
    "AzureProvider",
    "BedrockProvider",
    "CerebrasProvider",  # NEW
-   "CohereProvider",
    "DeepSeekProvider",
    "GoogleAIProvider",
    "GoogleVertexProvider",
letta/schemas/providers/anthropic.py CHANGED
@@ -1,12 +1,90 @@
 import warnings
 from typing import Literal

+import anthropic
 from pydantic import Field

 from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.providers.base import Provider

+# https://docs.anthropic.com/claude/docs/models-overview
+# Sadly hardcoded
+MODEL_LIST = [
+    ## Opus 4.1
+    {
+        "name": "claude-opus-4-1-20250805",
+        "context_window": 200000,
+    },
+    ## Opus 3
+    {
+        "name": "claude-3-opus-20240229",
+        "context_window": 200000,
+    },
+    # 3 latest
+    {
+        "name": "claude-3-opus-latest",
+        "context_window": 200000,
+    },
+    # 4
+    {
+        "name": "claude-opus-4-20250514",
+        "context_window": 200000,
+    },
+    ## Sonnet
+    # 3.0
+    {
+        "name": "claude-3-sonnet-20240229",
+        "context_window": 200000,
+    },
+    # 3.5
+    {
+        "name": "claude-3-5-sonnet-20240620",
+        "context_window": 200000,
+    },
+    # 3.5 new
+    {
+        "name": "claude-3-5-sonnet-20241022",
+        "context_window": 200000,
+    },
+    # 3.5 latest
+    {
+        "name": "claude-3-5-sonnet-latest",
+        "context_window": 200000,
+    },
+    # 3.7
+    {
+        "name": "claude-3-7-sonnet-20250219",
+        "context_window": 200000,
+    },
+    # 3.7 latest
+    {
+        "name": "claude-3-7-sonnet-latest",
+        "context_window": 200000,
+    },
+    # 4
+    {
+        "name": "claude-sonnet-4-20250514",
+        "context_window": 200000,
+    },
+    ## Haiku
+    # 3.0
+    {
+        "name": "claude-3-haiku-20240307",
+        "context_window": 200000,
+    },
+    # 3.5
+    {
+        "name": "claude-3-5-haiku-20241022",
+        "context_window": 200000,
+    },
+    # 3.5 latest
+    {
+        "name": "claude-3-5-haiku-latest",
+        "context_window": 200000,
+    },
+]
+


 class AnthropicProvider(Provider):
     provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.")
@@ -15,19 +93,39 @@ class AnthropicProvider(Provider):
     base_url: str = "https://api.anthropic.com/v1"

     async def check_api_key(self):
-        from letta.llm_api.anthropic import anthropic_check_valid_api_key
-
-        anthropic_check_valid_api_key(self.api_key)
+        if self.api_key:
+            anthropic_client = anthropic.Anthropic(api_key=self.api_key)
+            try:
+                # just use a cheap model to count some tokens - as of 5/7/2025 this is faster than fetching the list of models
+                anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
+            except anthropic.AuthenticationError as e:
+                raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
+            except Exception as e:
+                raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
+        else:
+            raise ValueError("No API key provided")

     async def list_llm_models_async(self) -> list[LLMConfig]:
-        from letta.llm_api.anthropic import anthropic_get_model_list_async
+        """
+        https://docs.anthropic.com/claude/docs/models-overview

-        models = await anthropic_get_model_list_async(api_key=self.api_key)
-        return self._list_llm_models(models)
+        NOTE: currently there is no GET /models, so we need to hardcode
+        """
+        if self.api_key:
+            anthropic_client = anthropic.AsyncAnthropic(api_key=self.api_key)
+        elif model_settings.anthropic_api_key:
+            anthropic_client = anthropic.AsyncAnthropic()
+        else:
+            raise ValueError("No API key provided")

-    def _list_llm_models(self, models) -> list[LLMConfig]:
-        from letta.llm_api.anthropic import MODEL_LIST
+        models = await anthropic_client.models.list()
+        models_json = models.model_dump()
+        assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
+        models_data = models_json["data"]

+        return self._list_llm_models(models_data)
+
+    def _list_llm_models(self, models) -> list[LLMConfig]:
         configs = []
         for model in models:
             if any((model.get("type") != "model", "id" not in model, model.get("id").startswith("claude-2"))):