letta-nightly 0.11.3.dev20250820104219__py3-none-any.whl → 0.11.4.dev20250820213507__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agents/helpers.py +4 -0
- letta/agents/letta_agent.py +142 -5
- letta/constants.py +10 -7
- letta/data_sources/connectors.py +70 -53
- letta/embeddings.py +3 -240
- letta/errors.py +28 -0
- letta/functions/function_sets/base.py +4 -4
- letta/functions/functions.py +287 -32
- letta/functions/mcp_client/types.py +11 -0
- letta/functions/schema_validator.py +187 -0
- letta/functions/typescript_parser.py +196 -0
- letta/helpers/datetime_helpers.py +8 -4
- letta/helpers/tool_execution_helper.py +25 -2
- letta/llm_api/anthropic_client.py +23 -18
- letta/llm_api/azure_client.py +73 -0
- letta/llm_api/bedrock_client.py +8 -4
- letta/llm_api/google_vertex_client.py +14 -5
- letta/llm_api/llm_api_tools.py +2 -217
- letta/llm_api/llm_client.py +15 -1
- letta/llm_api/llm_client_base.py +32 -1
- letta/llm_api/openai.py +1 -0
- letta/llm_api/openai_client.py +18 -28
- letta/llm_api/together_client.py +55 -0
- letta/orm/provider.py +1 -0
- letta/orm/step_metrics.py +40 -1
- letta/otel/db_pool_monitoring.py +1 -1
- letta/schemas/agent.py +3 -4
- letta/schemas/agent_file.py +2 -0
- letta/schemas/block.py +11 -5
- letta/schemas/embedding_config.py +4 -5
- letta/schemas/enums.py +1 -1
- letta/schemas/job.py +2 -3
- letta/schemas/llm_config.py +79 -7
- letta/schemas/mcp.py +0 -24
- letta/schemas/message.py +0 -108
- letta/schemas/openai/chat_completion_request.py +1 -0
- letta/schemas/providers/__init__.py +0 -2
- letta/schemas/providers/anthropic.py +106 -8
- letta/schemas/providers/azure.py +102 -8
- letta/schemas/providers/base.py +10 -3
- letta/schemas/providers/bedrock.py +28 -16
- letta/schemas/providers/letta.py +3 -3
- letta/schemas/providers/ollama.py +2 -12
- letta/schemas/providers/openai.py +4 -4
- letta/schemas/providers/together.py +14 -2
- letta/schemas/sandbox_config.py +2 -1
- letta/schemas/tool.py +46 -22
- letta/server/rest_api/routers/v1/agents.py +179 -38
- letta/server/rest_api/routers/v1/folders.py +13 -8
- letta/server/rest_api/routers/v1/providers.py +10 -3
- letta/server/rest_api/routers/v1/sources.py +14 -8
- letta/server/rest_api/routers/v1/steps.py +17 -1
- letta/server/rest_api/routers/v1/tools.py +96 -5
- letta/server/rest_api/streaming_response.py +91 -45
- letta/server/server.py +27 -38
- letta/services/agent_manager.py +92 -20
- letta/services/agent_serialization_manager.py +11 -7
- letta/services/context_window_calculator/context_window_calculator.py +40 -2
- letta/services/helpers/agent_manager_helper.py +73 -12
- letta/services/mcp_manager.py +109 -15
- letta/services/passage_manager.py +28 -109
- letta/services/provider_manager.py +24 -0
- letta/services/step_manager.py +68 -0
- letta/services/summarizer/summarizer.py +1 -4
- letta/services/tool_executor/core_tool_executor.py +1 -1
- letta/services/tool_executor/sandbox_tool_executor.py +26 -9
- letta/services/tool_manager.py +82 -5
- letta/services/tool_sandbox/base.py +3 -11
- letta/services/tool_sandbox/modal_constants.py +17 -0
- letta/services/tool_sandbox/modal_deployment_manager.py +242 -0
- letta/services/tool_sandbox/modal_sandbox.py +218 -3
- letta/services/tool_sandbox/modal_sandbox_v2.py +429 -0
- letta/services/tool_sandbox/modal_version_manager.py +273 -0
- letta/services/tool_sandbox/safe_pickle.py +193 -0
- letta/settings.py +5 -3
- letta/templates/sandbox_code_file.py.j2 +2 -4
- letta/templates/sandbox_code_file_async.py.j2 +2 -4
- letta/utils.py +1 -1
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/METADATA +2 -2
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/RECORD +84 -81
- letta/llm_api/anthropic.py +0 -1206
- letta/llm_api/aws_bedrock.py +0 -104
- letta/llm_api/azure_openai.py +0 -118
- letta/llm_api/azure_openai_constants.py +0 -11
- letta/llm_api/cohere.py +0 -391
- letta/schemas/providers/cohere.py +0 -18
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/LICENSE +0 -0
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.3.dev20250820104219.dist-info → letta_nightly-0.11.4.dev20250820213507.dist-info}/entry_points.txt +0 -0
letta/schemas/agent.py
CHANGED
@@ -2,7 +2,7 @@ from datetime import datetime
 from enum import Enum
 from typing import Dict, List, Optional
 
-from pydantic import BaseModel, Field, field_validator, model_validator
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
 from letta.constants import CORE_MEMORY_LINE_NUMBER_WARNING, DEFAULT_EMBEDDING_CHUNK_SIZE
 from letta.schemas.block import CreateBlock
@@ -211,7 +211,7 @@ class CreateAgent(BaseModel, validate_assignment=True):  #
     max_reasoning_tokens: Optional[int] = Field(
         None, description="The maximum number of tokens to generate for reasoning step. If not set, the model will use its default value."
     )
-    enable_reasoner: Optional[bool] = Field(
+    enable_reasoner: Optional[bool] = Field(True, description="Whether to enable internal extended thinking step for a reasoner model.")
     reasoning: Optional[bool] = Field(None, description="Whether to enable reasoning for this agent.")
     from_template: Optional[str] = Field(None, description="The template id used to configure the agent")
     template: bool = Field(False, description="Whether the agent is a template")
@@ -355,8 +355,7 @@ class UpdateAgent(BaseModel):
         description="If set to True, the agent will be hidden.",
     )
 
-
-    extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields
 
 
 class AgentStepResponse(BaseModel):
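Several schemas in this release (CreateAgent/UpdateAgent above, plus BaseBlock, BlockUpdate, and JobUpdate below) swap a bare `extra = "ignore"` class attribute for pydantic v2's `model_config = ConfigDict(extra="ignore")`. A minimal sketch (not letta code) of why this matters:

```python
# Minimal sketch, not letta code: in pydantic v2 a bare `extra = "ignore"` is an
# ordinary class attribute with no effect on validation, while
# `model_config = ConfigDict(...)` is the supported way to set model options.
from typing import Optional

from pydantic import BaseModel, ConfigDict


class UpdateSketch(BaseModel):
    model_config = ConfigDict(extra="ignore")  # unknown input fields are dropped

    name: Optional[str] = None


print(UpdateSketch(name="a", unknown_field=1))  # name='a'; unknown_field is ignored
```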
letta/schemas/agent_file.py
CHANGED
@@ -24,12 +24,14 @@ class ImportResult:
         success: bool,
         message: str = "",
         imported_count: int = 0,
+        imported_agent_ids: Optional[List[str]] = None,
         errors: Optional[List[str]] = None,
         id_mappings: Optional[Dict[str, str]] = None,
     ):
         self.success = success
         self.message = message
         self.imported_count = imported_count
+        self.imported_agent_ids = imported_agent_ids or []
         self.errors = errors or []
         self.id_mappings = id_mappings or {}
 
letta/schemas/block.py
CHANGED
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import Optional
 
-from pydantic import Field, model_validator
+from pydantic import ConfigDict, Field, model_validator
 from typing_extensions import Self
 
 from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT, DEFAULT_HUMAN_BLOCK_DESCRIPTION, DEFAULT_PERSONA_BLOCK_DESCRIPTION
@@ -38,8 +38,7 @@ class BaseBlock(LettaBase, validate_assignment=True):
     # def __len__(self):
     #     return len(self.value)
 
-
-    extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields
 
     @model_validator(mode="after")
     def verify_char_limit(self) -> Self:
@@ -115,8 +114,7 @@ class BlockUpdate(BaseBlock):
     value: Optional[str] = Field(None, description="Value of the block.")
     project_id: Optional[str] = Field(None, description="The associated project id.")
 
-
-    extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields
 
 
 class CreateBlock(BaseBlock):
@@ -131,6 +129,14 @@ class CreateBlock(BaseBlock):
     is_template: bool = False
     template_name: Optional[str] = Field(None, description="Name of the block if it is a template.", alias="name")
 
+    @model_validator(mode="before")
+    @classmethod
+    def ensure_value_is_string(cls, data):
+        """Convert None value to empty string"""
+        if data and isinstance(data, dict) and data.get("value") is None:
+            data["value"] = ""
+        return data
+
 
 class CreateHuman(CreateBlock):
     """Create a human block"""
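The new `ensure_value_is_string` hook on `CreateBlock` normalizes a `None` block value before field validation runs. A self-contained sketch of the behavior (the class below is a hypothetical stand-in, not letta's actual CreateBlock):

```python
# Stand-in model (hypothetical) showing the effect of the new mode="before"
# validator: value=None is coerced to "" before the `value: str` field is
# validated, so block creation no longer fails on a None value.
from pydantic import BaseModel, Field, model_validator


class CreateBlockSketch(BaseModel):
    value: str = Field(..., description="Value of the block.")

    @model_validator(mode="before")
    @classmethod
    def ensure_value_is_string(cls, data):
        """Convert None value to empty string"""
        if data and isinstance(data, dict) and data.get("value") is None:
            data["value"] = ""
        return data


assert CreateBlockSketch(value=None).value == ""  # would raise without the hook
```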
letta/schemas/embedding_config.py
CHANGED
@@ -12,7 +12,6 @@ class EmbeddingConfig(BaseModel):
         "openai",
         "anthropic",
         "bedrock",
-        "cohere",
         "google_ai",
         "google_vertex",
         "azure",
@@ -63,11 +62,11 @@ class EmbeddingConfig(BaseModel):
             )
         elif model_name == "letta":
             return cls(
-                embedding_endpoint="https://
-                embedding_model="
-                embedding_dim=
+                embedding_endpoint="https://embeddings.letta.com/",
+                embedding_model="letta-free",
+                embedding_dim=1536,
                 embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
-                embedding_endpoint_type="
+                embedding_endpoint_type="openai",
             )
         elif provider == "pinecone":
            # default config for pinecone with empty endpoint
letta/schemas/enums.py
CHANGED
@@ -18,7 +18,6 @@ class ProviderType(str, Enum):
     azure = "azure"
     vllm = "vllm"
     bedrock = "bedrock"
-    cohere = "cohere"
 
 
 class ProviderCategory(str, Enum):
@@ -155,6 +154,7 @@ class DuplicateFileHandling(str, Enum):
     SKIP = "skip"  # skip files with duplicate names
     ERROR = "error"  # error when duplicate names are encountered
     SUFFIX = "suffix"  # add numeric suffix to make names unique (default behavior)
+    REPLACE = "replace"  # replace the file with the duplicate name
 
 
 class SandboxType(str, Enum):
letta/schemas/job.py
CHANGED
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import List, Optional
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.schemas.enums import JobStatus, JobType
@@ -81,8 +81,7 @@ class BatchJob(JobBase):
 class JobUpdate(JobBase):
     status: Optional[JobStatus] = Field(None, description="The status of the job.")
 
-
-    extra = "ignore"  # Ignores extra fields
+    model_config = ConfigDict(extra="ignore")  # Ignores extra fields
 
 
 class LettaRequestConfig(BaseModel):
letta/schemas/llm_config.py
CHANGED
@@ -16,7 +16,6 @@ class LLMConfig(BaseModel):
     model_endpoint_type: Literal[
         "openai",
         "anthropic",
-        "cohere",
         "google_ai",
         "google_vertex",
         "azure",
@@ -56,7 +55,7 @@ class LLMConfig(BaseModel):
         description="The maximum number of tokens to generate. If not set, the model will use its default value.",
     )
     enable_reasoner: bool = Field(
-
+        True, description="Whether or not the model should use extended thinking if it is a 'reasoning' style model"
     )
     reasoning_effort: Optional[Literal["minimal", "low", "medium", "high"]] = Field(
         None,
@@ -71,10 +70,50 @@ class LLMConfig(BaseModel):
         description="Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model's likelihood to repeat the same line verbatim. From OpenAI: Number between -2.0 and 2.0.",
     )
     compatibility_type: Optional[Literal["gguf", "mlx"]] = Field(None, description="The framework compatibility type for the model.")
+    verbosity: Optional[Literal["low", "medium", "high"]] = Field(
+        "medium",
+        description="Soft control for how verbose model output should be, used for GPT-5 models.",
+    )
 
     # FIXME hack to silence pydantic protected namespace warning
     model_config = ConfigDict(protected_namespaces=())
 
+    @model_validator(mode="before")
+    @classmethod
+    def set_model_specific_defaults(cls, values):
+        """
+        Set model-specific default values for fields like max_tokens, context_window, etc.
+        This ensures the same defaults from default_config are applied automatically.
+        """
+        model = values.get("model")
+        if model is None:
+            return values
+
+        # Set max_tokens defaults based on model
+        if values.get("max_tokens") is None:
+            if model == "gpt-5":
+                values["max_tokens"] = 16384
+            elif model == "gpt-4.1":
+                values["max_tokens"] = 8192
+            # For other models, the field default of 4096 will be used
+
+        # Set context_window defaults if not provided
+        if values.get("context_window") is None:
+            if model == "gpt-5":
+                values["context_window"] = 128000
+            elif model == "gpt-4.1":
+                values["context_window"] = 256000
+            elif model == "gpt-4o" or model == "gpt-4o-mini":
+                values["context_window"] = 128000
+            elif model == "gpt-4":
+                values["context_window"] = 8192
+
+        # Set verbosity defaults for GPT-5 models
+        if model == "gpt-5" and values.get("verbosity") is None:
+            values["verbosity"] = "medium"
+
+        return values
+
     @model_validator(mode="before")
     @classmethod
     def set_default_enable_reasoner(cls, values):
@@ -159,6 +198,16 @@ class LLMConfig(BaseModel):
                 context_window=256000,
                 max_tokens=8192,
             )
+        elif model_name == "gpt-5":
+            return cls(
+                model="gpt-5",
+                model_endpoint_type="openai",
+                model_endpoint="https://api.openai.com/v1",
+                model_wrapper=None,
+                context_window=128000,
+                verbosity="medium",
+                max_tokens=16384,
+            )
         elif model_name == "letta":
             return cls(
                 model="memgpt-openai",
@@ -196,13 +245,36 @@ class LLMConfig(BaseModel):
             config.model.startswith("gemini-2.5-flash") or config.model.startswith("gemini-2.5-pro")
         )
 
+    @classmethod
+    def is_google_ai_reasoning_model(cls, config: "LLMConfig") -> bool:
+        return config.model_endpoint_type == "google_ai" and (
+            config.model.startswith("gemini-2.5-flash") or config.model.startswith("gemini-2.5-pro")
+        )
+
+    @classmethod
+    def supports_verbosity(cls, config: "LLMConfig") -> bool:
+        """Check if the model supports verbosity control."""
+        return config.model_endpoint_type == "openai" and config.model.startswith("gpt-5")
+
     @classmethod
     def apply_reasoning_setting_to_config(cls, config: "LLMConfig", reasoning: bool):
         if not reasoning:
-            if cls.is_openai_reasoning_model(config)
-
-
-
+            if cls.is_openai_reasoning_model(config):
+                logger.warning("Reasoning cannot be disabled for OpenAI o1/o3 models")
+                config.put_inner_thoughts_in_kwargs = False
+                config.enable_reasoner = True
+                if config.reasoning_effort is None:
+                    config.reasoning_effort = "medium"
+            elif config.model.startswith("gemini-2.5-pro"):
+                logger.warning("Reasoning cannot be disabled for Gemini 2.5 Pro model")
+                # Handle as non-reasoner until we support summary
+                config.put_inner_thoughts_in_kwargs = True
+                config.enable_reasoner = True
+                if config.max_reasoning_tokens == 0:
+                    config.max_reasoning_tokens = 1024
+            else:
+                config.put_inner_thoughts_in_kwargs = False
+                config.enable_reasoner = False
 
         else:
             config.enable_reasoner = True
@@ -210,7 +282,7 @@ class LLMConfig(BaseModel):
                 config.put_inner_thoughts_in_kwargs = False
                 if config.max_reasoning_tokens == 0:
                     config.max_reasoning_tokens = 1024
-            elif cls.is_google_vertex_reasoning_model(config):
+            elif cls.is_google_vertex_reasoning_model(config) or cls.is_google_ai_reasoning_model(config):
                 # Handle as non-reasoner until we support summary
                 config.put_inner_thoughts_in_kwargs = True
                 if config.max_reasoning_tokens == 0:
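With the new `set_model_specific_defaults` before-validator, model-specific defaults apply on ordinary construction, not only through `default_config`. A rough sketch of the expected behavior (the exact set of required LLMConfig fields is an assumption; the default values themselves come from the diff above):

```python
# Rough sketch under stated assumptions; requires the letta package.
from letta.schemas.llm_config import LLMConfig

config = LLMConfig(model="gpt-5", model_endpoint_type="openai")
assert config.max_tokens == 16384        # gpt-5 default from set_model_specific_defaults
assert config.context_window == 128000   # filled in before field validation
assert config.verbosity == "medium"      # GPT-5-only soft verbosity control
assert LLMConfig.supports_verbosity(config)
```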
letta/schemas/mcp.py
CHANGED
@@ -81,29 +81,6 @@ class MCPServer(BaseMCPServer):
             raise ValueError(f"Unsupported server type: {self.server_type}")
 
 
-class RegisterSSEMCPServer(LettaBase):
-    server_name: str = Field(..., description="The name of the server")
-    server_type: MCPServerType = MCPServerType.SSE
-    server_url: str = Field(..., description="The URL of the server (MCP SSE client will connect to this URL)")
-    token: Optional[str] = Field(None, description="The access token or API key for the MCP server used for authentication")
-    custom_headers: Optional[Dict[str, str]] = Field(None, description="Custom authentication headers as key-value pairs")
-
-
-class RegisterStdioMCPServer(LettaBase):
-    server_name: str = Field(..., description="The name of the server")
-    server_type: MCPServerType = MCPServerType.STDIO
-    stdio_config: StdioServerConfig = Field(..., description="The configuration for the server (MCP 'local' client will run this command)")
-
-
-class RegisterStreamableHTTPMCPServer(LettaBase):
-    server_name: str = Field(..., description="The name of the server")
-    server_type: MCPServerType = MCPServerType.STREAMABLE_HTTP
-    server_url: str = Field(..., description="The URL path for the streamable HTTP server (e.g., 'example/mcp')")
-    auth_header: Optional[str] = Field(None, description="The name of the authentication header (e.g., 'Authorization')")
-    auth_token: Optional[str] = Field(None, description="The authentication token or API key value")
-    custom_headers: Optional[Dict[str, str]] = Field(None, description="Custom authentication headers as key-value pairs")
-
-
 class UpdateSSEMCPServer(LettaBase):
     """Update an SSE MCP server"""
 
@@ -133,7 +110,6 @@ class UpdateStreamableHTTPMCPServer(LettaBase):
 
 
 UpdateMCPServer = Union[UpdateSSEMCPServer, UpdateStdioMCPServer, UpdateStreamableHTTPMCPServer]
-RegisterMCPServer = Union[RegisterSSEMCPServer, RegisterStdioMCPServer, RegisterStreamableHTTPMCPServer]
 
 
 # OAuth-related schemas
letta/schemas/message.py
CHANGED
@@ -1051,114 +1051,6 @@ class Message(BaseMessage):
 
         return google_ai_message
 
-    def to_cohere_dict(
-        self,
-        function_call_role: Optional[str] = "SYSTEM",
-        function_call_prefix: Optional[str] = "[CHATBOT called function]",
-        function_response_role: Optional[str] = "SYSTEM",
-        function_response_prefix: Optional[str] = "[CHATBOT function returned]",
-        inner_thoughts_as_kwarg: Optional[bool] = False,
-    ) -> List[dict]:
-        """
-        Cohere chat_history dicts only have 'role' and 'message' fields
-        """
-
-        # NOTE: returns a list of dicts so that we can convert:
-        #  assistant [cot]: "I'll send a message"
-        #  assistant [func]: send_message("hi")
-        #  tool: {'status': 'OK'}
-        # to:
-        #  CHATBOT.text: "I'll send a message"
-        #  SYSTEM.text: [CHATBOT called function] send_message("hi")
-        #  SYSTEM.text: [CHATBOT function returned] {'status': 'OK'}
-
-        # TODO: update this prompt style once guidance from Cohere on
-        # embedded function calls in multi-turn conversation become more clear
-        if self.content and len(self.content) == 1 and isinstance(self.content[0], TextContent):
-            text_content = self.content[0].text
-        elif self.content and len(self.content) == 1 and isinstance(self.content[0], ToolReturnContent):
-            text_content = self.content[0].content
-        elif self.content and len(self.content) == 1 and isinstance(self.content[0], ImageContent):
-            text_content = "[Image Here]"
-        else:
-            text_content = None
-        if self.role == "system":
-            """
-            The chat_history parameter should not be used for SYSTEM messages in most cases.
-            Instead, to add a SYSTEM role message at the beginning of a conversation, the preamble parameter should be used.
-            """
-            raise UserWarning(f"role 'system' messages should go in 'preamble' field for Cohere API")
-
-        elif self.role == "user":
-            assert all([v is not None for v in [text_content, self.role]]), vars(self)
-            cohere_message = [
-                {
-                    "role": "USER",
-                    "message": text_content,
-                }
-            ]
-
-        elif self.role == "assistant":
-            # NOTE: we may break this into two message - an inner thought and a function call
-            # Optionally, we could just make this a function call with the inner thought inside
-            assert self.tool_calls is not None or text_content is not None
-
-            if text_content and self.tool_calls:
-                if inner_thoughts_as_kwarg:
-                    raise NotImplementedError
-                cohere_message = [
-                    {
-                        "role": "CHATBOT",
-                        "message": text_content,
-                    },
-                ]
-                for tc in self.tool_calls:
-                    function_name = tc.function["name"]
-                    function_args = parse_json(tc.function["arguments"])
-                    function_args_str = ",".join([f"{k}={v}" for k, v in function_args.items()])
-                    function_call_text = f"{function_name}({function_args_str})"
-                    cohere_message.append(
-                        {
-                            "role": function_call_role,
-                            "message": f"{function_call_prefix} {function_call_text}",
-                        }
-                    )
-            elif not text_content and self.tool_calls:
-                cohere_message = []
-                for tc in self.tool_calls:
-                    # TODO better way to pack?
-                    function_call_text = json_dumps(tc.to_dict())
-                    cohere_message.append(
-                        {
-                            "role": function_call_role,
-                            "message": f"{function_call_prefix} {function_call_text}",
-                        }
-                    )
-            elif text_content and not self.tool_calls:
-                cohere_message = [
-                    {
-                        "role": "CHATBOT",
-                        "message": text_content,
-                    }
-                ]
-            else:
-                raise ValueError("Message does not have content nor tool_calls")
-
-        elif self.role == "tool":
-            assert all([v is not None for v in [self.role, self.tool_call_id]]), vars(self)
-            function_response_text = text_content
-            cohere_message = [
-                {
-                    "role": function_response_role,
-                    "message": f"{function_response_prefix} {function_response_text}",
-                }
-            ]
-
-        else:
-            raise ValueError(self.role)
-
-        return cohere_message
-
     @staticmethod
     def generate_otid_from_id(message_id: str, index: int) -> str:
         """
letta/schemas/openai/chat_completion_request.py
CHANGED
@@ -135,6 +135,7 @@ class ChatCompletionRequest(BaseModel):
     user: Optional[str] = None  # unique ID of the end-user (for monitoring)
     parallel_tool_calls: Optional[bool] = None
     instructions: Optional[str] = None
+    verbosity: Optional[Literal["low", "medium", "high"]] = None  # For verbosity control in GPT-5 models
 
     # function-calling related
     tools: Optional[List[Tool]] = None
letta/schemas/providers/__init__.py
CHANGED
@@ -5,7 +5,6 @@ from .azure import AzureProvider
 from .base import Provider, ProviderBase, ProviderCheck, ProviderCreate, ProviderUpdate
 from .bedrock import BedrockProvider
 from .cerebras import CerebrasProvider
-from .cohere import CohereProvider
 from .deepseek import DeepSeekProvider
 from .google_gemini import GoogleAIProvider
 from .google_vertex import GoogleVertexProvider
@@ -31,7 +30,6 @@ __all__ = [
     "AzureProvider",
     "BedrockProvider",
     "CerebrasProvider",  # NEW
-    "CohereProvider",
     "DeepSeekProvider",
     "GoogleAIProvider",
     "GoogleVertexProvider",
letta/schemas/providers/anthropic.py
CHANGED
@@ -1,12 +1,90 @@
 import warnings
 from typing import Literal
 
+import anthropic
 from pydantic import Field
 
 from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.providers.base import Provider
 
+# https://docs.anthropic.com/claude/docs/models-overview
+# Sadly hardcoded
+MODEL_LIST = [
+    ## Opus 4.1
+    {
+        "name": "claude-opus-4-1-20250805",
+        "context_window": 200000,
+    },
+    ## Opus 3
+    {
+        "name": "claude-3-opus-20240229",
+        "context_window": 200000,
+    },
+    # 3 latest
+    {
+        "name": "claude-3-opus-latest",
+        "context_window": 200000,
+    },
+    # 4
+    {
+        "name": "claude-opus-4-20250514",
+        "context_window": 200000,
+    },
+    ## Sonnet
+    # 3.0
+    {
+        "name": "claude-3-sonnet-20240229",
+        "context_window": 200000,
+    },
+    # 3.5
+    {
+        "name": "claude-3-5-sonnet-20240620",
+        "context_window": 200000,
+    },
+    # 3.5 new
+    {
+        "name": "claude-3-5-sonnet-20241022",
+        "context_window": 200000,
+    },
+    # 3.5 latest
+    {
+        "name": "claude-3-5-sonnet-latest",
+        "context_window": 200000,
+    },
+    # 3.7
+    {
+        "name": "claude-3-7-sonnet-20250219",
+        "context_window": 200000,
+    },
+    # 3.7 latest
+    {
+        "name": "claude-3-7-sonnet-latest",
+        "context_window": 200000,
+    },
+    # 4
+    {
+        "name": "claude-sonnet-4-20250514",
+        "context_window": 200000,
+    },
+    ## Haiku
+    # 3.0
+    {
+        "name": "claude-3-haiku-20240307",
+        "context_window": 200000,
+    },
+    # 3.5
+    {
+        "name": "claude-3-5-haiku-20241022",
+        "context_window": 200000,
+    },
+    # 3.5 latest
+    {
+        "name": "claude-3-5-haiku-latest",
+        "context_window": 200000,
+    },
+]
+
 
 class AnthropicProvider(Provider):
     provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.")
@@ -15,19 +93,39 @@ class AnthropicProvider(Provider):
     base_url: str = "https://api.anthropic.com/v1"
 
     async def check_api_key(self):
-
-
-
+        if self.api_key:
+            anthropic_client = anthropic.Anthropic(api_key=self.api_key)
+            try:
+                # just use a cheap model to count some tokens - as of 5/7/2025 this is faster than fetching the list of models
+                anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
+            except anthropic.AuthenticationError as e:
+                raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
+            except Exception as e:
+                raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
+        else:
+            raise ValueError("No API key provided")
 
     async def list_llm_models_async(self) -> list[LLMConfig]:
-
+        """
+        https://docs.anthropic.com/claude/docs/models-overview
 
-        models
-
+        NOTE: currently there is no GET /models, so we need to hardcode
+        """
+        if self.api_key:
+            anthropic_client = anthropic.AsyncAnthropic(api_key=self.api_key)
+        elif model_settings.anthropic_api_key:
+            anthropic_client = anthropic.AsyncAnthropic()
+        else:
+            raise ValueError("No API key provided")
 
-
-
+        models = await anthropic_client.models.list()
+        models_json = models.model_dump()
+        assert "data" in models_json, f"Anthropic model query response missing 'data' field: {models_json}"
+        models_data = models_json["data"]
 
+        return self._list_llm_models(models_data)
+
+    def _list_llm_models(self, models) -> list[LLMConfig]:
         configs = []
         for model in models:
             if any((model.get("type") != "model", "id" not in model, model.get("id").startswith("claude-2"))):
|