letta-nightly 0.8.17.dev20250723104501__py3-none-any.whl → 0.9.0.dev20250724081419__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. letta/__init__.py +5 -3
  2. letta/agent.py +3 -2
  3. letta/agents/base_agent.py +4 -1
  4. letta/agents/voice_agent.py +1 -0
  5. letta/constants.py +4 -2
  6. letta/functions/schema_generator.py +2 -1
  7. letta/groups/dynamic_multi_agent.py +1 -0
  8. letta/helpers/converters.py +13 -5
  9. letta/helpers/json_helpers.py +6 -1
  10. letta/llm_api/anthropic.py +2 -2
  11. letta/llm_api/aws_bedrock.py +24 -94
  12. letta/llm_api/deepseek.py +1 -1
  13. letta/llm_api/google_ai_client.py +0 -38
  14. letta/llm_api/google_constants.py +6 -3
  15. letta/llm_api/helpers.py +1 -1
  16. letta/llm_api/llm_api_tools.py +4 -7
  17. letta/llm_api/mistral.py +12 -37
  18. letta/llm_api/openai.py +17 -17
  19. letta/llm_api/sample_response_jsons/aws_bedrock.json +38 -0
  20. letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +15 -0
  21. letta/llm_api/sample_response_jsons/lmstudio_model_list.json +15 -0
  22. letta/local_llm/constants.py +2 -23
  23. letta/local_llm/json_parser.py +11 -1
  24. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +9 -9
  25. letta/local_llm/llm_chat_completion_wrappers/chatml.py +7 -8
  26. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +6 -6
  27. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +3 -3
  28. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +1 -1
  29. letta/local_llm/ollama/api.py +2 -2
  30. letta/orm/__init__.py +1 -0
  31. letta/orm/agent.py +33 -2
  32. letta/orm/files_agents.py +13 -10
  33. letta/orm/mixins.py +8 -0
  34. letta/orm/prompt.py +13 -0
  35. letta/orm/sqlite_functions.py +61 -17
  36. letta/otel/db_pool_monitoring.py +13 -12
  37. letta/schemas/agent.py +69 -4
  38. letta/schemas/agent_file.py +2 -0
  39. letta/schemas/block.py +11 -0
  40. letta/schemas/embedding_config.py +15 -3
  41. letta/schemas/enums.py +2 -0
  42. letta/schemas/file.py +1 -1
  43. letta/schemas/folder.py +74 -0
  44. letta/schemas/memory.py +12 -6
  45. letta/schemas/prompt.py +9 -0
  46. letta/schemas/providers/__init__.py +47 -0
  47. letta/schemas/providers/anthropic.py +78 -0
  48. letta/schemas/providers/azure.py +80 -0
  49. letta/schemas/providers/base.py +201 -0
  50. letta/schemas/providers/bedrock.py +78 -0
  51. letta/schemas/providers/cerebras.py +79 -0
  52. letta/schemas/providers/cohere.py +18 -0
  53. letta/schemas/providers/deepseek.py +63 -0
  54. letta/schemas/providers/google_gemini.py +102 -0
  55. letta/schemas/providers/google_vertex.py +54 -0
  56. letta/schemas/providers/groq.py +35 -0
  57. letta/schemas/providers/letta.py +39 -0
  58. letta/schemas/providers/lmstudio.py +97 -0
  59. letta/schemas/providers/mistral.py +41 -0
  60. letta/schemas/providers/ollama.py +151 -0
  61. letta/schemas/providers/openai.py +241 -0
  62. letta/schemas/providers/together.py +85 -0
  63. letta/schemas/providers/vllm.py +57 -0
  64. letta/schemas/providers/xai.py +66 -0
  65. letta/server/db.py +0 -5
  66. letta/server/rest_api/app.py +4 -3
  67. letta/server/rest_api/routers/v1/__init__.py +2 -0
  68. letta/server/rest_api/routers/v1/agents.py +152 -4
  69. letta/server/rest_api/routers/v1/folders.py +490 -0
  70. letta/server/rest_api/routers/v1/providers.py +2 -2
  71. letta/server/rest_api/routers/v1/sources.py +21 -26
  72. letta/server/rest_api/routers/v1/tools.py +90 -15
  73. letta/server/server.py +50 -95
  74. letta/services/agent_manager.py +420 -81
  75. letta/services/agent_serialization_manager.py +707 -0
  76. letta/services/block_manager.py +132 -11
  77. letta/services/file_manager.py +104 -29
  78. letta/services/file_processor/embedder/pinecone_embedder.py +8 -2
  79. letta/services/file_processor/file_processor.py +75 -24
  80. letta/services/file_processor/parser/markitdown_parser.py +95 -0
  81. letta/services/files_agents_manager.py +57 -17
  82. letta/services/group_manager.py +7 -0
  83. letta/services/helpers/agent_manager_helper.py +25 -15
  84. letta/services/provider_manager.py +2 -2
  85. letta/services/source_manager.py +35 -16
  86. letta/services/tool_executor/files_tool_executor.py +12 -5
  87. letta/services/tool_manager.py +12 -0
  88. letta/services/tool_sandbox/e2b_sandbox.py +52 -48
  89. letta/settings.py +9 -6
  90. letta/streaming_utils.py +2 -1
  91. letta/utils.py +34 -1
  92. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/METADATA +9 -8
  93. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/RECORD +96 -68
  94. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/LICENSE +0 -0
  95. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/WHEEL +0 -0
  96. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/entry_points.txt +0 -0
letta/schemas/providers/anthropic.py
@@ -0,0 +1,78 @@
+ import warnings
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.base import Provider
+
+
+ class AnthropicProvider(Provider):
+     provider_type: Literal[ProviderType.anthropic] = Field(ProviderType.anthropic, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     api_key: str = Field(..., description="API key for the Anthropic API.")
+     base_url: str = "https://api.anthropic.com/v1"
+
+     async def check_api_key(self):
+         from letta.llm_api.anthropic import anthropic_check_valid_api_key
+
+         anthropic_check_valid_api_key(self.api_key)
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.anthropic import anthropic_get_model_list_async
+
+         models = await anthropic_get_model_list_async(api_key=self.api_key)
+         return self._list_llm_models(models)
+
+     def _list_llm_models(self, models) -> list[LLMConfig]:
+         from letta.llm_api.anthropic import MODEL_LIST
+
+         configs = []
+         for model in models:
+             if any((model.get("type") != "model", "id" not in model, model.get("id").startswith("claude-2"))):
+                 continue
+
+             # Anthropic doesn't return the context window in their API
+             if "context_window" not in model:
+                 # Remap list to name: context_window
+                 model_library = {m["name"]: m["context_window"] for m in MODEL_LIST}
+                 # Attempt to look it up in a hardcoded list
+                 if model["id"] in model_library:
+                     model["context_window"] = model_library[model["id"]]
+                 else:
+                     # On fallback, we can set 200k (generally safe), but we should warn the user
+                     warnings.warn(f"Couldn't find context window size for model {model['id']}, defaulting to 200,000")
+                     model["context_window"] = 200000
+
+             max_tokens = 8192
+             if "claude-3-opus" in model["id"]:
+                 max_tokens = 4096
+             if "claude-3-haiku" in model["id"]:
+                 max_tokens = 4096
+             # TODO: set for 3-7 extended thinking mode
+
+             # NOTE: from 2025-02
+             # We set this to false by default, because Anthropic can
+             # natively support <thinking> tags inside of content fields
+             # However, putting COT inside of tool calls can make it more
+             # reliable for tool calling (no chance of a non-tool call step)
+             # Since tool_choice_type 'any' doesn't work with in-content COT
+             # NOTE For Haiku, it can be flaky if we don't enable this by default
+             # inner_thoughts_in_kwargs = True if "haiku" in model["id"] else False
+             inner_thoughts_in_kwargs = True  # we no longer support thinking tags
+
+             configs.append(
+                 LLMConfig(
+                     model=model["id"],
+                     model_endpoint_type="anthropic",
+                     model_endpoint=self.base_url,
+                     context_window=model["context_window"],
+                     handle=self.get_handle(model["id"]),
+                     put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                     max_tokens=max_tokens,
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+         return configs
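
For orientation, a minimal usage sketch for the provider module above (not part of the diff; it assumes an ANTHROPIC_API_KEY environment variable and that the import path shown in this release stays stable):

import asyncio
import os

from letta.schemas.providers.anthropic import AnthropicProvider


async def main():
    provider = AnthropicProvider(
        name="anthropic",  # becomes provider_name on each returned LLMConfig
        api_key=os.environ["ANTHROPIC_API_KEY"],
    )
    await provider.check_api_key()  # raises if the key is rejected
    for config in await provider.list_llm_models_async():
        # handle looks like "anthropic/<model id>"; context window comes from the
        # hardcoded MODEL_LIST lookup or the 200k fallback shown above
        print(config.handle, config.context_window, config.max_tokens)


asyncio.run(main())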
letta/schemas/providers/azure.py
@@ -0,0 +1,80 @@
+ from typing import ClassVar, Literal
+
+ from pydantic import Field, field_validator
+
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
+ from letta.llm_api.azure_openai import get_azure_chat_completions_endpoint, get_azure_embeddings_endpoint
+ from letta.llm_api.azure_openai_constants import AZURE_MODEL_TO_CONTEXT_LENGTH
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.base import Provider
+
+
+ class AzureProvider(Provider):
+     LATEST_API_VERSION: ClassVar[str] = "2024-09-01-preview"
+
+     provider_type: Literal[ProviderType.azure] = Field(ProviderType.azure, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     # Note: 2024-09-01-preview was set here until 2025-07-16.
+     # set manually, see: https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
+     latest_api_version: str = "2025-04-01-preview"
+     base_url: str = Field(
+         ..., description="Base URL for the Azure API endpoint. This should be specific to your org, e.g. `https://letta.openai.azure.com`."
+     )
+     api_key: str = Field(..., description="API key for the Azure API.")
+     api_version: str = Field(default=LATEST_API_VERSION, description="API version for the Azure API")
+
+     @field_validator("api_version", mode="before")
+     def replace_none_with_default(cls, v):
+         return v if v is not None else cls.LATEST_API_VERSION
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         # TODO (cliandy): asyncify
+         from letta.llm_api.azure_openai import azure_openai_get_chat_completion_model_list
+
+         model_options = azure_openai_get_chat_completion_model_list(self.base_url, api_key=self.api_key, api_version=self.api_version)
+         configs = []
+         for model_option in model_options:
+             model_name = model_option["id"]
+             context_window_size = self.get_model_context_window(model_name)
+             model_endpoint = get_azure_chat_completions_endpoint(self.base_url, model_name, self.api_version)
+             configs.append(
+                 LLMConfig(
+                     model=model_name,
+                     model_endpoint_type="azure",
+                     model_endpoint=model_endpoint,
+                     context_window=context_window_size,
+                     handle=self.get_handle(model_name),
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+         return configs
+
+     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+         # TODO (cliandy): asyncify dependent function calls
+         from letta.llm_api.azure_openai import azure_openai_get_embeddings_model_list
+
+         model_options = azure_openai_get_embeddings_model_list(self.base_url, api_key=self.api_key, api_version=self.api_version)
+         configs = []
+         for model_option in model_options:
+             model_name = model_option["id"]
+             model_endpoint = get_azure_embeddings_endpoint(self.base_url, model_name, self.api_version)
+             configs.append(
+                 EmbeddingConfig(
+                     embedding_model=model_name,
+                     embedding_endpoint_type="azure",
+                     embedding_endpoint=model_endpoint,
+                     embedding_dim=768,  # TODO generated 1536?
+                     embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,  # old note: max is 2048
+                     handle=self.get_handle(model_name, is_embedding=True),
+                     batch_size=1024,
+                 )
+             )
+         return configs
+
+     def get_model_context_window(self, model_name: str) -> int | None:
+         # Hard coded as there are no API endpoints for this
+         llm_default = LLM_MAX_TOKENS.get(model_name, 4096)
+         return AZURE_MODEL_TO_CONTEXT_LENGTH.get(model_name, llm_default)
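
The mode="before" validator above exists so that a stored api_version of None (e.g. a provider row created before the field was populated) falls back to the pinned LATEST_API_VERSION instead of failing validation. A small sketch, assuming only the fields shown in this diff (the key and endpoint values are placeholders):

from letta.schemas.providers.azure import AzureProvider

provider = AzureProvider(
    name="azure",
    api_key="azure-key",  # placeholder
    base_url="https://letta.openai.azure.com",  # placeholder org-specific endpoint
    api_version=None,  # swapped for the class default by replace_none_with_default
)
print(provider.api_version)  # "2024-09-01-preview", i.e. AzureProvider.LATEST_API_VERSION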
letta/schemas/providers/base.py
@@ -0,0 +1,201 @@
+ from datetime import datetime
+
+ from pydantic import BaseModel, Field, model_validator
+
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.embedding_config_overrides import EMBEDDING_HANDLE_OVERRIDES
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.letta_base import LettaBase
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.llm_config_overrides import LLM_HANDLE_OVERRIDES
+ from letta.settings import model_settings
+
+
+ class ProviderBase(LettaBase):
+     __id_prefix__ = "provider"
+
+
+ class Provider(ProviderBase):
+     id: str | None = Field(None, description="The id of the provider, lazily created by the database manager.")
+     name: str = Field(..., description="The name of the provider")
+     provider_type: ProviderType = Field(..., description="The type of the provider")
+     provider_category: ProviderCategory = Field(..., description="The category of the provider (base or byok)")
+     api_key: str | None = Field(None, description="API key or secret key used for requests to the provider.")
+     base_url: str | None = Field(None, description="Base URL for the provider.")
+     access_key: str | None = Field(None, description="Access key used for requests to the provider.")
+     region: str | None = Field(None, description="Region used for requests to the provider.")
+     organization_id: str | None = Field(None, description="The organization id of the user")
+     updated_at: datetime | None = Field(None, description="The last update timestamp of the provider.")
+
+     @model_validator(mode="after")
+     def default_base_url(self):
+         if self.provider_type == ProviderType.openai and self.base_url is None:
+             self.base_url = model_settings.openai_api_base
+         return self
+
+     def resolve_identifier(self):
+         if not self.id:
+             self.id = ProviderBase.generate_id(prefix=ProviderBase.__id_prefix__)
+
+     async def check_api_key(self):
+         """Check if the API key is valid for the provider"""
+         raise NotImplementedError
+
+     def list_llm_models(self) -> list[LLMConfig]:
+         """List available LLM models (deprecated: use list_llm_models_async)"""
+         import asyncio
+         import warnings
+
+         warnings.warn("list_llm_models is deprecated, use list_llm_models_async instead", DeprecationWarning, stacklevel=2)
+
+         # Simplified asyncio handling - just use asyncio.run()
+         # This works in most contexts and avoids complex event loop detection
+         try:
+             return asyncio.run(self.list_llm_models_async())
+         except RuntimeError as e:
+             # If we're in an active event loop context, use a thread pool
+             if "cannot be called from a running event loop" in str(e):
+                 import concurrent.futures
+
+                 with concurrent.futures.ThreadPoolExecutor() as executor:
+                     future = executor.submit(asyncio.run, self.list_llm_models_async())
+                     return future.result()
+             else:
+                 raise
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         return []
+
+     def list_embedding_models(self) -> list[EmbeddingConfig]:
+         """List available embedding models (deprecated: use list_embedding_models_async)"""
+         import asyncio
+         import warnings
+
+         warnings.warn("list_embedding_models is deprecated, use list_embedding_models_async instead", DeprecationWarning, stacklevel=2)
+
+         # Simplified asyncio handling - just use asyncio.run()
+         # This works in most contexts and avoids complex event loop detection
+         try:
+             return asyncio.run(self.list_embedding_models_async())
+         except RuntimeError as e:
+             # If we're in an active event loop context, use a thread pool
+             if "cannot be called from a running event loop" in str(e):
+                 import concurrent.futures
+
+                 with concurrent.futures.ThreadPoolExecutor() as executor:
+                     future = executor.submit(asyncio.run, self.list_embedding_models_async())
+                     return future.result()
+             else:
+                 raise
+
+     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+         """List available embedding models. The following do not have support for embedding models:
+         Anthropic, Bedrock, Cerebras, Deepseek, Groq, Mistral, xAI
+         """
+         return []
+
+     def get_model_context_window(self, model_name: str) -> int | None:
+         raise NotImplementedError
+
+     async def get_model_context_window_async(self, model_name: str) -> int | None:
+         raise NotImplementedError
+
+     def get_handle(self, model_name: str, is_embedding: bool = False, base_name: str | None = None) -> str:
+         """
+         Get the handle for a model, with support for custom overrides.
+
+         Args:
+             model_name (str): The name of the model.
+             is_embedding (bool, optional): Whether the handle is for an embedding model. Defaults to False.
+
+         Returns:
+             str: The handle for the model.
+         """
+         base_name = base_name if base_name else self.name
+
+         overrides = EMBEDDING_HANDLE_OVERRIDES if is_embedding else LLM_HANDLE_OVERRIDES
+         if base_name in overrides and model_name in overrides[base_name]:
+             model_name = overrides[base_name][model_name]
+
+         return f"{base_name}/{model_name}"
+
+     def cast_to_subtype(self):
+         # Import here to avoid circular imports
+         from letta.schemas.providers import (
+             AnthropicProvider,
+             AzureProvider,
+             BedrockProvider,
+             CerebrasProvider,
+             CohereProvider,
+             DeepSeekProvider,
+             GoogleAIProvider,
+             GoogleVertexProvider,
+             GroqProvider,
+             LettaProvider,
+             LMStudioOpenAIProvider,
+             MistralProvider,
+             OllamaProvider,
+             OpenAIProvider,
+             TogetherProvider,
+             VLLMProvider,
+             XAIProvider,
+         )
+
+         match self.provider_type:
+             case ProviderType.letta:
+                 return LettaProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.openai:
+                 return OpenAIProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.anthropic:
+                 return AnthropicProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.google_ai:
+                 return GoogleAIProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.google_vertex:
+                 return GoogleVertexProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.azure:
+                 return AzureProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.groq:
+                 return GroqProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.together:
+                 return TogetherProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.ollama:
+                 return OllamaProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.vllm:
+                 return VLLMProvider(**self.model_dump(exclude_none=True))  # Removed support for CompletionsProvider
+             case ProviderType.mistral:
+                 return MistralProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.deepseek:
+                 return DeepSeekProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.cerebras:
+                 return CerebrasProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.xai:
+                 return XAIProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.lmstudio_openai:
+                 return LMStudioOpenAIProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.bedrock:
+                 return BedrockProvider(**self.model_dump(exclude_none=True))
+             case ProviderType.cohere:
+                 return CohereProvider(**self.model_dump(exclude_none=True))
+             case _:
+                 raise ValueError(f"Unknown provider type: {self.provider_type}")
+
+
+ class ProviderCreate(ProviderBase):
+     name: str = Field(..., description="The name of the provider.")
+     provider_type: ProviderType = Field(..., description="The type of the provider.")
+     api_key: str = Field(..., description="API key or secret key used for requests to the provider.")
+     access_key: str | None = Field(None, description="Access key used for requests to the provider.")
+     region: str | None = Field(None, description="Region used for requests to the provider.")
+
+
+ class ProviderUpdate(ProviderBase):
+     api_key: str = Field(..., description="API key or secret key used for requests to the provider.")
+     access_key: str | None = Field(None, description="Access key used for requests to the provider.")
+     region: str | None = Field(None, description="Region used for requests to the provider.")
+
+
+ class ProviderCheck(BaseModel):
+     provider_type: ProviderType = Field(..., description="The type of the provider.")
+     api_key: str = Field(..., description="API key or secret key used for requests to the provider.")
+     access_key: str | None = Field(None, description="Access key used for requests to the provider.")
+     region: str | None = Field(None, description="Region used for requests to the provider.")
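
A short sketch of how the generic Provider row is used (not from the diff; it assumes a BYOK Anthropic row and that no handle overrides apply to this provider name):

from letta.schemas.enums import ProviderCategory, ProviderType
from letta.schemas.providers.base import Provider

row = Provider(
    name="my-anthropic",
    provider_type=ProviderType.anthropic,
    provider_category=ProviderCategory.byok,
    api_key="sk-ant-placeholder",  # placeholder
)

# Handles are "<provider name>/<model name>", after any LLM_HANDLE_OVERRIDES remapping
print(row.get_handle("claude-3-5-haiku-latest"))  # my-anthropic/claude-3-5-haiku-latest

# cast_to_subtype() re-validates the same fields as the matching subclass, so
# provider-specific methods such as list_llm_models_async become available
anthropic = row.cast_to_subtype()
print(type(anthropic).__name__)  # AnthropicProvider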
letta/schemas/providers/bedrock.py
@@ -0,0 +1,78 @@
+ """
+ Note that this formally only supports Anthropic Bedrock.
+ TODO (cliandy): determine what other providers are supported and what is needed to add support.
+ """
+
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.log import get_logger
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.base import Provider
+
+ logger = get_logger(__name__)
+
+
+ class BedrockProvider(Provider):
+     provider_type: Literal[ProviderType.bedrock] = Field(ProviderType.bedrock, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     region: str = Field(..., description="AWS region for Bedrock")
+
+     async def check_api_key(self):
+         """Check if the Bedrock credentials are valid"""
+         from letta.errors import LLMAuthenticationError
+         from letta.llm_api.aws_bedrock import bedrock_get_model_list_async
+
+         try:
+             # For BYOK providers, use the custom credentials
+             if self.provider_category == ProviderCategory.byok:
+                 # If we can list models, the credentials are valid
+                 await bedrock_get_model_list_async(
+                     access_key_id=self.access_key,
+                     secret_access_key=self.api_key,  # api_key stores the secret access key
+                     region_name=self.region,
+                 )
+             else:
+                 # For base providers, use default credentials
+                 bedrock_get_model_list(region_name=self.region)
+         except Exception as e:
+             raise LLMAuthenticationError(message=f"Failed to authenticate with Bedrock: {e}")
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.aws_bedrock import bedrock_get_model_list_async
+
+         models = await bedrock_get_model_list_async(
+             self.access_key,
+             self.api_key,
+             self.region,
+         )
+
+         configs = []
+         for model_summary in models:
+             model_arn = model_summary["inferenceProfileArn"]
+             configs.append(
+                 LLMConfig(
+                     model=model_arn,
+                     model_endpoint_type=self.provider_type.value,
+                     model_endpoint=None,
+                     context_window=self.get_model_context_window(model_arn),
+                     handle=self.get_handle(model_arn),
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+
+         return configs
+
+     def get_model_context_window(self, model_name: str) -> int | None:
+         # Context windows for Claude models
+         from letta.llm_api.aws_bedrock import bedrock_get_model_context_window
+
+         return bedrock_get_model_context_window(model_name)
+
+     def get_handle(self, model_name: str, is_embedding: bool = False, base_name: str | None = None) -> str:
+         logger.debug("Getting handle for model_name: %s", model_name)
+         model = model_name.split(".")[-1]
+         return f"{self.name}/{model}"
letta/schemas/providers/cerebras.py
@@ -0,0 +1,79 @@
+ import warnings
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.openai import OpenAIProvider
+
+
+ class CerebrasProvider(OpenAIProvider):
+     """
+     Cerebras Inference API is OpenAI-compatible and focuses on ultra-fast inference.
+
+     Available Models (as of 2025):
+     - llama-4-scout-17b-16e-instruct: Llama 4 Scout (109B params, 10M context, ~2600 tokens/s)
+     - llama3.1-8b: Llama 3.1 8B (8B params, 128K context, ~2200 tokens/s)
+     - llama-3.3-70b: Llama 3.3 70B (70B params, 128K context, ~2100 tokens/s)
+     - qwen-3-32b: Qwen 3 32B (32B params, 131K context, ~2100 tokens/s)
+     - deepseek-r1-distill-llama-70b: DeepSeek R1 Distill (70B params, 128K context, ~1700 tokens/s)
+     """
+
+     provider_type: Literal[ProviderType.cerebras] = Field(ProviderType.cerebras, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     base_url: str = Field("https://api.cerebras.ai/v1", description="Base URL for the Cerebras API.")
+     api_key: str = Field(..., description="API key for the Cerebras API.")
+
+     def get_model_context_window_size(self, model_name: str) -> int | None:
+         """Cerebras has limited context window sizes.
+
+         see https://inference-docs.cerebras.ai/support/pricing for details by plan
+         """
+         is_free_tier = True
+         if is_free_tier:
+             return 8192
+         return 128000
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.openai import openai_get_model_list_async
+
+         response = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+
+         if "data" in response:
+             data = response["data"]
+         else:
+             data = response
+
+         configs = []
+         for model in data:
+             assert "id" in model, f"Cerebras model missing 'id' field: {model}"
+             model_name = model["id"]
+
+             # Check if model has context_length in response
+             if "context_length" in model:
+                 context_window_size = model["context_length"]
+             else:
+                 context_window_size = self.get_model_context_window_size(model_name)
+
+             if not context_window_size:
+                 warnings.warn(f"Couldn't find context window size for model {model_name}")
+                 continue
+
+             # Cerebras supports function calling
+             put_inner_thoughts_in_kwargs = True
+
+             configs.append(
+                 LLMConfig(
+                     model=model_name,
+                     model_endpoint_type="openai",  # Cerebras uses OpenAI-compatible endpoint
+                     model_endpoint=self.base_url,
+                     context_window=context_window_size,
+                     handle=self.get_handle(model_name),
+                     put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+
+         return configs
letta/schemas/providers/cohere.py
@@ -0,0 +1,18 @@
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.openai import OpenAIProvider
+
+
+ # TODO (cliandy): this needs to be implemented
+ class CohereProvider(OpenAIProvider):
+     provider_type: Literal[ProviderType.cohere] = Field(ProviderType.cohere, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     base_url: str = ""
+     api_key: str = Field(..., description="API key for the Cohere API.")
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         raise NotImplementedError
letta/schemas/providers/deepseek.py
@@ -0,0 +1,63 @@
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.openai import OpenAIProvider
+
+
+ class DeepSeekProvider(OpenAIProvider):
+     """
+     DeepSeek ChatCompletions API is similar to OpenAI's reasoning API,
+     but with slight differences:
+     * For example, DeepSeek's API requires perfect interleaving of user/assistant
+     * It also does not support native function calling
+     """
+
+     provider_type: Literal[ProviderType.deepseek] = Field(ProviderType.deepseek, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     base_url: str = Field("https://api.deepseek.com/v1", description="Base URL for the DeepSeek API.")
+     api_key: str = Field(..., description="API key for the DeepSeek API.")
+
+     # TODO (cliandy): this may need to be updated to reflect current models
+     def get_model_context_window_size(self, model_name: str) -> int | None:
+         # DeepSeek doesn't return context window in the model listing,
+         # so these are hardcoded from their website
+         if model_name == "deepseek-reasoner":
+             return 64000
+         elif model_name == "deepseek-chat":
+             return 64000
+         else:
+             return None
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.openai import openai_get_model_list_async
+
+         response = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+         data = response.get("data", response)
+
+         configs = []
+         for model in data:
+             check = self._do_model_checks_for_name_and_context_size(model)
+             if check is None:
+                 continue
+             model_name, context_window_size = check
+
+             # Not used for deepseek-reasoner, but otherwise is true
+             put_inner_thoughts_in_kwargs = False if model_name == "deepseek-reasoner" else True
+
+             configs.append(
+                 LLMConfig(
+                     model=model_name,
+                     model_endpoint_type="deepseek",
+                     model_endpoint=self.base_url,
+                     context_window=context_window_size,
+                     handle=self.get_handle(model_name),
+                     put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+
+         return configs