letta-nightly 0.8.17.dev20250722104501__py3-none-any.whl → 0.9.0.dev20250724081419__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. The information is provided for informational purposes only.
- letta/__init__.py +5 -3
- letta/agent.py +3 -2
- letta/agents/base_agent.py +4 -1
- letta/agents/voice_agent.py +1 -0
- letta/constants.py +4 -2
- letta/functions/schema_generator.py +2 -1
- letta/groups/dynamic_multi_agent.py +1 -0
- letta/helpers/converters.py +13 -5
- letta/helpers/json_helpers.py +6 -1
- letta/llm_api/anthropic.py +2 -2
- letta/llm_api/aws_bedrock.py +24 -94
- letta/llm_api/deepseek.py +1 -1
- letta/llm_api/google_ai_client.py +0 -38
- letta/llm_api/google_constants.py +6 -3
- letta/llm_api/helpers.py +1 -1
- letta/llm_api/llm_api_tools.py +4 -7
- letta/llm_api/mistral.py +12 -37
- letta/llm_api/openai.py +17 -17
- letta/llm_api/sample_response_jsons/aws_bedrock.json +38 -0
- letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +15 -0
- letta/llm_api/sample_response_jsons/lmstudio_model_list.json +15 -0
- letta/local_llm/constants.py +2 -23
- letta/local_llm/json_parser.py +11 -1
- letta/local_llm/llm_chat_completion_wrappers/airoboros.py +9 -9
- letta/local_llm/llm_chat_completion_wrappers/chatml.py +7 -8
- letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +6 -6
- letta/local_llm/llm_chat_completion_wrappers/dolphin.py +3 -3
- letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +1 -1
- letta/local_llm/ollama/api.py +2 -2
- letta/orm/__init__.py +1 -0
- letta/orm/agent.py +33 -2
- letta/orm/files_agents.py +13 -10
- letta/orm/mixins.py +8 -0
- letta/orm/prompt.py +13 -0
- letta/orm/sqlite_functions.py +61 -17
- letta/otel/db_pool_monitoring.py +13 -12
- letta/schemas/agent.py +69 -4
- letta/schemas/agent_file.py +2 -0
- letta/schemas/block.py +11 -0
- letta/schemas/embedding_config.py +15 -3
- letta/schemas/enums.py +2 -0
- letta/schemas/file.py +1 -1
- letta/schemas/folder.py +74 -0
- letta/schemas/memory.py +12 -6
- letta/schemas/prompt.py +9 -0
- letta/schemas/providers/__init__.py +47 -0
- letta/schemas/providers/anthropic.py +78 -0
- letta/schemas/providers/azure.py +80 -0
- letta/schemas/providers/base.py +201 -0
- letta/schemas/providers/bedrock.py +78 -0
- letta/schemas/providers/cerebras.py +79 -0
- letta/schemas/providers/cohere.py +18 -0
- letta/schemas/providers/deepseek.py +63 -0
- letta/schemas/providers/google_gemini.py +102 -0
- letta/schemas/providers/google_vertex.py +54 -0
- letta/schemas/providers/groq.py +35 -0
- letta/schemas/providers/letta.py +39 -0
- letta/schemas/providers/lmstudio.py +97 -0
- letta/schemas/providers/mistral.py +41 -0
- letta/schemas/providers/ollama.py +151 -0
- letta/schemas/providers/openai.py +241 -0
- letta/schemas/providers/together.py +85 -0
- letta/schemas/providers/vllm.py +57 -0
- letta/schemas/providers/xai.py +66 -0
- letta/server/db.py +0 -5
- letta/server/rest_api/app.py +4 -3
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +152 -4
- letta/server/rest_api/routers/v1/folders.py +490 -0
- letta/server/rest_api/routers/v1/providers.py +2 -2
- letta/server/rest_api/routers/v1/sources.py +21 -26
- letta/server/rest_api/routers/v1/tools.py +90 -15
- letta/server/server.py +50 -95
- letta/services/agent_manager.py +420 -81
- letta/services/agent_serialization_manager.py +707 -0
- letta/services/block_manager.py +132 -11
- letta/services/file_manager.py +104 -29
- letta/services/file_processor/embedder/pinecone_embedder.py +8 -2
- letta/services/file_processor/file_processor.py +75 -24
- letta/services/file_processor/parser/markitdown_parser.py +95 -0
- letta/services/files_agents_manager.py +57 -17
- letta/services/group_manager.py +7 -0
- letta/services/helpers/agent_manager_helper.py +25 -15
- letta/services/provider_manager.py +2 -2
- letta/services/source_manager.py +35 -16
- letta/services/tool_executor/files_tool_executor.py +12 -5
- letta/services/tool_manager.py +12 -0
- letta/services/tool_sandbox/e2b_sandbox.py +52 -48
- letta/settings.py +9 -6
- letta/streaming_utils.py +2 -1
- letta/utils.py +34 -1
- {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/METADATA +9 -8
- {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/RECORD +96 -68
- {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/LICENSE +0 -0
- {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/WHEEL +0 -0
- {letta_nightly-0.8.17.dev20250722104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/entry_points.txt +0 -0
letta/schemas/providers/openai.py
ADDED
@@ -0,0 +1,241 @@
+from typing import Literal
+
+from pydantic import Field
+
+from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
+from letta.log import get_logger
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.base import Provider
+
+logger = get_logger(__name__)
+
+ALLOWED_PREFIXES = {"gpt-4", "o1", "o3", "o4"}
+DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"}
+DEFAULT_EMBEDDING_BATCH_SIZE = 1024
+
+
+class OpenAIProvider(Provider):
+    provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    api_key: str = Field(..., description="API key for the OpenAI API.")
+    base_url: str = Field(..., description="Base URL for the OpenAI API.")
+
+    async def check_api_key(self):
+        from letta.llm_api.openai import openai_check_valid_api_key
+
+        openai_check_valid_api_key(self.base_url, self.api_key)
+
+    async def _get_models_async(self) -> list[dict]:
+        from letta.llm_api.openai import openai_get_model_list_async
+
+        # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
+        # See: https://openrouter.ai/docs/requests
+        extra_params = {"supported_parameters": "tools"} if "openrouter.ai" in self.base_url else None
+
+        # Similar to Nebius
+        extra_params = {"verbose": True} if "nebius.com" in self.base_url else None
+
+        response = await openai_get_model_list_async(
+            self.base_url,
+            api_key=self.api_key,
+            extra_params=extra_params,
+            # fix_url=True, # NOTE: make sure together ends with /v1
+        )
+
+        # TODO (cliandy): this is brittle as TogetherAI seems to result in a list instead of having a 'data' field
+        data = response.get("data", response)
+        assert isinstance(data, list)
+        return data
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
+        data = await self._get_models_async()
+        return self._list_llm_models(data)
+
+    def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]:
+        """
+        This handles filtering out LLM Models by provider that meet Letta's requirements.
+        """
+        configs = []
+        for model in data:
+            check = self._do_model_checks_for_name_and_context_size(model)
+            if check is None:
+                continue
+            model_name, context_window_size = check
+
+            # ===== Provider filtering =====
+            # TogetherAI: includes the type, which we can use to filter out embedding models
+            if "api.together.ai" in self.base_url or "api.together.xyz" in self.base_url:
+                if "type" in model and model["type"] not in ["chat", "language"]:
+                    continue
+
+                # for TogetherAI, we need to skip the models that don't support JSON mode / function calling
+                # requests.exceptions.HTTPError: HTTP error occurred: 400 Client Error: Bad Request for url: https://api.together.ai/v1/chat/completions | Status code: 400, Message: {
+                #   "error": {
+                #     "message": "mistralai/Mixtral-8x7B-v0.1 is not supported for JSON mode/function calling",
+                #     "type": "invalid_request_error",
+                #     "param": null,
+                #     "code": "constraints_model"
+                #   }
+                # }
+                if "config" not in model:
+                    continue
+
+            # Nebius: includes the type, which we can use to filter for text models
+            if "nebius.com" in self.base_url:
+                model_type = model.get("architecture", {}).get("modality")
+                if model_type not in ["text->text", "text+image->text"]:
+                    continue
+
+            # OpenAI
+            # NOTE: o1-mini and o1-preview do not support tool calling
+            # NOTE: o1-mini does not support system messages
+            # NOTE: o1-pro is only available in Responses API
+            if self.base_url == "https://api.openai.com/v1":
+                if any(keyword in model_name for keyword in DISALLOWED_KEYWORDS) or not any(
+                    model_name.startswith(prefix) for prefix in ALLOWED_PREFIXES
+                ):
+                    continue
+
+            # We'll set the model endpoint based on the base URL
+            # Note: openai-proxy just means that the model is using the OpenAIProvider
+            if self.base_url != "https://api.openai.com/v1":
+                handle = self.get_handle(model_name, base_name="openai-proxy")
+            else:
+                handle = self.get_handle(model_name)
+
+            config = LLMConfig(
+                model=model_name,
+                model_endpoint_type="openai",
+                model_endpoint=self.base_url,
+                context_window=context_window_size,
+                handle=handle,
+                provider_name=self.name,
+                provider_category=self.provider_category,
+            )
+
+            config = self._set_model_parameter_tuned_defaults(model_name, config)
+            configs.append(config)
+
+        # for OpenAI, sort in reverse order
+        if self.base_url == "https://api.openai.com/v1":
+            configs.sort(key=lambda x: x.model, reverse=True)
+        return configs
+
+    def _do_model_checks_for_name_and_context_size(self, model: dict, length_key: str = "context_length") -> tuple[str, int] | None:
+        if "id" not in model:
+            logger.warning("Model missing 'id' field for provider: %s and model: %s", self.provider_type, model)
+            return None
+
+        model_name = model["id"]
+        context_window_size = model.get(length_key) or self.get_model_context_window_size(model_name)
+
+        if not context_window_size:
+            logger.info("No context window size found for model: %s", model_name)
+            return None
+
+        return model_name, context_window_size
+
+    @staticmethod
+    def _set_model_parameter_tuned_defaults(model_name: str, llm_config: LLMConfig):
+        """This function is used to tune LLMConfig parameters to improve model performance."""
+
+        # gpt-4o-mini has started to regress with pretty bad emoji spam loops (2025-07)
+        if "gpt-4o-mini" in model_name or "gpt-4.1-mini" in model_name:
+            llm_config.frequency_penalty = 1.0
+        return llm_config
+
+    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+        if self.base_url == "https://api.openai.com/v1":
+            # TODO: actually automatically list models for OpenAI
+            return [
+                EmbeddingConfig(
+                    embedding_model="text-embedding-ada-002",
+                    embedding_endpoint_type="openai",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=1536,
+                    embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                    handle=self.get_handle("text-embedding-ada-002", is_embedding=True),
+                    batch_size=DEFAULT_EMBEDDING_BATCH_SIZE,
+                ),
+                EmbeddingConfig(
+                    embedding_model="text-embedding-3-small",
+                    embedding_endpoint_type="openai",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=2000,
+                    embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                    handle=self.get_handle("text-embedding-3-small", is_embedding=True),
+                    batch_size=DEFAULT_EMBEDDING_BATCH_SIZE,
+                ),
+                EmbeddingConfig(
+                    embedding_model="text-embedding-3-large",
+                    embedding_endpoint_type="openai",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=2000,
+                    embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                    handle=self.get_handle("text-embedding-3-large", is_embedding=True),
+                    batch_size=DEFAULT_EMBEDDING_BATCH_SIZE,
+                ),
+            ]
+        else:
+            # TODO: this has filtering that doesn't apply for embedding models, fix this.
+            data = await self._get_models_async()
+            return self._list_embedding_models(data)
+
+    def _list_embedding_models(self, data) -> list[EmbeddingConfig]:
+        configs = []
+        for model in data:
+            check = self._do_model_checks_for_name_and_context_size(model)
+            if check is None:
+                continue
+            model_name, context_window_size = check
+
+            # ===== Provider filtering =====
+            # TogetherAI: includes the type, which we can use to filter for embedding models
+            if "api.together.ai" in self.base_url or "api.together.xyz" in self.base_url:
+                if "type" in model and model["type"] not in ["embedding"]:
+                    continue
+            # Nebius: includes the type, which we can use to filter for text models
+            elif "nebius.com" in self.base_url:
+                model_type = model.get("architecture", {}).get("modality")
+                if model_type not in ["text->embedding"]:
+                    continue
+            else:
+                logger.info(
+                    f"Skipping embedding models for %s by default, as we don't assume embeddings are supported."
+                    "Please open an issue on GitHub if support is required.",
+                    self.base_url,
+                )
+                continue
+
+            configs.append(
+                EmbeddingConfig(
+                    embedding_model=model_name,
+                    embedding_endpoint_type=self.provider_type,
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=context_window_size,
+                    embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                    handle=self.get_handle(model, is_embedding=True),
+                )
+            )
+
+        return configs
+
+    def get_model_context_window_size(self, model_name: str) -> int | None:
+        if model_name in LLM_MAX_TOKENS:
+            return LLM_MAX_TOKENS[model_name]
+        else:
+            logger.debug(
+                f"Model %s on %s for provider %s not found in LLM_MAX_TOKENS. Using default of {{LLM_MAX_TOKENS['DEFAULT']}}",
+                model_name,
+                self.base_url,
+                self.__class__.__name__,
+            )
+            return LLM_MAX_TOKENS["DEFAULT"]
+
+    def get_model_context_window(self, model_name: str) -> int | None:
+        return self.get_model_context_window_size(model_name)
+
+    async def get_model_context_window_async(self, model_name: str) -> int | None:
+        return self.get_model_context_window_size(model_name)
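For reference, a minimal sketch (not part of this diff) of how the new provider class might be exercised. It assumes the `Provider` base class in letta/schemas/providers/base.py accepts a `name` field (referenced as `self.name` above but not shown here) and that a valid OpenAI API key is available; the key below is a placeholder.

import asyncio

from letta.schemas.providers.openai import OpenAIProvider


async def main():
    # Hypothetical instantiation; `name` comes from the Provider base class (not shown in this diff)
    provider = OpenAIProvider(
        name="openai",
        api_key="sk-...",  # placeholder key
        base_url="https://api.openai.com/v1",
    )
    # List chat models and the hardcoded embedding models exposed by the provider
    llm_configs = await provider.list_llm_models_async()
    embedding_configs = await provider.list_embedding_models_async()
    print([c.model for c in llm_configs])
    print([c.embedding_model for c in embedding_configs])


asyncio.run(main())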
letta/schemas/providers/together.py
ADDED
@@ -0,0 +1,85 @@
+"""
+Note: this supports completions (deprecated by openai) and chat completions via the OpenAI API.
+"""
+
+from typing import Literal
+
+from pydantic import Field
+
+from letta.constants import MIN_CONTEXT_WINDOW
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.openai import OpenAIProvider
+
+
+class TogetherProvider(OpenAIProvider):
+    provider_type: Literal[ProviderType.together] = Field(ProviderType.together, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    base_url: str = "https://api.together.xyz/v1"
+    api_key: str = Field(..., description="API key for the Together API.")
+    default_prompt_formatter: str = Field(..., description="Default prompt formatter (aka model wrapper) to use on vLLM /completions API.")
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list_async
+
+        models = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+        return self._list_llm_models(models)
+
+    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+        import warnings
+
+        warnings.warn(
+            "Letta does not currently support listing embedding models for Together. Please "
+            "contact support or reach out via GitHub or Discord to get support."
+        )
+        return []
+
+    # TODO (cliandy): verify this with openai
+    def _list_llm_models(self, models) -> list[LLMConfig]:
+        pass
+
+        # TogetherAI's response is missing the 'data' field
+        # assert "data" in response, f"OpenAI model query response missing 'data' field: {response}"
+        if "data" in models:
+            data = models["data"]
+        else:
+            data = models
+
+        configs = []
+        for model in data:
+            assert "id" in model, f"TogetherAI model missing 'id' field: {model}"
+            model_name = model["id"]
+
+            if "context_length" in model:
+                # Context length is returned in OpenRouter as "context_length"
+                context_window_size = model["context_length"]
+            else:
+                context_window_size = self.get_model_context_window_size(model_name)
+
+            # We need the context length for embeddings too
+            if not context_window_size:
+                continue
+
+            # Skip models that are too small for Letta
+            if context_window_size <= MIN_CONTEXT_WINDOW:
+                continue
+
+            # TogetherAI includes the type, which we can use to filter for embedding models
+            if "type" in model and model["type"] not in ["chat", "language"]:
+                continue
+
+            configs.append(
+                LLMConfig(
+                    model=model_name,
+                    model_endpoint_type="together",
+                    model_endpoint=self.base_url,
+                    model_wrapper=self.default_prompt_formatter,
+                    context_window=context_window_size,
+                    handle=self.get_handle(model_name),
+                    provider_name=self.name,
+                    provider_category=self.provider_category,
+                )
+            )
+
+        return configs
letta/schemas/providers/vllm.py
ADDED
@@ -0,0 +1,57 @@
+"""
+Note: this consolidates the vLLM provider for completions (deprecated by openai)
+and chat completions. Support is provided primarily for the chat completions endpoint,
+but to utilize the completions endpoint, set the proper `base_url` and
+`default_prompt_formatter`.
+"""
+
+from typing import Literal
+
+from pydantic import Field
+
+from letta.schemas.embedding_config import EmbeddingConfig
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.base import Provider
+
+
+class VLLMProvider(Provider):
+    provider_type: Literal[ProviderType.vllm] = Field(ProviderType.vllm, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    base_url: str = Field(..., description="Base URL for the vLLM API.")
+    api_key: str | None = Field(None, description="API key for the vLLM API.")
+    default_prompt_formatter: str | None = Field(
+        default=None, description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
+    )
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list_async
+
+        # TODO (cliandy): previously unsupported with vLLM; confirm if this is still the case or not
+        response = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+
+        data = response.get("data", response)
+
+        configs = []
+        for model in data:
+            model_name = model["id"]
+
+            configs.append(
+                LLMConfig(
+                    model=model_name,
+                    model_endpoint_type="openai", # TODO (cliandy): this was previous vllm for the completions provider, why?
+                    model_endpoint=self.base_url,
+                    model_wrapper=self.default_prompt_formatter,
+                    context_window=model["max_model_len"],
+                    handle=self.get_handle(model_name),
+                    provider_name=self.name,
+                    provider_category=self.provider_category,
+                )
+            )
+
+        return configs
+
+    async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+        # Note: vLLM technically can support embedding models though may require multiple instances
+        # for now, we will not support embedding models for vLLM.
+        return []
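As a rough illustration (again, not part of the diff), the new VLLMProvider could be pointed at a local vLLM server's OpenAI-compatible endpoint; the URL and `name` value below are placeholders, and `name` is assumed to be a field on the Provider base class.

import asyncio

from letta.schemas.providers.vllm import VLLMProvider


async def main():
    provider = VLLMProvider(
        name="vllm",  # assumed; inherited from the Provider base class
        base_url="http://localhost:8000/v1",  # placeholder local vLLM endpoint
        api_key=None,
    )
    # Each returned config takes its context window from the model's reported max_model_len
    for cfg in await provider.list_llm_models_async():
        print(cfg.model, cfg.context_window)


asyncio.run(main())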
letta/schemas/providers/xai.py
ADDED
@@ -0,0 +1,66 @@
+import warnings
+from typing import Literal
+
+from pydantic import Field
+
+from letta.schemas.enums import ProviderCategory, ProviderType
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.providers.openai import OpenAIProvider
+
+MODEL_CONTEXT_WINDOWS = {
+    "grok-3-fast": 131_072,
+    "grok-3": 131_072,
+    "grok-3-mini": 131_072,
+    "grok-3-mini-fast": 131_072,
+    "grok-4-0709": 256_000,
+}
+
+
+class XAIProvider(OpenAIProvider):
+    """https://docs.x.ai/docs/api-reference"""
+
+    provider_type: Literal[ProviderType.xai] = Field(ProviderType.xai, description="The type of the provider.")
+    provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+    api_key: str = Field(..., description="API key for the xAI/Grok API.")
+    base_url: str = Field("https://api.x.ai/v1", description="Base URL for the xAI/Grok API.")
+
+    def get_model_context_window_size(self, model_name: str) -> int | None:
+        # xAI doesn't return context window in the model listing,
+        # this is hardcoded from https://docs.x.ai/docs/models
+        return MODEL_CONTEXT_WINDOWS.get(model_name)
+
+    async def list_llm_models_async(self) -> list[LLMConfig]:
+        from letta.llm_api.openai import openai_get_model_list_async
+
+        response = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+
+        data = response.get("data", response)
+
+        configs = []
+        for model in data:
+            assert "id" in model, f"xAI/Grok model missing 'id' field: {model}"
+            model_name = model["id"]
+
+            # In case xAI starts supporting it in the future:
+            if "context_length" in model:
+                context_window_size = model["context_length"]
+            else:
+                context_window_size = self.get_model_context_window_size(model_name)
+
+            if not context_window_size:
+                warnings.warn(f"Couldn't find context window size for model {model_name}")
+                continue
+
+            configs.append(
+                LLMConfig(
+                    model=model_name,
+                    model_endpoint_type="xai",
+                    model_endpoint=self.base_url,
+                    context_window=context_window_size,
+                    handle=self.get_handle(model_name),
+                    provider_name=self.name,
+                    provider_category=self.provider_category,
+                )
+            )
+
+        return configs
letta/server/db.py
CHANGED
@@ -400,11 +400,6 @@ class DatabaseRegistry:
|
|
400
400
|
"""Trace sync db caller information for debugging purposes."""
|
401
401
|
pass # wrapper used for otel tracing only
|
402
402
|
|
403
|
-
@trace_method
|
404
|
-
def session_caller_trace(self, caller_info: str):
|
405
|
-
"""Trace sync db caller information for debugging purposes."""
|
406
|
-
pass # wrapper used for otel tracing only
|
407
|
-
|
408
403
|
|
409
404
|
# Create a singleton instance
|
410
405
|
db_registry = DatabaseRegistry()
|
letta/server/rest_api/app.py
CHANGED
@@ -407,9 +407,10 @@ def start_server(
             address=host or "127.0.0.1",  # Note granian address must be an ip address
             port=port or REST_DEFAULT_PORT,
             workers=settings.uvicorn_workers,
-            #
+            # runtime_blocking_threads=
+            # runtime_threads=
             reload=reload or settings.uvicorn_reload,
-
+            reload_paths=["letta/"],
             reload_ignore_worker_failure=True,
             reload_tick=4000,  # set to 4s to prevent crashing on weird state
             # log_level="info"
@@ -451,7 +452,7 @@ def start_server(
             # runtime_blocking_threads=
             # runtime_threads=
             reload=reload or settings.uvicorn_reload,
-            reload_paths=["
+            reload_paths=["letta/"],
             reload_ignore_worker_failure=True,
             reload_tick=4000,  # set to 4s to prevent crashing on weird state
             # log_level="info"
letta/server/rest_api/routers/v1/__init__.py
CHANGED
@@ -1,6 +1,7 @@
 from letta.server.rest_api.routers.v1.agents import router as agents_router
 from letta.server.rest_api.routers.v1.blocks import router as blocks_router
 from letta.server.rest_api.routers.v1.embeddings import router as embeddings_router
+from letta.server.rest_api.routers.v1.folders import router as folders_router
 from letta.server.rest_api.routers.v1.groups import router as groups_router
 from letta.server.rest_api.routers.v1.health import router as health_router
 from letta.server.rest_api.routers.v1.identities import router as identities_router
@@ -20,6 +21,7 @@ from letta.server.rest_api.routers.v1.voice import router as voice_router
 ROUTERS = [
     tools_router,
     sources_router,
+    folders_router,
     agents_router,
     groups_router,
     identities_router,