letta-nightly 0.8.17.dev20250723104501__py3-none-any.whl → 0.9.0.dev20250724081419__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. letta/__init__.py +5 -3
  2. letta/agent.py +3 -2
  3. letta/agents/base_agent.py +4 -1
  4. letta/agents/voice_agent.py +1 -0
  5. letta/constants.py +4 -2
  6. letta/functions/schema_generator.py +2 -1
  7. letta/groups/dynamic_multi_agent.py +1 -0
  8. letta/helpers/converters.py +13 -5
  9. letta/helpers/json_helpers.py +6 -1
  10. letta/llm_api/anthropic.py +2 -2
  11. letta/llm_api/aws_bedrock.py +24 -94
  12. letta/llm_api/deepseek.py +1 -1
  13. letta/llm_api/google_ai_client.py +0 -38
  14. letta/llm_api/google_constants.py +6 -3
  15. letta/llm_api/helpers.py +1 -1
  16. letta/llm_api/llm_api_tools.py +4 -7
  17. letta/llm_api/mistral.py +12 -37
  18. letta/llm_api/openai.py +17 -17
  19. letta/llm_api/sample_response_jsons/aws_bedrock.json +38 -0
  20. letta/llm_api/sample_response_jsons/lmstudio_embedding_list.json +15 -0
  21. letta/llm_api/sample_response_jsons/lmstudio_model_list.json +15 -0
  22. letta/local_llm/constants.py +2 -23
  23. letta/local_llm/json_parser.py +11 -1
  24. letta/local_llm/llm_chat_completion_wrappers/airoboros.py +9 -9
  25. letta/local_llm/llm_chat_completion_wrappers/chatml.py +7 -8
  26. letta/local_llm/llm_chat_completion_wrappers/configurable_wrapper.py +6 -6
  27. letta/local_llm/llm_chat_completion_wrappers/dolphin.py +3 -3
  28. letta/local_llm/llm_chat_completion_wrappers/simple_summary_wrapper.py +1 -1
  29. letta/local_llm/ollama/api.py +2 -2
  30. letta/orm/__init__.py +1 -0
  31. letta/orm/agent.py +33 -2
  32. letta/orm/files_agents.py +13 -10
  33. letta/orm/mixins.py +8 -0
  34. letta/orm/prompt.py +13 -0
  35. letta/orm/sqlite_functions.py +61 -17
  36. letta/otel/db_pool_monitoring.py +13 -12
  37. letta/schemas/agent.py +69 -4
  38. letta/schemas/agent_file.py +2 -0
  39. letta/schemas/block.py +11 -0
  40. letta/schemas/embedding_config.py +15 -3
  41. letta/schemas/enums.py +2 -0
  42. letta/schemas/file.py +1 -1
  43. letta/schemas/folder.py +74 -0
  44. letta/schemas/memory.py +12 -6
  45. letta/schemas/prompt.py +9 -0
  46. letta/schemas/providers/__init__.py +47 -0
  47. letta/schemas/providers/anthropic.py +78 -0
  48. letta/schemas/providers/azure.py +80 -0
  49. letta/schemas/providers/base.py +201 -0
  50. letta/schemas/providers/bedrock.py +78 -0
  51. letta/schemas/providers/cerebras.py +79 -0
  52. letta/schemas/providers/cohere.py +18 -0
  53. letta/schemas/providers/deepseek.py +63 -0
  54. letta/schemas/providers/google_gemini.py +102 -0
  55. letta/schemas/providers/google_vertex.py +54 -0
  56. letta/schemas/providers/groq.py +35 -0
  57. letta/schemas/providers/letta.py +39 -0
  58. letta/schemas/providers/lmstudio.py +97 -0
  59. letta/schemas/providers/mistral.py +41 -0
  60. letta/schemas/providers/ollama.py +151 -0
  61. letta/schemas/providers/openai.py +241 -0
  62. letta/schemas/providers/together.py +85 -0
  63. letta/schemas/providers/vllm.py +57 -0
  64. letta/schemas/providers/xai.py +66 -0
  65. letta/server/db.py +0 -5
  66. letta/server/rest_api/app.py +4 -3
  67. letta/server/rest_api/routers/v1/__init__.py +2 -0
  68. letta/server/rest_api/routers/v1/agents.py +152 -4
  69. letta/server/rest_api/routers/v1/folders.py +490 -0
  70. letta/server/rest_api/routers/v1/providers.py +2 -2
  71. letta/server/rest_api/routers/v1/sources.py +21 -26
  72. letta/server/rest_api/routers/v1/tools.py +90 -15
  73. letta/server/server.py +50 -95
  74. letta/services/agent_manager.py +420 -81
  75. letta/services/agent_serialization_manager.py +707 -0
  76. letta/services/block_manager.py +132 -11
  77. letta/services/file_manager.py +104 -29
  78. letta/services/file_processor/embedder/pinecone_embedder.py +8 -2
  79. letta/services/file_processor/file_processor.py +75 -24
  80. letta/services/file_processor/parser/markitdown_parser.py +95 -0
  81. letta/services/files_agents_manager.py +57 -17
  82. letta/services/group_manager.py +7 -0
  83. letta/services/helpers/agent_manager_helper.py +25 -15
  84. letta/services/provider_manager.py +2 -2
  85. letta/services/source_manager.py +35 -16
  86. letta/services/tool_executor/files_tool_executor.py +12 -5
  87. letta/services/tool_manager.py +12 -0
  88. letta/services/tool_sandbox/e2b_sandbox.py +52 -48
  89. letta/settings.py +9 -6
  90. letta/streaming_utils.py +2 -1
  91. letta/utils.py +34 -1
  92. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/METADATA +9 -8
  93. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/RECORD +96 -68
  94. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/LICENSE +0 -0
  95. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/WHEEL +0 -0
  96. {letta_nightly-0.8.17.dev20250723104501.dist-info → letta_nightly-0.9.0.dev20250724081419.dist-info}/entry_points.txt +0 -0
letta/schemas/providers/openai.py ADDED
@@ -0,0 +1,241 @@
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.constants import DEFAULT_EMBEDDING_CHUNK_SIZE, LLM_MAX_TOKENS
+ from letta.log import get_logger
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.base import Provider
+
+ logger = get_logger(__name__)
+
+ ALLOWED_PREFIXES = {"gpt-4", "o1", "o3", "o4"}
+ DISALLOWED_KEYWORDS = {"transcribe", "search", "realtime", "tts", "audio", "computer", "o1-mini", "o1-preview", "o1-pro"}
+ DEFAULT_EMBEDDING_BATCH_SIZE = 1024
+
+
+ class OpenAIProvider(Provider):
+     provider_type: Literal[ProviderType.openai] = Field(ProviderType.openai, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     api_key: str = Field(..., description="API key for the OpenAI API.")
+     base_url: str = Field(..., description="Base URL for the OpenAI API.")
+
+     async def check_api_key(self):
+         from letta.llm_api.openai import openai_check_valid_api_key
+
+         openai_check_valid_api_key(self.base_url, self.api_key)
+
+     async def _get_models_async(self) -> list[dict]:
+         from letta.llm_api.openai import openai_get_model_list_async
+
+         # Some hardcoded support for OpenRouter (so that we only get models with tool calling support)...
+         # See: https://openrouter.ai/docs/requests
+         extra_params = {"supported_parameters": "tools"} if "openrouter.ai" in self.base_url else None
+
+         # Similar to Nebius
+         extra_params = {"verbose": True} if "nebius.com" in self.base_url else None
+
+         response = await openai_get_model_list_async(
+             self.base_url,
+             api_key=self.api_key,
+             extra_params=extra_params,
+             # fix_url=True,  # NOTE: make sure together ends with /v1
+         )
+
+         # TODO (cliandy): this is brittle as TogetherAI seems to result in a list instead of having a 'data' field
+         data = response.get("data", response)
+         assert isinstance(data, list)
+         return data
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         data = await self._get_models_async()
+         return self._list_llm_models(data)
+
+     def _list_llm_models(self, data: list[dict]) -> list[LLMConfig]:
+         """
+         This handles filtering out LLM Models by provider that meet Letta's requirements.
+         """
+         configs = []
+         for model in data:
+             check = self._do_model_checks_for_name_and_context_size(model)
+             if check is None:
+                 continue
+             model_name, context_window_size = check
+
+             # ===== Provider filtering =====
+             # TogetherAI: includes the type, which we can use to filter out embedding models
+             if "api.together.ai" in self.base_url or "api.together.xyz" in self.base_url:
+                 if "type" in model and model["type"] not in ["chat", "language"]:
+                     continue
+
+                 # for TogetherAI, we need to skip the models that don't support JSON mode / function calling
+                 # requests.exceptions.HTTPError: HTTP error occurred: 400 Client Error: Bad Request for url: https://api.together.ai/v1/chat/completions | Status code: 400, Message: {
+                 #   "error": {
+                 #     "message": "mistralai/Mixtral-8x7B-v0.1 is not supported for JSON mode/function calling",
+                 #     "type": "invalid_request_error",
+                 #     "param": null,
+                 #     "code": "constraints_model"
+                 #   }
+                 # }
+                 if "config" not in model:
+                     continue
+
+             # Nebius: includes the type, which we can use to filter for text models
+             if "nebius.com" in self.base_url:
+                 model_type = model.get("architecture", {}).get("modality")
+                 if model_type not in ["text->text", "text+image->text"]:
+                     continue
+
+             # OpenAI
+             # NOTE: o1-mini and o1-preview do not support tool calling
+             # NOTE: o1-mini does not support system messages
+             # NOTE: o1-pro is only available in Responses API
+             if self.base_url == "https://api.openai.com/v1":
+                 if any(keyword in model_name for keyword in DISALLOWED_KEYWORDS) or not any(
+                     model_name.startswith(prefix) for prefix in ALLOWED_PREFIXES
+                 ):
+                     continue
+
+             # We'll set the model endpoint based on the base URL
+             # Note: openai-proxy just means that the model is using the OpenAIProvider
+             if self.base_url != "https://api.openai.com/v1":
+                 handle = self.get_handle(model_name, base_name="openai-proxy")
+             else:
+                 handle = self.get_handle(model_name)
+
+             config = LLMConfig(
+                 model=model_name,
+                 model_endpoint_type="openai",
+                 model_endpoint=self.base_url,
+                 context_window=context_window_size,
+                 handle=handle,
+                 provider_name=self.name,
+                 provider_category=self.provider_category,
+             )
+
+             config = self._set_model_parameter_tuned_defaults(model_name, config)
+             configs.append(config)
+
+         # for OpenAI, sort in reverse order
+         if self.base_url == "https://api.openai.com/v1":
+             configs.sort(key=lambda x: x.model, reverse=True)
+         return configs
+
+     def _do_model_checks_for_name_and_context_size(self, model: dict, length_key: str = "context_length") -> tuple[str, int] | None:
+         if "id" not in model:
+             logger.warning("Model missing 'id' field for provider: %s and model: %s", self.provider_type, model)
+             return None
+
+         model_name = model["id"]
+         context_window_size = model.get(length_key) or self.get_model_context_window_size(model_name)
+
+         if not context_window_size:
+             logger.info("No context window size found for model: %s", model_name)
+             return None
+
+         return model_name, context_window_size
+
+     @staticmethod
+     def _set_model_parameter_tuned_defaults(model_name: str, llm_config: LLMConfig):
+         """This function is used to tune LLMConfig parameters to improve model performance."""
+
+         # gpt-4o-mini has started to regress with pretty bad emoji spam loops (2025-07)
+         if "gpt-4o-mini" in model_name or "gpt-4.1-mini" in model_name:
+             llm_config.frequency_penalty = 1.0
+         return llm_config
+
+     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+         if self.base_url == "https://api.openai.com/v1":
+             # TODO: actually automatically list models for OpenAI
+             return [
+                 EmbeddingConfig(
+                     embedding_model="text-embedding-ada-002",
+                     embedding_endpoint_type="openai",
+                     embedding_endpoint=self.base_url,
+                     embedding_dim=1536,
+                     embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                     handle=self.get_handle("text-embedding-ada-002", is_embedding=True),
+                     batch_size=DEFAULT_EMBEDDING_BATCH_SIZE,
+                 ),
+                 EmbeddingConfig(
+                     embedding_model="text-embedding-3-small",
+                     embedding_endpoint_type="openai",
+                     embedding_endpoint=self.base_url,
+                     embedding_dim=2000,
+                     embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                     handle=self.get_handle("text-embedding-3-small", is_embedding=True),
+                     batch_size=DEFAULT_EMBEDDING_BATCH_SIZE,
+                 ),
+                 EmbeddingConfig(
+                     embedding_model="text-embedding-3-large",
+                     embedding_endpoint_type="openai",
+                     embedding_endpoint=self.base_url,
+                     embedding_dim=2000,
+                     embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                     handle=self.get_handle("text-embedding-3-large", is_embedding=True),
+                     batch_size=DEFAULT_EMBEDDING_BATCH_SIZE,
+                 ),
+             ]
+         else:
+             # TODO: this has filtering that doesn't apply for embedding models, fix this.
+             data = await self._get_models_async()
+             return self._list_embedding_models(data)
+
+     def _list_embedding_models(self, data) -> list[EmbeddingConfig]:
+         configs = []
+         for model in data:
+             check = self._do_model_checks_for_name_and_context_size(model)
+             if check is None:
+                 continue
+             model_name, context_window_size = check
+
+             # ===== Provider filtering =====
+             # TogetherAI: includes the type, which we can use to filter for embedding models
+             if "api.together.ai" in self.base_url or "api.together.xyz" in self.base_url:
+                 if "type" in model and model["type"] not in ["embedding"]:
+                     continue
+             # Nebius: includes the type, which we can use to filter for text models
+             elif "nebius.com" in self.base_url:
+                 model_type = model.get("architecture", {}).get("modality")
+                 if model_type not in ["text->embedding"]:
+                     continue
+             else:
+                 logger.info(
+                     f"Skipping embedding models for %s by default, as we don't assume embeddings are supported."
+                     "Please open an issue on GitHub if support is required.",
+                     self.base_url,
+                 )
+                 continue
+
+             configs.append(
+                 EmbeddingConfig(
+                     embedding_model=model_name,
+                     embedding_endpoint_type=self.provider_type,
+                     embedding_endpoint=self.base_url,
+                     embedding_dim=context_window_size,
+                     embedding_chunk_size=DEFAULT_EMBEDDING_CHUNK_SIZE,
+                     handle=self.get_handle(model, is_embedding=True),
+                 )
+             )
+
+         return configs
+
+     def get_model_context_window_size(self, model_name: str) -> int | None:
+         if model_name in LLM_MAX_TOKENS:
+             return LLM_MAX_TOKENS[model_name]
+         else:
+             logger.debug(
+                 f"Model %s on %s for provider %s not found in LLM_MAX_TOKENS. Using default of {{LLM_MAX_TOKENS['DEFAULT']}}",
+                 model_name,
+                 self.base_url,
+                 self.__class__.__name__,
+             )
+             return LLM_MAX_TOKENS["DEFAULT"]
+
+     def get_model_context_window(self, model_name: str) -> int | None:
+         return self.get_model_context_window_size(model_name)
+
+     async def get_model_context_window_async(self, model_name: str) -> int | None:
+         return self.get_model_context_window_size(model_name)
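For orientation, below is a minimal usage sketch of the new OpenAIProvider class added above; it is not part of the diff. The field names mirror the Pydantic model, but the `name` value, the placeholder API key, and the asyncio wiring are assumptions.

import asyncio

from letta.schemas.providers.openai import OpenAIProvider

# Hypothetical instantiation; `name` and the API key are placeholders.
provider = OpenAIProvider(
    name="openai",
    api_key="sk-...",
    base_url="https://api.openai.com/v1",
)

async def main() -> None:
    # Applies the ALLOWED_PREFIXES / DISALLOWED_KEYWORDS filtering shown above.
    llm_configs = await provider.list_llm_models_async()
    # For the official base_url this returns the three hardcoded embedding configs.
    embedding_configs = await provider.list_embedding_models_async()
    print([c.handle for c in llm_configs])
    print([c.embedding_model for c in embedding_configs])

asyncio.run(main())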
letta/schemas/providers/together.py ADDED
@@ -0,0 +1,85 @@
+ """
+ Note: this supports completions (deprecated by openai) and chat completions via the OpenAI API.
+ """
+
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.constants import MIN_CONTEXT_WINDOW
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.openai import OpenAIProvider
+
+
+ class TogetherProvider(OpenAIProvider):
+     provider_type: Literal[ProviderType.together] = Field(ProviderType.together, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     base_url: str = "https://api.together.xyz/v1"
+     api_key: str = Field(..., description="API key for the Together API.")
+     default_prompt_formatter: str = Field(..., description="Default prompt formatter (aka model wrapper) to use on vLLM /completions API.")
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.openai import openai_get_model_list_async
+
+         models = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+         return self._list_llm_models(models)
+
+     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+         import warnings
+
+         warnings.warn(
+             "Letta does not currently support listing embedding models for Together. Please "
+             "contact support or reach out via GitHub or Discord to get support."
+         )
+         return []
+
+     # TODO (cliandy): verify this with openai
+     def _list_llm_models(self, models) -> list[LLMConfig]:
+         pass
+
+         # TogetherAI's response is missing the 'data' field
+         # assert "data" in response, f"OpenAI model query response missing 'data' field: {response}"
+         if "data" in models:
+             data = models["data"]
+         else:
+             data = models
+
+         configs = []
+         for model in data:
+             assert "id" in model, f"TogetherAI model missing 'id' field: {model}"
+             model_name = model["id"]
+
+             if "context_length" in model:
+                 # Context length is returned in OpenRouter as "context_length"
+                 context_window_size = model["context_length"]
+             else:
+                 context_window_size = self.get_model_context_window_size(model_name)
+
+             # We need the context length for embeddings too
+             if not context_window_size:
+                 continue
+
+             # Skip models that are too small for Letta
+             if context_window_size <= MIN_CONTEXT_WINDOW:
+                 continue
+
+             # TogetherAI includes the type, which we can use to filter for embedding models
+             if "type" in model and model["type"] not in ["chat", "language"]:
+                 continue
+
+             configs.append(
+                 LLMConfig(
+                     model=model_name,
+                     model_endpoint_type="together",
+                     model_endpoint=self.base_url,
+                     model_wrapper=self.default_prompt_formatter,
+                     context_window=context_window_size,
+                     handle=self.get_handle(model_name),
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+
+         return configs
letta/schemas/providers/vllm.py ADDED
@@ -0,0 +1,57 @@
+ """
+ Note: this consolidates the vLLM provider for completions (deprecated by openai)
+ and chat completions. Support is provided primarily for the chat completions endpoint,
+ but to utilize the completions endpoint, set the proper `base_url` and
+ `default_prompt_formatter`.
+ """
+
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.schemas.embedding_config import EmbeddingConfig
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.base import Provider
+
+
+ class VLLMProvider(Provider):
+     provider_type: Literal[ProviderType.vllm] = Field(ProviderType.vllm, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     base_url: str = Field(..., description="Base URL for the vLLM API.")
+     api_key: str | None = Field(None, description="API key for the vLLM API.")
+     default_prompt_formatter: str | None = Field(
+         default=None, description="Default prompt formatter (aka model wrapper) to use on a /completions style API."
+     )
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.openai import openai_get_model_list_async
+
+         # TODO (cliandy): previously unsupported with vLLM; confirm if this is still the case or not
+         response = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+
+         data = response.get("data", response)
+
+         configs = []
+         for model in data:
+             model_name = model["id"]
+
+             configs.append(
+                 LLMConfig(
+                     model=model_name,
+                     model_endpoint_type="openai",  # TODO (cliandy): this was previous vllm for the completions provider, why?
+                     model_endpoint=self.base_url,
+                     model_wrapper=self.default_prompt_formatter,
+                     context_window=model["max_model_len"],
+                     handle=self.get_handle(model_name),
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+
+         return configs
+
+     async def list_embedding_models_async(self) -> list[EmbeddingConfig]:
+         # Note: vLLM technically can support embedding models though may require multiple instances
+         # for now, we will not support embedding models for vLLM.
+         return []
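As a reading aid (not part of the diff), this sketch shows the response shape VLLMProvider.list_llm_models_async assumes from vLLM's OpenAI-compatible /v1/models endpoint; the sample model id and max_model_len value are illustrative.

# Illustrative payload: vLLM reports each model's context length as "max_model_len".
sample_response = {
    "data": [
        {"id": "meta-llama/Llama-3.1-8B-Instruct", "max_model_len": 131072},
    ]
}

# Mirrors the unwrapping above: fall back to the raw list when "data" is absent.
data = sample_response.get("data", sample_response)
for model in data:
    print(model["id"], model["max_model_len"])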
letta/schemas/providers/xai.py ADDED
@@ -0,0 +1,66 @@
+ import warnings
+ from typing import Literal
+
+ from pydantic import Field
+
+ from letta.schemas.enums import ProviderCategory, ProviderType
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.providers.openai import OpenAIProvider
+
+ MODEL_CONTEXT_WINDOWS = {
+     "grok-3-fast": 131_072,
+     "grok-3": 131_072,
+     "grok-3-mini": 131_072,
+     "grok-3-mini-fast": 131_072,
+     "grok-4-0709": 256_000,
+ }
+
+
+ class XAIProvider(OpenAIProvider):
+     """https://docs.x.ai/docs/api-reference"""
+
+     provider_type: Literal[ProviderType.xai] = Field(ProviderType.xai, description="The type of the provider.")
+     provider_category: ProviderCategory = Field(ProviderCategory.base, description="The category of the provider (base or byok)")
+     api_key: str = Field(..., description="API key for the xAI/Grok API.")
+     base_url: str = Field("https://api.x.ai/v1", description="Base URL for the xAI/Grok API.")
+
+     def get_model_context_window_size(self, model_name: str) -> int | None:
+         # xAI doesn't return context window in the model listing,
+         # this is hardcoded from https://docs.x.ai/docs/models
+         return MODEL_CONTEXT_WINDOWS.get(model_name)
+
+     async def list_llm_models_async(self) -> list[LLMConfig]:
+         from letta.llm_api.openai import openai_get_model_list_async
+
+         response = await openai_get_model_list_async(self.base_url, api_key=self.api_key)
+
+         data = response.get("data", response)
+
+         configs = []
+         for model in data:
+             assert "id" in model, f"xAI/Grok model missing 'id' field: {model}"
+             model_name = model["id"]
+
+             # In case xAI starts supporting it in the future:
+             if "context_length" in model:
+                 context_window_size = model["context_length"]
+             else:
+                 context_window_size = self.get_model_context_window_size(model_name)
+
+             if not context_window_size:
+                 warnings.warn(f"Couldn't find context window size for model {model_name}")
+                 continue
+
+             configs.append(
+                 LLMConfig(
+                     model=model_name,
+                     model_endpoint_type="xai",
+                     model_endpoint=self.base_url,
+                     context_window=context_window_size,
+                     handle=self.get_handle(model_name),
+                     provider_name=self.name,
+                     provider_category=self.provider_category,
+                 )
+             )
+
+         return configs
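A short sketch (not part of the diff) of the fallback behavior XAIProvider implements: models listed in the hardcoded MODEL_CONTEXT_WINDOWS table resolve to a window size, while unknown models return None and are skipped with a warning by list_llm_models_async. The constructor arguments are placeholders.

from letta.schemas.providers.xai import XAIProvider

provider = XAIProvider(name="xai", api_key="xai-...")  # placeholder values

# Hardcoded lookup, since xAI's model listing does not include context length.
assert provider.get_model_context_window_size("grok-4-0709") == 256_000

# Unknown models yield None, so the listing code warns and skips them.
assert provider.get_model_context_window_size("grok-unknown-model") is None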
letta/server/db.py CHANGED
@@ -400,11 +400,6 @@ class DatabaseRegistry:
          """Trace sync db caller information for debugging purposes."""
          pass # wrapper used for otel tracing only
 
-     @trace_method
-     def session_caller_trace(self, caller_info: str):
-         """Trace sync db caller information for debugging purposes."""
-         pass # wrapper used for otel tracing only
-
 
  # Create a singleton instance
  db_registry = DatabaseRegistry()
letta/server/rest_api/app.py CHANGED
@@ -407,9 +407,10 @@ def start_server(
          address=host or "127.0.0.1",  # Note granian address must be an ip address
          port=port or REST_DEFAULT_PORT,
          workers=settings.uvicorn_workers,
-         # threads=
+         # runtime_blocking_threads=
+         # runtime_threads=
          reload=reload or settings.uvicorn_reload,
-         reload_ignore_patterns=["openapi_letta.json"],
+         reload_paths=["letta/"],
          reload_ignore_worker_failure=True,
          reload_tick=4000,  # set to 4s to prevent crashing on weird state
          # log_level="info"
@@ -451,7 +452,7 @@ def start_server(
          # runtime_blocking_threads=
          # runtime_threads=
          reload=reload or settings.uvicorn_reload,
-         reload_paths=["../letta/"],
+         reload_paths=["letta/"],
          reload_ignore_worker_failure=True,
          reload_tick=4000,  # set to 4s to prevent crashing on weird state
          # log_level="info"
letta/server/rest_api/routers/v1/__init__.py CHANGED
@@ -1,6 +1,7 @@
  from letta.server.rest_api.routers.v1.agents import router as agents_router
  from letta.server.rest_api.routers.v1.blocks import router as blocks_router
  from letta.server.rest_api.routers.v1.embeddings import router as embeddings_router
+ from letta.server.rest_api.routers.v1.folders import router as folders_router
  from letta.server.rest_api.routers.v1.groups import router as groups_router
  from letta.server.rest_api.routers.v1.health import router as health_router
  from letta.server.rest_api.routers.v1.identities import router as identities_router
@@ -20,6 +21,7 @@ from letta.server.rest_api.routers.v1.voice import router as voice_router
  ROUTERS = [
      tools_router,
      sources_router,
+     folders_router,
      agents_router,
      groups_router,
      identities_router,