letta-nightly 0.4.1.dev20241008104105__py3-none-any.whl → 0.4.1.dev20241010104112__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/agent.py +18 -2
- letta/agent_store/db.py +23 -7
- letta/cli/cli.py +2 -1
- letta/cli/cli_config.py +1 -1098
- letta/client/client.py +8 -1
- letta/client/utils.py +7 -2
- letta/credentials.py +2 -2
- letta/embeddings.py +3 -0
- letta/functions/schema_generator.py +1 -1
- letta/interface.py +6 -2
- letta/llm_api/anthropic.py +3 -24
- letta/llm_api/azure_openai.py +47 -98
- letta/llm_api/azure_openai_constants.py +10 -0
- letta/llm_api/google_ai.py +38 -63
- letta/llm_api/helpers.py +64 -2
- letta/llm_api/llm_api_tools.py +6 -15
- letta/llm_api/openai.py +6 -49
- letta/local_llm/constants.py +3 -0
- letta/main.py +1 -1
- letta/metadata.py +2 -0
- letta/providers.py +165 -31
- letta/schemas/agent.py +14 -0
- letta/schemas/llm_config.py +0 -3
- letta/schemas/openai/chat_completion_response.py +3 -0
- letta/schemas/tool.py +3 -3
- letta/server/rest_api/routers/openai/assistants/threads.py +5 -5
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +2 -2
- letta/server/rest_api/routers/v1/agents.py +11 -11
- letta/server/rest_api/routers/v1/blocks.py +2 -2
- letta/server/rest_api/routers/v1/jobs.py +2 -2
- letta/server/rest_api/routers/v1/sources.py +12 -12
- letta/server/rest_api/routers/v1/tools.py +6 -6
- letta/server/server.py +26 -7
- letta/settings.py +3 -112
- letta/streaming_interface.py +8 -4
- {letta_nightly-0.4.1.dev20241008104105.dist-info → letta_nightly-0.4.1.dev20241010104112.dist-info}/METADATA +1 -1
- {letta_nightly-0.4.1.dev20241008104105.dist-info → letta_nightly-0.4.1.dev20241010104112.dist-info}/RECORD +40 -42
- letta/configs/anthropic.json +0 -13
- letta/configs/letta_hosted.json +0 -11
- letta/configs/openai.json +0 -12
- {letta_nightly-0.4.1.dev20241008104105.dist-info → letta_nightly-0.4.1.dev20241010104112.dist-info}/LICENSE +0 -0
- {letta_nightly-0.4.1.dev20241008104105.dist-info → letta_nightly-0.4.1.dev20241010104112.dist-info}/WHEEL +0 -0
- {letta_nightly-0.4.1.dev20241008104105.dist-info → letta_nightly-0.4.1.dev20241010104112.dist-info}/entry_points.txt +0 -0
letta/llm_api/llm_api_tools.py
CHANGED

@@ -28,7 +28,6 @@ from letta.local_llm.constants import (
     INNER_THOUGHTS_KWARG,
     INNER_THOUGHTS_KWARG_DESCRIPTION,
 )
-from letta.providers import GoogleAIProvider
 from letta.schemas.enums import OptionState
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
@@ -189,6 +188,9 @@ def create(
         if model_settings.azure_base_url is None:
             raise ValueError(f"Azure base url is missing. Did you set AZURE_BASE_URL in your env?")

+        if model_settings.azure_api_version is None:
+            raise ValueError(f"Azure API version is missing. Did you set AZURE_API_VERSION in your env?")
+
         # Set the llm config model_endpoint from model_settings
         # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
         llm_config.model_endpoint = model_settings.azure_base_url
@@ -215,20 +217,15 @@ def create(
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Google AI API requests")

-        # NOTE: until Google AI supports CoT / text alongside function calls,
-        # we need to put it in a kwarg (unless we want to split the message into two)
-        google_ai_inner_thoughts_in_kwarg = True
-
         if functions is not None:
             tools = [{"type": "function", "function": f} for f in functions]
             tools = [Tool(**t) for t in tools]
-            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg)
+            tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=True)
         else:
             tools = None

         return google_ai_chat_completions_request(
-
-            service_endpoint=GoogleAIProvider(model_settings.gemini_api_key).service_endpoint,
+            base_url=llm_config.model_endpoint,
             model=llm_config.model,
             api_key=model_settings.gemini_api_key,
             # see structure of payload here: https://ai.google.dev/docs/function_calling
@@ -236,6 +233,7 @@ def create(
                 contents=[m.to_google_ai_dict() for m in messages],
                 tools=tools,
             ),
+            inner_thoughts_in_kwargs=True,
         )

     elif llm_config.model_endpoint_type == "anthropic":
@@ -244,12 +242,6 @@ def create(
         if not use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")

-        if functions is not None:
-            tools = [{"type": "function", "function": f} for f in functions]
-            tools = [Tool(**t) for t in tools]
-        else:
-            tools = None
-
         return anthropic_chat_completions_request(
             url=llm_config.model_endpoint,
             api_key=model_settings.anthropic_api_key,
@@ -296,7 +288,6 @@ def create(
             raise NotImplementedError(f"Streaming not yet implemented for Groq.")

         if model_settings.groq_api_key is None and llm_config.model_endpoint == "https://api.groq.com/openai/v1/chat/completions":
-            # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"Groq key is missing from letta config file")

         # force to true for groq, since they don't support 'content' is non-null
letta/llm_api/openai.py
CHANGED

@@ -9,7 +9,7 @@ from httpx_sse._exceptions import SSEError

 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
 from letta.errors import LLMError
-from letta.llm_api.helpers import add_inner_thoughts_to_functions
+from letta.llm_api.helpers import add_inner_thoughts_to_functions, make_post_request
 from letta.local_llm.constants import (
     INNER_THOUGHTS_KWARG,
     INNER_THOUGHTS_KWARG_DESCRIPTION,
@@ -145,6 +145,7 @@ def build_openai_chat_completions_request(
         import uuid

         data.user = str(uuid.UUID(int=0))
+        data.model = "memgpt-openai"

     return data

@@ -483,58 +484,14 @@ def openai_chat_completions_request(
     data.pop("tools")
     data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")

-
-
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}, response.text = {response.text}")
-        # print(json.dumps(data, indent=4))
-        # raise requests.exceptions.HTTPError
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-
-        response = ChatCompletionResponse(**response)  # convert to 'dot-dict' style which is the openai python client default
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    response_json = make_post_request(url, headers, data)
+    return ChatCompletionResponse(**response_json)


 def openai_embeddings_request(url: str, api_key: str, data: dict) -> EmbeddingResponse:
     """https://platform.openai.com/docs/api-reference/embeddings/create"""
-    from letta.utils import printd

     url = smart_urljoin(url, "embeddings")
     headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
-
-
-    try:
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-        response = EmbeddingResponse(**response)  # convert to 'dot-dict' style which is the openai python client default
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    response_json = make_post_request(url, headers, data)
+    return EmbeddingResponse(**response_json)
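The repeated try/except blocks above are collapsed into a shared make_post_request helper imported from letta/llm_api/helpers.py (that file also changed in this release, but its diff is not shown in this section). A rough sketch of what such a helper plausibly does, stated as an assumption rather than the actual implementation:

import requests

def make_post_request(url: str, headers: dict, data: dict) -> dict:
    # Hypothetical sketch: POST the JSON payload, let 4XX/5XX surface as
    # requests.exceptions.HTTPError, and hand back the parsed JSON body.
    response = requests.post(url, headers=headers, json=data)
    response.raise_for_status()
    return response.json()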
letta/local_llm/constants.py
CHANGED
letta/main.py
CHANGED

@@ -366,7 +366,7 @@ def run_agent_loop(
             first_message=False,
             skip_verify=no_verify,
             stream=stream,
-
+            inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
             ms=ms,
         )
         new_messages = step_response.messages
letta/metadata.py
CHANGED

@@ -218,6 +218,7 @@ class AgentModel(Base):
     tools = Column(JSON)

     # configs
+    agent_type = Column(String)
     llm_config = Column(LLMConfigColumn)
     embedding_config = Column(EmbeddingConfigColumn)

@@ -243,6 +244,7 @@ class AgentModel(Base):
             memory=Memory.load(self.memory),  # load dictionary
             system=self.system,
             tools=self.tools,
+            agent_type=self.agent_type,
             llm_config=self.llm_config,
             embedding_config=self.embedding_config,
             metadata_=self.metadata_,
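The new agent_type column stores the agent type as a plain string. A self-contained sketch of the round trip through such a column (hypothetical table and model names, not letta's actual metadata store):

from enum import Enum

from sqlalchemy import Column, Integer, String
from sqlalchemy.orm import declarative_base

Base = declarative_base()

class AgentType(str, Enum):  # mirrors the enum added in letta/schemas/agent.py below
    memgpt_agent = "memgpt_agent"
    split_thread_agent = "split_thread_agent"

class AgentRow(Base):  # hypothetical stand-in for letta.metadata.AgentModel
    __tablename__ = "agents_example"
    id = Column(Integer, primary_key=True)
    agent_type = Column(String)  # enum persisted as its string value

row = AgentRow(agent_type=AgentType.memgpt_agent.value)
print(AgentType(row.agent_type))  # AgentType.memgpt_agent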
letta/providers.py
CHANGED

@@ -1,14 +1,18 @@
 from typing import List, Optional

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator

 from letta.constants import LLM_MAX_TOKENS
+from letta.llm_api.azure_openai import (
+    get_azure_chat_completions_endpoint,
+    get_azure_embeddings_endpoint,
+)
+from letta.llm_api.azure_openai_constants import AZURE_MODEL_TO_CONTEXT_LENGTH
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig


 class Provider(BaseModel):
-    base_url: str

     def list_llm_models(self):
         return []
@@ -20,6 +24,32 @@ class Provider(BaseModel):
     pass


+class LettaProvider(Provider):
+
+    name: str = "letta"
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        return [
+            LLMConfig(
+                model="letta-free",  # NOTE: renamed
+                model_endpoint_type="openai",
+                model_endpoint="https://inference.memgpt.ai",
+                context_window=16384,
+            )
+        ]
+
+    def list_embedding_models(self):
+        return [
+            EmbeddingConfig(
+                embedding_model="letta-free",  # NOTE: renamed
+                embedding_endpoint_type="hugging-face",
+                embedding_endpoint="https://embeddings.memgpt.ai",
+                embedding_dim=1024,
+                embedding_chunk_size=300,
+            )
+        ]
+
+
 class OpenAIProvider(Provider):
     name: str = "openai"
     api_key: str = Field(..., description="API key for the OpenAI API.")
@@ -122,34 +152,64 @@ class OllamaProvider(OpenAIProvider):
         response = requests.post(f"{self.base_url}/api/show", json={"name": model_name, "verbose": True})
         response_json = response.json()

-
-        possible_keys = [
-            # OPT
-            "max_position_embeddings",
-            # GPT-2
-            "n_positions",
-            # MPT
-            "max_seq_len",
-            # ChatGLM2
-            "seq_length",
-            # Command-R
-            "model_max_length",
-            # Others
-            "max_sequence_length",
-            "max_seq_length",
-            "seq_len",
-        ]
-
+        ## thank you vLLM: https://github.com/vllm-project/vllm/blob/main/vllm/config.py#L1675
+        # possible_keys = [
+        #     # OPT
+        #     "max_position_embeddings",
+        #     # GPT-2
+        #     "n_positions",
+        #     # MPT
+        #     "max_seq_len",
+        #     # ChatGLM2
+        #     "seq_length",
+        #     # Command-R
+        #     "model_max_length",
+        #     # Others
+        #     "max_sequence_length",
+        #     "max_seq_length",
+        #     "seq_len",
+        # ]
         # max_position_embeddings
         # parse model cards: nous, dolphon, llama
         for key, value in response_json["model_info"].items():
-            if "
+            if "context_length" in key:
+                return value
+        return None
+
+    def get_model_embedding_dim(self, model_name: str):
+        import requests
+
+        response = requests.post(f"{self.base_url}/api/show", json={"name": model_name, "verbose": True})
+        response_json = response.json()
+        for key, value in response_json["model_info"].items():
+            if "embedding_length" in key:
                 return value
         return None

     def list_embedding_models(self) -> List[EmbeddingConfig]:
-        #
-
+        # https://github.com/ollama/ollama/blob/main/docs/api.md#list-local-models
+        import requests
+
+        response = requests.get(f"{self.base_url}/api/tags")
+        if response.status_code != 200:
+            raise Exception(f"Failed to list Ollama models: {response.text}")
+        response_json = response.json()
+
+        configs = []
+        for model in response_json["models"]:
+            embedding_dim = self.get_model_embedding_dim(model["name"])
+            if not embedding_dim:
+                continue
+            configs.append(
+                EmbeddingConfig(
+                    embedding_model=model["name"],
+                    embedding_endpoint_type="ollama",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=embedding_dim,
+                    embedding_chunk_size=300,
+                )
+            )
+        return configs


 class GroqProvider(OpenAIProvider):
@@ -182,20 +242,21 @@ class GroqProvider(OpenAIProvider):
 class GoogleAIProvider(Provider):
     # gemini
     api_key: str = Field(..., description="API key for the Google AI API.")
-    service_endpoint: str = "generativelanguage"
     base_url: str = "https://generativelanguage.googleapis.com"

     def list_llm_models(self):
         from letta.llm_api.google_ai import google_ai_get_model_list

-
-
+        model_options = google_ai_get_model_list(base_url=self.base_url, api_key=self.api_key)
+        # filter by 'generateContent' models
+        model_options = [mo for mo in model_options if "generateContent" in mo["supportedGenerationMethods"]]
         model_options = [str(m["name"]) for m in model_options]
+
+        # filter by model names
         model_options = [mo[len("models/") :] if mo.startswith("models/") else mo for mo in model_options]
+
         # TODO remove manual filtering for gemini-pro
         model_options = [mo for mo in model_options if str(mo).startswith("gemini") and "-pro" in str(mo)]
-        # TODO: add context windows
-        # model_options = ["gemini-pro"]

         configs = []
         for model in model_options:
@@ -210,21 +271,94 @@ class GoogleAIProvider(Provider):
         return configs

     def list_embedding_models(self):
-
+        from letta.llm_api.google_ai import google_ai_get_model_list
+
+        # TODO: use base_url instead
+        model_options = google_ai_get_model_list(base_url=self.base_url, api_key=self.api_key)
+        # filter by 'generateContent' models
+        model_options = [mo for mo in model_options if "embedContent" in mo["supportedGenerationMethods"]]
+        model_options = [str(m["name"]) for m in model_options]
+        model_options = [mo[len("models/") :] if mo.startswith("models/") else mo for mo in model_options]
+
+        configs = []
+        for model in model_options:
+            configs.append(
+                EmbeddingConfig(
+                    embedding_model=model,
+                    embedding_endpoint_type="google_ai",
+                    embedding_endpoint=self.base_url,
+                    embedding_dim=768,
+                    embedding_chunk_size=300,  # NOTE: max is 2048
+                )
+            )
+        return configs

     def get_model_context_window(self, model_name: str):
         from letta.llm_api.google_ai import google_ai_get_model_context_window

-
-        return google_ai_get_model_context_window(self.service_endpoint, self.api_key, model_name)
+        return google_ai_get_model_context_window(self.base_url, self.api_key, model_name)


 class AzureProvider(Provider):
     name: str = "azure"
+    latest_api_version: str = "2024-09-01-preview"  # https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation
     base_url: str = Field(
         ..., description="Base URL for the Azure API endpoint. This should be specific to your org, e.g. `https://letta.openai.azure.com`."
     )
     api_key: str = Field(..., description="API key for the Azure API.")
+    api_version: str = Field(latest_api_version, description="API version for the Azure API")
+
+    @model_validator(mode="before")
+    def set_default_api_version(cls, values):
+        """
+        This ensures that api_version is always set to the default if None is passed in.
+        """
+        if values.get("api_version") is None:
+            values["api_version"] = cls.model_fields["latest_api_version"].default
+        return values
+
+    def list_llm_models(self) -> List[LLMConfig]:
+        from letta.llm_api.azure_openai import (
+            azure_openai_get_chat_completion_model_list,
+        )
+
+        model_options = azure_openai_get_chat_completion_model_list(self.base_url, api_key=self.api_key, api_version=self.api_version)
+        configs = []
+        for model_option in model_options:
+            model_name = model_option["id"]
+            context_window_size = self.get_model_context_window(model_name)
+            model_endpoint = get_azure_chat_completions_endpoint(self.base_url, model_name, self.api_version)
+            configs.append(
+                LLMConfig(model=model_name, model_endpoint_type="azure", model_endpoint=model_endpoint, context_window=context_window_size)
+            )
+        return configs
+
+    def list_embedding_models(self) -> List[EmbeddingConfig]:
+        from letta.llm_api.azure_openai import azure_openai_get_embeddings_model_list
+
+        model_options = azure_openai_get_embeddings_model_list(
+            self.base_url, api_key=self.api_key, api_version=self.api_version, require_embedding_in_name=True
+        )
+        configs = []
+        for model_option in model_options:
+            model_name = model_option["id"]
+            model_endpoint = get_azure_embeddings_endpoint(self.base_url, model_name, self.api_version)
+            configs.append(
+                EmbeddingConfig(
+                    embedding_model=model_name,
+                    embedding_endpoint_type="azure",
+                    embedding_endpoint=model_endpoint,
+                    embedding_dim=768,
+                    embedding_chunk_size=300,  # NOTE: max is 2048
+                )
+            )
+        return configs
+
+    def get_model_context_window(self, model_name: str):
+        """
+        This is hardcoded for now, since there is no API endpoints to retrieve metadata for a model.
+        """
+        return AZURE_MODEL_TO_CONTEXT_LENGTH.get(model_name, 4096)


 class VLLMProvider(OpenAIProvider):
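The AzureProvider addition uses a pydantic v2 model_validator(mode="before") so that an explicit api_version=None still falls back to latest_api_version (a plain field default would not cover that case). A self-contained sketch of the same pattern, without importing letta itself:

from typing import Optional

from pydantic import BaseModel, Field, model_validator

class AzureProviderSketch(BaseModel):  # illustrative stand-in, not letta's AzureProvider
    latest_api_version: str = "2024-09-01-preview"
    base_url: str = Field(..., description="Org-specific Azure OpenAI endpoint")
    api_key: str = Field(..., description="API key for the Azure API")
    api_version: Optional[str] = Field(None, description="API version for the Azure API")

    @model_validator(mode="before")
    @classmethod
    def set_default_api_version(cls, values):
        # Backfill the class-level default when api_version is omitted or passed as None.
        if isinstance(values, dict) and values.get("api_version") is None:
            values["api_version"] = cls.model_fields["latest_api_version"].default
        return values

p = AzureProviderSketch(base_url="https://example.openai.azure.com", api_key="sk-...", api_version=None)
print(p.api_version)  # 2024-09-01-preview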
letta/schemas/agent.py
CHANGED

@@ -1,5 +1,6 @@
 import uuid
 from datetime import datetime
+from enum import Enum
 from typing import Dict, List, Optional, Union

 from pydantic import BaseModel, Field, field_validator
@@ -21,6 +22,15 @@ class BaseAgent(LettaBase, validate_assignment=True):
     user_id: Optional[str] = Field(None, description="The user id of the agent.")


+class AgentType(str, Enum):
+    """
+    Enum to represent the type of agent.
+    """
+
+    memgpt_agent = "memgpt_agent"
+    split_thread_agent = "split_thread_agent"
+
+
 class AgentState(BaseAgent):
     """
     Representation of an agent's state. This is the state of the agent at a given time, and is persisted in the DB backend. The state has all the information needed to recreate a persisted agent.
@@ -52,6 +62,9 @@ class AgentState(BaseAgent):
     # system prompt
     system: str = Field(..., description="The system prompt used by the agent.")

+    # agent configuration
+    agent_type: AgentType = Field(..., description="The type of agent.")
+
     # llm information
     llm_config: LLMConfig = Field(..., description="The LLM configuration used by the agent.")
     embedding_config: EmbeddingConfig = Field(..., description="The embedding configuration used by the agent.")
@@ -64,6 +77,7 @@ class CreateAgent(BaseAgent):
     memory: Optional[Memory] = Field(None, description="The in-context memory of the agent.")
     tools: Optional[List[str]] = Field(None, description="The tools used by the agent.")
     system: Optional[str] = Field(None, description="The system prompt used by the agent.")
+    agent_type: Optional[AgentType] = Field(None, description="The type of agent.")
     llm_config: Optional[LLMConfig] = Field(None, description="The LLM configuration used by the agent.")
     embedding_config: Optional[EmbeddingConfig] = Field(None, description="The embedding configuration used by the agent.")
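Since AgentType subclasses str, pydantic can coerce plain strings from API requests into the enum and serialize the field back to a string. A small illustrative sketch (the CreateAgentSketch model is hypothetical; only the enum mirrors the diff above):

from enum import Enum
from typing import Optional

from pydantic import BaseModel, Field

class AgentType(str, Enum):
    memgpt_agent = "memgpt_agent"
    split_thread_agent = "split_thread_agent"

class CreateAgentSketch(BaseModel):  # hypothetical stand-in for letta's CreateAgent
    name: Optional[str] = None
    agent_type: Optional[AgentType] = Field(None, description="The type of agent.")

req = CreateAgentSketch(name="demo", agent_type="memgpt_agent")  # string coerced to the enum
print(req.agent_type is AgentType.memgpt_agent)   # True
print(req.model_dump(mode="json")["agent_type"])  # memgpt_agent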
letta/schemas/llm_config.py
CHANGED

@@ -35,9 +35,6 @@ class LLMConfig(BaseModel):
         "hugging-face",
     ] = Field(..., description="The endpoint type for the model.")
     model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
-    api_version: Optional[str] = Field(
-        None, description="The version for the model API. Used by the Azure provider backend, e.g. 2023-03-15-preview."
-    )
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")

letta/schemas/openai/chat_completion_response.py
CHANGED

@@ -74,6 +74,9 @@ class ChatCompletionResponse(BaseModel):
     object: Literal["chat.completion"] = "chat.completion"
     usage: UsageStatistics

+    def __str__(self):
+        return self.model_dump_json(indent=4)
+

 class FunctionCallDelta(BaseModel):
     # arguments: Optional[str] = None
letta/schemas/tool.py
CHANGED

@@ -93,7 +93,7 @@ class Tool(BaseTool):
         # append heartbeat (necessary for triggering another reasoning step after this tool call)
         json_schema["parameters"]["properties"]["request_heartbeat"] = {
             "type": "boolean",
-            "description": "Request an immediate heartbeat after function execution. Set to
+            "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function.",
         }
         json_schema["parameters"]["required"].append("request_heartbeat")

@@ -128,7 +128,7 @@ class Tool(BaseTool):
         # append heartbeat (necessary for triggering another reasoning step after this tool call)
         json_schema["parameters"]["properties"]["request_heartbeat"] = {
             "type": "boolean",
-            "description": "Request an immediate heartbeat after function execution. Set to
+            "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function.",
         }
         json_schema["parameters"]["required"].append("request_heartbeat")

@@ -161,7 +161,7 @@ class Tool(BaseTool):
         # append heartbeat (necessary for triggering another reasoning step after this tool call)
         json_schema["parameters"]["properties"]["request_heartbeat"] = {
             "type": "boolean",
-            "description": "Request an immediate heartbeat after function execution. Set to
+            "description": "Request an immediate heartbeat after function execution. Set to `True` if you want to send a follow-up message or run a follow-up function.",
         }
         json_schema["parameters"]["required"].append("request_heartbeat")
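For reference, the request_heartbeat parameter appended above ends up on a tool's JSON schema like this (hand-built example with an invented tool name; real schemas are produced by the schema generator from the tool's source):

json_schema = {
    "name": "archival_memory_insert",  # example name, not taken from this diff
    "parameters": {"type": "object", "properties": {"content": {"type": "string"}}, "required": ["content"]},
}
json_schema["parameters"]["properties"]["request_heartbeat"] = {
    "type": "boolean",
    "description": "Request an immediate heartbeat after function execution. "
    "Set to `True` if you want to send a follow-up message or run a follow-up function.",
}
json_schema["parameters"]["required"].append("request_heartbeat")
print(json_schema["parameters"]["required"])  # ['content', 'request_heartbeat']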
letta/server/rest_api/routers/openai/assistants/threads.py
CHANGED

@@ -1,5 +1,5 @@
 import uuid
-from typing import TYPE_CHECKING, List
+from typing import TYPE_CHECKING, List, Optional

 from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query

@@ -43,7 +43,7 @@ router = APIRouter(prefix="/v1/threads", tags=["threads"])
 def create_thread(
     request: CreateThreadRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
-    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
+    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
 ):
     # TODO: use requests.description and requests.metadata fields
     # TODO: handle requests.file_ids and requests.tools
@@ -68,7 +68,7 @@ def create_thread(
 def retrieve_thread(
     thread_id: str = Path(..., description="The unique identifier of the thread."),
     server: SyncServer = Depends(get_letta_server),
-    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
+    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
 ):
     actor = server.get_user_or_default(user_id=user_id)
     agent = server.get_agent(user_id=actor.id, agent_id=thread_id)
@@ -102,7 +102,7 @@ def create_message(
     thread_id: str = Path(..., description="The unique identifier of the thread."),
     request: CreateMessageRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
-    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
+    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
 ):
     actor = server.get_user_or_default(user_id=user_id)
     agent_id = thread_id
@@ -146,7 +146,7 @@ def list_messages(
     after: str = Query(None, description="A cursor for use in pagination. `after` is an object ID that defines your place in the list."),
     before: str = Query(None, description="A cursor for use in pagination. `after` is an object ID that defines your place in the list."),
     server: SyncServer = Depends(get_letta_server),
-    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
+    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
 ):
     actor = server.get_user_or_default(user_id)
     after_uuid = after if before else None
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py
CHANGED

@@ -1,5 +1,5 @@
 import json
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Optional

 from fastapi import APIRouter, Body, Depends, Header, HTTPException

@@ -30,7 +30,7 @@ router = APIRouter(prefix="/v1/chat/completions", tags=["chat_completions"])
 async def create_chat_completion(
     completion_request: ChatCompletionRequest = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
-    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
+    user_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
 ):
     """Send a message to a Letta agent via a /chat/completions completion_request
     The bearer token will be used to identify the user.