letta-nightly 0.4.1.dev20241007104134__py3-none-any.whl → 0.4.1.dev20241008104105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: the registry flags this version of letta-nightly as possibly problematic; see the registry page for details.
- letta/agent.py +19 -9
- letta/credentials.py +1 -1
- letta/errors.py +1 -1
- letta/llm_api/azure_openai.py +15 -19
- letta/llm_api/helpers.py +153 -0
- letta/llm_api/llm_api_tools.py +39 -215
- letta/llm_api/openai.py +70 -2
- letta/providers.py +5 -1
- letta/schemas/llm_config.py +5 -2
- letta/server/rest_api/admin/tools.py +0 -1
- letta/server/rest_api/app.py +1 -17
- letta/server/rest_api/routers/openai/assistants/threads.py +9 -6
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
- letta/server/rest_api/routers/v1/agents.py +23 -13
- letta/server/rest_api/routers/v1/blocks.py +5 -3
- letta/server/rest_api/routers/v1/jobs.py +5 -3
- letta/server/rest_api/routers/v1/sources.py +24 -12
- letta/server/rest_api/routers/v1/tools.py +11 -6
- letta/server/server.py +17 -34
- letta/settings.py +2 -1
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/METADATA +1 -1
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/RECORD +25 -24
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/LICENSE +0 -0
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/WHEEL +0 -0
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/entry_points.txt +0 -0
letta/llm_api/llm_api_tools.py
CHANGED
```diff
@@ -1,25 +1,25 @@
-import copy
-import json
 import os
 import random
 import time
-import warnings
 from typing import List, Optional, Union
 
 import requests
 
-from letta.constants import CLI_WARNING_PREFIX
+from letta.constants import CLI_WARNING_PREFIX
 from letta.llm_api.anthropic import anthropic_chat_completions_request
-from letta.llm_api.azure_openai import (
-    MODEL_TO_AZURE_ENGINE,
-    azure_openai_chat_completions_request,
-)
+from letta.llm_api.azure_openai import azure_openai_chat_completions_request
 from letta.llm_api.cohere import cohere_chat_completions_request
 from letta.llm_api.google_ai import (
     convert_tools_to_google_ai_format,
     google_ai_chat_completions_request,
 )
+from letta.llm_api.helpers import (
+    add_inner_thoughts_to_functions,
+    derive_inner_thoughts_in_kwargs,
+    unpack_all_inner_thoughts_from_kwargs,
+)
 from letta.llm_api.openai import (
+    build_openai_chat_completions_request,
     openai_chat_completions_process_stream,
     openai_chat_completions_request,
 )
```
```diff
@@ -37,144 +37,15 @@ from letta.schemas.openai.chat_completion_request import (
     Tool,
     cast_message_to_subtype,
 )
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.streaming_interface import (
     AgentChunkStreamingInterface,
     AgentRefreshStreamingInterface,
 )
-from letta.utils import json_dumps
 
 LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
 
 
-# TODO update to use better types
-def add_inner_thoughts_to_functions(
-    functions: List[dict],
-    inner_thoughts_key: str,
-    inner_thoughts_description: str,
-    inner_thoughts_required: bool = True,
-    # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict?
-) -> List[dict]:
-    """Add an inner_thoughts kwarg to every function in the provided list"""
-    # return copies
-    new_functions = []
-
-    # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
-    for function_object in functions:
-        function_params = function_object["parameters"]["properties"]
-        required_params = list(function_object["parameters"]["required"])
-
-        # if the inner thoughts arg doesn't exist, add it
-        if inner_thoughts_key not in function_params:
-            function_params[inner_thoughts_key] = {
-                "type": "string",
-                "description": inner_thoughts_description,
-            }
-
-        # make sure it's tagged as required
-        new_function_object = copy.deepcopy(function_object)
-        if inner_thoughts_required and inner_thoughts_key not in required_params:
-            required_params.append(inner_thoughts_key)
-            new_function_object["parameters"]["required"] = required_params
-
-        new_functions.append(new_function_object)
-
-    # return a list of copies
-    return new_functions
-
-
-def unpack_all_inner_thoughts_from_kwargs(
-    response: ChatCompletionResponse,
-    inner_thoughts_key: str,
-) -> ChatCompletionResponse:
-    """Strip the inner thoughts out of the tool call and put it in the message content"""
-    if len(response.choices) == 0:
-        raise ValueError(f"Unpacking inner thoughts from empty response not supported")
-
-    new_choices = []
-    for choice in response.choices:
-        new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))
-
-    # return an updated copy
-    new_response = response.model_copy(deep=True)
-    new_response.choices = new_choices
-    return new_response
-
-
-def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
-    message = choice.message
-    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
-        if len(message.tool_calls) > 1:
-            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
-        # TODO support multiple tool calls
-        tool_call = message.tool_calls[0]
-
-        try:
-            # Sadly we need to parse the JSON since args are in string format
-            func_args = dict(json.loads(tool_call.function.arguments))
-            if inner_thoughts_key in func_args:
-                # extract the inner thoughts
-                inner_thoughts = func_args.pop(inner_thoughts_key)
-
-                # replace the kwargs
-                new_choice = choice.model_copy(deep=True)
-                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
-                # also replace the message content
-                if new_choice.message.content is not None:
-                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
-                new_choice.message.content = inner_thoughts
-
-                return new_choice
-            else:
-                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
-
-        except json.JSONDecodeError as e:
-            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
-            raise e
-
-
-def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool:
-    """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
-    from letta.utils import printd
-
-    match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-
-    # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration)
-    if match_string in str(exception):
-        printd(f"Found '{match_string}' in str(exception)={(str(exception))}")
-        return True
-
-    # Based on python requests + OpenAI REST API (/v1)
-    elif isinstance(exception, requests.exceptions.HTTPError):
-        if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""):
-            try:
-                error_details = exception.response.json()
-                if "error" not in error_details:
-                    printd(f"HTTPError occurred, but couldn't find error field: {error_details}")
-                    return False
-                else:
-                    error_details = error_details["error"]
-
-                # Check for the specific error code
-                if error_details.get("code") == "context_length_exceeded":
-                    printd(f"HTTPError occurred, caught error code {error_details.get('code')}")
-                    return True
-                # Soft-check for "maximum context length" inside of the message
-                elif error_details.get("message") and "maximum context length" in error_details.get("message"):
-                    printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})")
-                    return True
-                else:
-                    printd(f"HTTPError occurred, but unknown error message: {error_details}")
-                    return False
-            except ValueError:
-                # JSON decoding failed
-                printd(f"HTTPError occurred ({exception}), but no JSON error message.")
-
-    # Generic fail
-    else:
-        return False
-
-
 def retry_with_exponential_backoff(
     func,
     initial_delay: float = 1,
```
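The helpers deleted in the hunk above (add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs, unpack_inner_thoughts_from_kwargs, is_context_overflow_error) move into the new letta/llm_api/helpers.py module (+153 lines per the file list), whose contents this diff does not show. Judging from the inline OptionState logic removed from create() in the hunks below, the new derive_inner_thoughts_in_kwargs helper plausibly behaves like this sketch; the body and the OptionState import path are assumptions reconstructed from the deleted code, not the actual helper:

```python
# Hypothetical sketch of derive_inner_thoughts_in_kwargs, reconstructed from the
# inline logic this release deletes from create(). The real implementation lives
# in letta/llm_api/helpers.py, which this diff does not display.
from letta.schemas.enums import OptionState  # import path assumed


def derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option: OptionState, model: str) -> bool:
    if inner_thoughts_in_kwargs_option == OptionState.DEFAULT:
        # models that are known to not use `content` fields on tool calls
        return "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model
    # an explicit YES/NO option overrides the model-based default
    return inner_thoughts_in_kwargs_option == OptionState.YES
```

If the helper matches the removed logic, default behavior is unchanged: gpt-4o, gpt-4-turbo, and gpt-3.5-turbo keep inner thoughts in function kwargs unless the caller opts out explicitly.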
```diff
@@ -248,7 +119,8 @@ def create(
     stream_inferface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
     # TODO move to llm_config?
     # if unspecified (None), default to something we've tested
-
+    inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
+    max_tokens: Optional[int] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
```
```diff
@@ -267,59 +139,14 @@ def create(
 
     # openai
     if llm_config.model_endpoint_type == "openai":
-
-        if inner_thoughts_in_kwargs == OptionState.DEFAULT:
-            # model that are known to not use `content` fields on tool calls
-            inner_thoughts_in_kwargs = (
-                "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model
-            )
-        else:
-            inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs == OptionState.YES else False
-
-        if not isinstance(inner_thoughts_in_kwargs, bool):
-            warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
-            inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
-        if inner_thoughts_in_kwargs:
-            functions = add_inner_thoughts_to_functions(
-                functions=functions,
-                inner_thoughts_key=INNER_THOUGHTS_KWARG,
-                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
-            )
-
-        openai_message_list = [
-            cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages
-        ]
-
-        # TODO do the same for Azure?
         if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"OpenAI key is missing from letta config file")
-        if use_tool_naming:
-            data = ChatCompletionRequest(
-                model=llm_config.model,
-                messages=openai_message_list,
-                tools=[{"type": "function", "function": f} for f in functions] if functions else None,
-                tool_choice=function_call,
-                user=str(user_id),
-            )
-        else:
-            data = ChatCompletionRequest(
-                model=llm_config.model,
-                messages=openai_message_list,
-                functions=functions,
-                function_call=function_call,
-                user=str(user_id),
-            )
-        # https://platform.openai.com/docs/guides/text-generation/json-mode
-        # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
-        if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
-            data.response_format = {"type": "json_object"}
 
-        if "inference.memgpt.ai" in llm_config.model_endpoint:
-            # override user id for inference.memgpt.ai
-            import uuid
-
-            data.user = str(uuid.UUID(int=0))
+        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, model=llm_config.model)
+        data = build_openai_chat_completions_request(
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
+        )
 
         if stream:  # Client requested token streaming
             data.stream = True
```
```diff
@@ -356,35 +183,32 @@ def create(
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
 
-
-
+        if model_settings.azure_api_key is None:
+            raise ValueError(f"Azure API key is missing. Did you set AZURE_API_KEY in your env?")
+
+        if model_settings.azure_base_url is None:
+            raise ValueError(f"Azure base url is missing. Did you set AZURE_BASE_URL in your env?")
+
+        # Set the llm config model_endpoint from model_settings
+        # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
+        llm_config.model_endpoint = model_settings.azure_base_url
+        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, llm_config.model)
+        chat_completion_request = build_openai_chat_completions_request(
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
         )
-
-
-
-
-
-
-                tool_choice=function_call,
-                user=str(user_id),
-            )
-        else:
-            data = dict(
-                # NOTE: don't pass model to Azure calls, that is the deployment_id
-                # model=agent_config.model,
-                messages=[m.to_openai_dict() for m in messages],
-                functions=functions,
-                function_call=function_call,
-                user=str(user_id),
-            )
-        return azure_openai_chat_completions_request(
-            resource_name=model_settings.azure_endpoint,
-            deployment_id=azure_deployment,
-            api_version=model_settings.azure_version,
-            api_key=model_settings.azure_key,
-            data=data,
+
+        response = azure_openai_chat_completions_request(
+            model_settings=model_settings,
+            llm_config=llm_config,
+            api_key=model_settings.azure_api_key,
+            chat_completion_request=chat_completion_request,
         )
 
+        if inner_thoughts_in_kwargs:
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+
+        return response
+
     elif llm_config.model_endpoint_type == "google_ai":
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
```
```diff
@@ -517,7 +341,7 @@ def create(
             stream_inferface.stream_end()
 
         if inner_thoughts_in_kwargs:
-            response =
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
         return response
 
```
letta/llm_api/openai.py
CHANGED
```diff
@@ -1,5 +1,6 @@
 import json
-
+import warnings
+from typing import Generator, List, Optional, Union
 
 import httpx
 import requests
```

```diff
@@ -8,10 +9,19 @@ from httpx_sse._exceptions import SSEError
 
 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
 from letta.errors import LLMError
+from letta.llm_api.helpers import add_inner_thoughts_to_functions
+from letta.local_llm.constants import (
+    INNER_THOUGHTS_KWARG,
+    INNER_THOUGHTS_KWARG_DESCRIPTION,
+)
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
-from letta.schemas.openai.chat_completion_request import
+from letta.schemas.openai.chat_completion_request import (
+    ChatCompletionRequest,
+    cast_message_to_subtype,
+)
 from letta.schemas.openai.chat_completion_response import (
     ChatCompletionChunkResponse,
     ChatCompletionResponse,
```

```diff
@@ -81,6 +91,64 @@ def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional
         raise e
 
 
+def build_openai_chat_completions_request(
+    llm_config: LLMConfig,
+    messages: List[Message],
+    user_id: Optional[str],
+    functions: Optional[list],
+    function_call: str,
+    use_tool_naming: bool,
+    inner_thoughts_in_kwargs: bool,
+    max_tokens: Optional[int],
+) -> ChatCompletionRequest:
+    if inner_thoughts_in_kwargs:
+        functions = add_inner_thoughts_to_functions(
+            functions=functions,
+            inner_thoughts_key=INNER_THOUGHTS_KWARG,
+            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+        )
+
+    openai_message_list = [
+        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages
+    ]
+    if llm_config.model:
+        model = llm_config.model
+    else:
+        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        model = None
+
+    if use_tool_naming:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            tools=[{"type": "function", "function": f} for f in functions] if functions else None,
+            tool_choice=function_call,
+            user=str(user_id),
+            max_tokens=max_tokens,
+        )
+    else:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            functions=functions,
+            function_call=function_call,
+            user=str(user_id),
+            max_tokens=max_tokens,
+        )
+    # https://platform.openai.com/docs/guides/text-generation/json-mode
+    # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
+    if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
+        data.response_format = {"type": "json_object"}
+
+    if "inference.memgpt.ai" in llm_config.model_endpoint:
+        # override user id for inference.memgpt.ai
+        import uuid
+
+        data.user = str(uuid.UUID(int=0))
+
+    return data
+
+
 def openai_chat_completions_process_stream(
     url: str,
     api_key: str,
```
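For orientation, here is a minimal sketch of calling the new builder the way create() now does; all values below are illustrative placeholders, not taken from the package:

```python
# Illustrative call mirroring the new create() call sites; values are placeholders.
from letta.llm_api.openai import build_openai_chat_completions_request
from letta.schemas.llm_config import LLMConfig

llm_config = LLMConfig(
    model="gpt-4o",
    model_endpoint_type="openai",
    model_endpoint="https://api.openai.com/v1",
    context_window=128000,
)

# A single OpenAI-schema function, so add_inner_thoughts_to_functions has
# something to annotate when inner_thoughts_in_kwargs=True.
functions = [
    {
        "name": "send_message",
        "description": "Send a message to the user.",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        },
    }
]

data = build_openai_chat_completions_request(
    llm_config=llm_config,
    messages=[],  # the agent's Message history would go here
    user_id="user-123",
    functions=functions,
    function_call="auto",
    use_tool_naming=True,
    inner_thoughts_in_kwargs=True,
    max_tokens=None,
)
# `data` is a ChatCompletionRequest that create() then sends through
# openai_chat_completions_request (or the Azure path shown earlier).
```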
letta/providers.py
CHANGED
```diff
@@ -220,7 +220,11 @@ class GoogleAIProvider(Provider):
 
 
 class AzureProvider(Provider):
-
+    name: str = "azure"
+    base_url: str = Field(
+        ..., description="Base URL for the Azure API endpoint. This should be specific to your org, e.g. `https://letta.openai.azure.com`."
+    )
+    api_key: str = Field(..., description="API key for the Azure API.")
 
 
 class VLLMProvider(OpenAIProvider):
```
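A minimal sketch of constructing the expanded AzureProvider with its two new required fields; the URL echoes the example in the field description, and the key is a placeholder:

```python
from letta.providers import AzureProvider

azure = AzureProvider(
    base_url="https://letta.openai.azure.com",  # org-specific endpoint, per the field description
    api_key="<your-azure-api-key>",             # placeholder
)
assert azure.name == "azure"
```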
letta/schemas/llm_config.py
CHANGED
```diff
@@ -11,7 +11,7 @@ class LLMConfig(BaseModel):
         model (str): The name of the LLM model.
         model_endpoint_type (str): The endpoint type for the model.
         model_endpoint (str): The endpoint for the model.
-        model_wrapper (str): The wrapper for the model.
+        model_wrapper (str): The wrapper for the model. This is used to wrap additional text around the input/output of the model. This is useful for text-to-text completions, such as the Completions API in OpenAI.
         context_window (int): The context window size for the model.
     """
```

```diff
@@ -34,7 +34,10 @@ class LLMConfig(BaseModel):
         "vllm",
         "hugging-face",
     ] = Field(..., description="The endpoint type for the model.")
-    model_endpoint: str = Field(
+    model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
+    api_version: Optional[str] = Field(
+        None, description="The version for the model API. Used by the Azure provider backend, e.g. 2023-03-15-preview."
+    )
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")
 
```
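With model_endpoint now optional and api_version added, an Azure-backed config can leave the endpoint unset; per the llm_api_tools.py change above, create() copies it from AZURE_BASE_URL at request time. A sketch with placeholder values (this assumes "azure" is among the accepted endpoint types, which the elided Literal entries should include):

```python
from letta.schemas.llm_config import LLMConfig

# model_endpoint may be omitted: for Azure, create() fills it from
# model_settings.azure_base_url (the AZURE_BASE_URL env var) before the request.
azure_config = LLMConfig(
    model="gpt-4o-mini",               # placeholder deployment/model name
    model_endpoint_type="azure",       # assumed member of the Literal above
    api_version="2023-03-15-preview",  # example value from the field description
    context_window=128000,
)
```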
letta/server/rest_api/admin/tools.py
CHANGED

```diff
@@ -26,7 +26,6 @@ class CreateToolResponse(BaseModel):
 
 
 def setup_tools_index_router(server: SyncServer, interface: QueuingInterface):
-    # get_current_user_with_server = partial(partial(get_current_user, server), password)
 
     @router.delete("/tools/{tool_name}", tags=["tools"])
     async def delete_tool(
```
letta/server/rest_api/app.py
CHANGED
```diff
@@ -5,8 +5,7 @@ from pathlib import Path
 from typing import Optional
 
 import uvicorn
-from fastapi import FastAPI
-from fastapi.responses import JSONResponse
+from fastapi import FastAPI
 from starlette.middleware.cors import CORSMiddleware
 
 from letta.server.constants import REST_DEFAULT_PORT
```

```diff
@@ -84,21 +83,6 @@ def create_application() -> "FastAPI":
         allow_headers=["*"],
     )
 
-    @app.middleware("http")
-    async def set_current_user_middleware(request: Request, call_next):
-        user_id = request.headers.get("user_id")
-        if user_id:
-            try:
-                server.set_current_user(user_id)
-            except ValueError as e:
-                # Return an HTTP 401 Unauthorized response
-                # raise HTTPException(status_code=401, detail=str(e))
-                return JSONResponse(status_code=401, content={"detail": str(e)})
-        else:
-            server.set_current_user(None)
-        response = await call_next(request)
-        return response
-
     for route in v1_routes:
         app.include_router(route, prefix=API_PREFIX)
         # this gives undocumented routes for "latest" and bare api calls.
```
letta/server/rest_api/routers/openai/assistants/threads.py
CHANGED

```diff
@@ -1,7 +1,7 @@
 import uuid
 from typing import TYPE_CHECKING, List
 
-from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query
+from fastapi import APIRouter, Body, Depends, Header, HTTPException, Path, Query
 
 from letta.constants import DEFAULT_PRESET
 from letta.schemas.agent import CreateAgent
```

```diff
@@ -43,11 +43,12 @@ router = APIRouter(prefix="/v1/threads", tags=["threads"])
 def create_thread(
     request: CreateThreadRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
     # TODO: use requests.description and requests.metadata fields
     # TODO: handle requests.file_ids and requests.tools
     # TODO: eventually allow request to override embedding/llm model
-    actor = server.
+    actor = server.get_user_or_default(user_id=user_id)
 
     print("Create thread/agent", request)
     # create a letta agent
```

```diff
@@ -67,8 +68,9 @@ def create_thread(
 def retrieve_thread(
     thread_id: str = Path(..., description="The unique identifier of the thread."),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
-    actor = server.
+    actor = server.get_user_or_default(user_id=user_id)
     agent = server.get_agent(user_id=actor.id, agent_id=thread_id)
     assert agent is not None
     return OpenAIThread(
```

```diff
@@ -100,8 +102,9 @@ def create_message(
     thread_id: str = Path(..., description="The unique identifier of the thread."),
     request: CreateMessageRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
-    actor = server.
+    actor = server.get_user_or_default(user_id=user_id)
     agent_id = thread_id
     # create message object
     message = Message(
```

```diff
@@ -143,8 +146,9 @@ def list_messages(
     after: str = Query(None, description="A cursor for use in pagination. `after` is an object ID that defines your place in the list."),
     before: str = Query(None, description="A cursor for use in pagination. `after` is an object ID that defines your place in the list."),
     server: SyncServer = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
-    actor = server.
+    actor = server.get_user_or_default(user_id)
     after_uuid = after if before else None
     before_uuid = before if before else None
     agent_id = thread_id
```

```diff
@@ -239,7 +243,6 @@ def create_run(
     request: CreateRunRequest = Body(...),
     server: SyncServer = Depends(get_letta_server),
 ):
-    server.get_current_user()
 
     # TODO: add request.instructions as a message?
     agent_id = thread_id
```
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py
CHANGED

```diff
@@ -1,7 +1,7 @@
 import json
 from typing import TYPE_CHECKING
 
-from fastapi import APIRouter, Body, Depends, HTTPException
+from fastapi import APIRouter, Body, Depends, Header, HTTPException
 
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message import FunctionCall, LettaMessage
```

```diff
@@ -30,12 +30,14 @@ router = APIRouter(prefix="/v1/chat/completions", tags=["chat_completions"])
 async def create_chat_completion(
     completion_request: ChatCompletionRequest = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
+    user_id: str = Header(None),  # Extract user_id from header, default to None if not present
 ):
     """Send a message to a Letta agent via a /chat/completions completion_request
     The bearer token will be used to identify the user.
     The 'user' field in the completion_request should be set to the agent ID.
     """
-    actor = server.
+    actor = server.get_user_or_default(user_id=user_id)
+
     agent_id = completion_request.user
     if agent_id is None:
         raise HTTPException(status_code=400, detail="Must pass agent_id in the 'user' field")
```