letta-nightly 0.4.1.dev20241007104134__py3-none-any.whl → 0.4.1.dev20241009104130__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/agent.py +36 -10
- letta/client/client.py +8 -1
- letta/credentials.py +3 -3
- letta/errors.py +1 -1
- letta/functions/schema_generator.py +1 -1
- letta/llm_api/anthropic.py +3 -24
- letta/llm_api/azure_openai.py +53 -108
- letta/llm_api/azure_openai_constants.py +10 -0
- letta/llm_api/google_ai.py +39 -64
- letta/llm_api/helpers.py +208 -0
- letta/llm_api/llm_api_tools.py +43 -218
- letta/llm_api/openai.py +74 -50
- letta/main.py +1 -1
- letta/metadata.py +2 -0
- letta/providers.py +144 -31
- letta/schemas/agent.py +14 -0
- letta/schemas/llm_config.py +2 -2
- letta/schemas/openai/chat_completion_response.py +3 -0
- letta/schemas/tool.py +3 -3
- letta/server/rest_api/admin/tools.py +0 -1
- letta/server/rest_api/app.py +1 -17
- letta/server/rest_api/routers/openai/assistants/threads.py +10 -7
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +5 -3
- letta/server/rest_api/routers/v1/agents.py +23 -13
- letta/server/rest_api/routers/v1/blocks.py +5 -3
- letta/server/rest_api/routers/v1/jobs.py +5 -3
- letta/server/rest_api/routers/v1/sources.py +25 -13
- letta/server/rest_api/routers/v1/tools.py +12 -7
- letta/server/server.py +33 -37
- letta/settings.py +5 -113
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/METADATA +1 -1
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/RECORD +35 -33
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/LICENSE +0 -0
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/WHEEL +0 -0
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/entry_points.txt +0 -0
letta/llm_api/llm_api_tools.py
CHANGED
@@ -1,25 +1,25 @@
-import copy
-import json
 import os
 import random
 import time
-import warnings
 from typing import List, Optional, Union

 import requests

-from letta.constants import CLI_WARNING_PREFIX
+from letta.constants import CLI_WARNING_PREFIX
 from letta.llm_api.anthropic import anthropic_chat_completions_request
-from letta.llm_api.azure_openai import (
-    MODEL_TO_AZURE_ENGINE,
-    azure_openai_chat_completions_request,
-)
+from letta.llm_api.azure_openai import azure_openai_chat_completions_request
 from letta.llm_api.cohere import cohere_chat_completions_request
 from letta.llm_api.google_ai import (
     convert_tools_to_google_ai_format,
     google_ai_chat_completions_request,
 )
+from letta.llm_api.helpers import (
+    add_inner_thoughts_to_functions,
+    derive_inner_thoughts_in_kwargs,
+    unpack_all_inner_thoughts_from_kwargs,
+)
 from letta.llm_api.openai import (
+    build_openai_chat_completions_request,
     openai_chat_completions_process_stream,
     openai_chat_completions_request,
 )
@@ -28,7 +28,6 @@ from letta.local_llm.constants import (
     INNER_THOUGHTS_KWARG,
     INNER_THOUGHTS_KWARG_DESCRIPTION,
 )
-from letta.providers import GoogleAIProvider
 from letta.schemas.enums import OptionState
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
@@ -37,144 +36,15 @@ from letta.schemas.openai.chat_completion_request import (
     Tool,
     cast_message_to_subtype,
 )
-from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.streaming_interface import (
     AgentChunkStreamingInterface,
     AgentRefreshStreamingInterface,
 )
-from letta.utils import json_dumps

 LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]


-# TODO update to use better types
-def add_inner_thoughts_to_functions(
-    functions: List[dict],
-    inner_thoughts_key: str,
-    inner_thoughts_description: str,
-    inner_thoughts_required: bool = True,
-    # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict?
-) -> List[dict]:
-    """Add an inner_thoughts kwarg to every function in the provided list"""
-    # return copies
-    new_functions = []
-
-    # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
-    for function_object in functions:
-        function_params = function_object["parameters"]["properties"]
-        required_params = list(function_object["parameters"]["required"])
-
-        # if the inner thoughts arg doesn't exist, add it
-        if inner_thoughts_key not in function_params:
-            function_params[inner_thoughts_key] = {
-                "type": "string",
-                "description": inner_thoughts_description,
-            }
-
-        # make sure it's tagged as required
-        new_function_object = copy.deepcopy(function_object)
-        if inner_thoughts_required and inner_thoughts_key not in required_params:
-            required_params.append(inner_thoughts_key)
-            new_function_object["parameters"]["required"] = required_params
-
-        new_functions.append(new_function_object)
-
-    # return a list of copies
-    return new_functions
-
-
-def unpack_all_inner_thoughts_from_kwargs(
-    response: ChatCompletionResponse,
-    inner_thoughts_key: str,
-) -> ChatCompletionResponse:
-    """Strip the inner thoughts out of the tool call and put it in the message content"""
-    if len(response.choices) == 0:
-        raise ValueError(f"Unpacking inner thoughts from empty response not supported")
-
-    new_choices = []
-    for choice in response.choices:
-        new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))
-
-    # return an updated copy
-    new_response = response.model_copy(deep=True)
-    new_response.choices = new_choices
-    return new_response
-
-
-def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
-    message = choice.message
-    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
-        if len(message.tool_calls) > 1:
-            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
-        # TODO support multiple tool calls
-        tool_call = message.tool_calls[0]
-
-        try:
-            # Sadly we need to parse the JSON since args are in string format
-            func_args = dict(json.loads(tool_call.function.arguments))
-            if inner_thoughts_key in func_args:
-                # extract the inner thoughts
-                inner_thoughts = func_args.pop(inner_thoughts_key)
-
-                # replace the kwargs
-                new_choice = choice.model_copy(deep=True)
-                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
-                # also replace the message content
-                if new_choice.message.content is not None:
-                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
-                new_choice.message.content = inner_thoughts
-
-                return new_choice
-            else:
-                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
-
-        except json.JSONDecodeError as e:
-            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
-            raise e
-
-
-def is_context_overflow_error(exception: requests.exceptions.RequestException) -> bool:
-    """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
-    from letta.utils import printd
-
-    match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
-
-    # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration)
-    if match_string in str(exception):
-        printd(f"Found '{match_string}' in str(exception)={(str(exception))}")
-        return True
-
-    # Based on python requests + OpenAI REST API (/v1)
-    elif isinstance(exception, requests.exceptions.HTTPError):
-        if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""):
-            try:
-                error_details = exception.response.json()
-                if "error" not in error_details:
-                    printd(f"HTTPError occurred, but couldn't find error field: {error_details}")
-                    return False
-                else:
-                    error_details = error_details["error"]
-
-                # Check for the specific error code
-                if error_details.get("code") == "context_length_exceeded":
-                    printd(f"HTTPError occurred, caught error code {error_details.get('code')}")
-                    return True
-                # Soft-check for "maximum context length" inside of the message
-                elif error_details.get("message") and "maximum context length" in error_details.get("message"):
-                    printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})")
-                    return True
-                else:
-                    printd(f"HTTPError occurred, but unknown error message: {error_details}")
-                    return False
-            except ValueError:
-                # JSON decoding failed
-                printd(f"HTTPError occurred ({exception}), but no JSON error message.")
-
-    # Generic fail
-    else:
-        return False
-
-
 def retry_with_exponential_backoff(
     func,
     initial_delay: float = 1,
@@ -248,7 +118,8 @@ def create(
     stream_inferface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
     # TODO move to llm_config?
     # if unspecified (None), default to something we've tested
-    inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+    inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
+    max_tokens: Optional[int] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
@@ -267,59 +138,14 @@ def create(

     # openai
     if llm_config.model_endpoint_type == "openai":
-
-        if inner_thoughts_in_kwargs == OptionState.DEFAULT:
-            # model that are known to not use `content` fields on tool calls
-            inner_thoughts_in_kwargs = (
-                "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model
-            )
-        else:
-            inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs == OptionState.YES else False
-
-        if not isinstance(inner_thoughts_in_kwargs, bool):
-            warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
-            inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
-        if inner_thoughts_in_kwargs:
-            functions = add_inner_thoughts_to_functions(
-                functions=functions,
-                inner_thoughts_key=INNER_THOUGHTS_KWARG,
-                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
-            )
-
-        openai_message_list = [
-            cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages
-        ]
-
-        # TODO do the same for Azure?
         if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
             # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"OpenAI key is missing from letta config file")
-        if use_tool_naming:
-            data = ChatCompletionRequest(
-                model=llm_config.model,
-                messages=openai_message_list,
-                tools=[{"type": "function", "function": f} for f in functions] if functions else None,
-                tool_choice=function_call,
-                user=str(user_id),
-            )
-        else:
-            data = ChatCompletionRequest(
-                model=llm_config.model,
-                messages=openai_message_list,
-                functions=functions,
-                function_call=function_call,
-                user=str(user_id),
-            )
-        # https://platform.openai.com/docs/guides/text-generation/json-mode
-        # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
-        if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
-            data.response_format = {"type": "json_object"}

-        if "inference.memgpt.ai" in llm_config.model_endpoint:
-            # override user id for inference.memgpt.ai
-            import uuid
-
-            data.user = str(uuid.UUID(int=0))
+        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, model=llm_config.model)
+        data = build_openai_chat_completions_request(
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
+        )

         if stream:  # Client requested token streaming
             data.stream = True
@@ -356,35 +182,35 @@ def create(
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")

-
-
+        if model_settings.azure_api_key is None:
+            raise ValueError(f"Azure API key is missing. Did you set AZURE_API_KEY in your env?")
+
+        if model_settings.azure_base_url is None:
+            raise ValueError(f"Azure base url is missing. Did you set AZURE_BASE_URL in your env?")
+
+        if model_settings.azure_api_version is None:
+            raise ValueError(f"Azure API version is missing. Did you set AZURE_API_VERSION in your env?")
+
+        # Set the llm config model_endpoint from model_settings
+        # For Azure, this model_endpoint is required to be configured via env variable, so users don't need to provide it in the LLM config
+        llm_config.model_endpoint = model_settings.azure_base_url
+        inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option, llm_config.model)
+        chat_completion_request = build_openai_chat_completions_request(
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, inner_thoughts_in_kwargs, max_tokens
         )
-
-
-
-
-
-
-                tool_choice=function_call,
-                user=str(user_id),
-            )
-        else:
-            data = dict(
-                # NOTE: don't pass model to Azure calls, that is the deployment_id
-                # model=agent_config.model,
-                messages=[m.to_openai_dict() for m in messages],
-                functions=functions,
-                function_call=function_call,
-                user=str(user_id),
-            )
-        return azure_openai_chat_completions_request(
-            resource_name=model_settings.azure_endpoint,
-            deployment_id=azure_deployment,
-            api_version=model_settings.azure_version,
-            api_key=model_settings.azure_key,
-            data=data,
+
+        response = azure_openai_chat_completions_request(
+            model_settings=model_settings,
+            llm_config=llm_config,
+            api_key=model_settings.azure_api_key,
+            chat_completion_request=chat_completion_request,
         )

+        if inner_thoughts_in_kwargs:
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+
+        return response
+
     elif llm_config.model_endpoint_type == "google_ai":
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
@@ -404,7 +230,7 @@ def create(

         return google_ai_chat_completions_request(
             inner_thoughts_in_kwargs=google_ai_inner_thoughts_in_kwarg,
-
+            base_url=llm_config.model_endpoint,
             model=llm_config.model,
             api_key=model_settings.gemini_api_key,
             # see structure of payload here: https://ai.google.dev/docs/function_calling
@@ -472,7 +298,6 @@ def create(
             raise NotImplementedError(f"Streaming not yet implemented for Groq.")

         if model_settings.groq_api_key is None and llm_config.model_endpoint == "https://api.groq.com/openai/v1/chat/completions":
-            # only is a problem if we are *not* using an openai proxy
             raise ValueError(f"Groq key is missing from letta config file")

         # force to true for groq, since they don't support 'content' is non-null
@@ -517,7 +342,7 @@ def create(
             stream_inferface.stream_end()

         if inner_thoughts_in_kwargs:
-            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)

         return response
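Net effect of the llm_api_tools.py changes: the inner-thoughts helpers and request-building logic move out of create() into the new letta/llm_api/helpers.py (+208 lines) and letta/llm_api/openai.py, so the OpenAI and Azure branches now share one code path. The sketch below reconstructs that path from the call sites visible in this diff; the helper bodies are not shown here, and the keyword arguments passed to openai_chat_completions_request are an assumption.

# Sketch of the refactored OpenAI path through create(), pieced together from
# the call sites above. Helper bodies live in the new letta/llm_api/helpers.py,
# which this diff does not show.
from letta.llm_api.helpers import (
    derive_inner_thoughts_in_kwargs,
    unpack_all_inner_thoughts_from_kwargs,
)
from letta.llm_api.openai import (
    build_openai_chat_completions_request,
    openai_chat_completions_request,
)
from letta.local_llm.constants import INNER_THOUGHTS_KWARG
from letta.schemas.enums import OptionState


def openai_path_sketch(llm_config, messages, user_id, functions, function_call,
                       model_settings, use_tool_naming=True, max_tokens=None,
                       inner_thoughts_in_kwargs_option=OptionState.DEFAULT):
    # 1. Resolve the tri-state option (YES/NO/DEFAULT) into a bool based on the model
    inner_thoughts_in_kwargs = derive_inner_thoughts_in_kwargs(
        inner_thoughts_in_kwargs_option, model=llm_config.model
    )
    # 2. Build the ChatCompletionRequest; the same builder now serves the Azure branch
    data = build_openai_chat_completions_request(
        llm_config, messages, user_id, functions, function_call,
        use_tool_naming, inner_thoughts_in_kwargs, max_tokens,
    )
    # 3. Send the request (keyword names assumed), then move inner thoughts from
    #    the tool-call kwargs back into message.content if they were packed there
    response = openai_chat_completions_request(
        url=llm_config.model_endpoint,
        api_key=model_settings.openai_api_key,
        chat_completion_request=data,
    )
    if inner_thoughts_in_kwargs:
        response = unpack_all_inner_thoughts_from_kwargs(
            response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG
        )
    return response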
letta/llm_api/openai.py
CHANGED
@@ -1,5 +1,6 @@
 import json
-from typing import Generator, List, Optional, Union
+import warnings
+from typing import Generator, List, Optional, Union

 import httpx
 import requests
@@ -8,10 +9,19 @@ from httpx_sse._exceptions import SSEError

 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
 from letta.errors import LLMError
+from letta.llm_api.helpers import add_inner_thoughts_to_functions, make_post_request
+from letta.local_llm.constants import (
+    INNER_THOUGHTS_KWARG,
+    INNER_THOUGHTS_KWARG_DESCRIPTION,
+)
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
+from letta.schemas.openai.chat_completion_request import (
+    ChatCompletionRequest,
+    cast_message_to_subtype,
+)
 from letta.schemas.openai.chat_completion_response import (
     ChatCompletionChunkResponse,
     ChatCompletionResponse,
@@ -81,6 +91,64 @@ def openai_get_model_list(url: str, api_key: Union[str, None], fix_url: Optional
         raise e


+def build_openai_chat_completions_request(
+    llm_config: LLMConfig,
+    messages: List[Message],
+    user_id: Optional[str],
+    functions: Optional[list],
+    function_call: str,
+    use_tool_naming: bool,
+    inner_thoughts_in_kwargs: bool,
+    max_tokens: Optional[int],
+) -> ChatCompletionRequest:
+    if inner_thoughts_in_kwargs:
+        functions = add_inner_thoughts_to_functions(
+            functions=functions,
+            inner_thoughts_key=INNER_THOUGHTS_KWARG,
+            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+        )
+
+    openai_message_list = [
+        cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=inner_thoughts_in_kwargs)) for m in messages
+    ]
+    if llm_config.model:
+        model = llm_config.model
+    else:
+        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        model = None
+
+    if use_tool_naming:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            tools=[{"type": "function", "function": f} for f in functions] if functions else None,
+            tool_choice=function_call,
+            user=str(user_id),
+            max_tokens=max_tokens,
+        )
+    else:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            functions=functions,
+            function_call=function_call,
+            user=str(user_id),
+            max_tokens=max_tokens,
+        )
+    # https://platform.openai.com/docs/guides/text-generation/json-mode
+    # only supported by gpt-4o, gpt-4-turbo, or gpt-3.5-turbo
+    if "gpt-4o" in llm_config.model or "gpt-4-turbo" in llm_config.model or "gpt-3.5-turbo" in llm_config.model:
+        data.response_format = {"type": "json_object"}
+
+    if "inference.memgpt.ai" in llm_config.model_endpoint:
+        # override user id for inference.memgpt.ai
+        import uuid
+
+        data.user = str(uuid.UUID(int=0))
+
+    return data
+
+
 def openai_chat_completions_process_stream(
     url: str,
     api_key: str,
@@ -415,58 +483,14 @@ def openai_chat_completions_request(
         data.pop("tools")
         data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")

-
-
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}, response.text = {response.text}")
-        # print(json.dumps(data, indent=4))
-        # raise requests.exceptions.HTTPError
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-
-        response = ChatCompletionResponse(**response)  # convert to 'dot-dict' style which is the openai python client default
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    response_json = make_post_request(url, headers, data)
+    return ChatCompletionResponse(**response_json)


 def openai_embeddings_request(url: str, api_key: str, data: dict) -> EmbeddingResponse:
     """https://platform.openai.com/docs/api-reference/embeddings/create"""
-    from letta.utils import printd

     url = smart_urljoin(url, "embeddings")
     headers = {"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"}
-
-
-    try:
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-        response = EmbeddingResponse(**response)  # convert to 'dot-dict' style which is the openai python client default
-        return response
-    except requests.exceptions.HTTPError as http_err:
-        # Handle HTTP errors (e.g., response 4XX, 5XX)
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        raise http_err
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    response_json = make_post_request(url, headers, data)
+    return EmbeddingResponse(**response_json)
letta/main.py
CHANGED
@@ -366,7 +366,7 @@ def run_agent_loop(
                 first_message=False,
                 skip_verify=no_verify,
                 stream=stream,
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs,
                 ms=ms,
             )
             new_messages = step_response.messages
letta/metadata.py
CHANGED
@@ -218,6 +218,7 @@ class AgentModel(Base):
     tools = Column(JSON)

     # configs
+    agent_type = Column(String)
     llm_config = Column(LLMConfigColumn)
     embedding_config = Column(EmbeddingConfigColumn)

@@ -243,6 +244,7 @@ class AgentModel(Base):
             memory=Memory.load(self.memory),  # load dictionary
             system=self.system,
             tools=self.tools,
+            agent_type=self.agent_type,
             llm_config=self.llm_config,
             embedding_config=self.embedding_config,
             metadata_=self.metadata_,