letta-nightly 0.4.1.dev20241007104134__py3-none-any.whl → 0.4.1.dev20241009104130__py3-none-any.whl
This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/agent.py +36 -10
- letta/client/client.py +8 -1
- letta/credentials.py +3 -3
- letta/errors.py +1 -1
- letta/functions/schema_generator.py +1 -1
- letta/llm_api/anthropic.py +3 -24
- letta/llm_api/azure_openai.py +53 -108
- letta/llm_api/azure_openai_constants.py +10 -0
- letta/llm_api/google_ai.py +39 -64
- letta/llm_api/helpers.py +208 -0
- letta/llm_api/llm_api_tools.py +43 -218
- letta/llm_api/openai.py +74 -50
- letta/main.py +1 -1
- letta/metadata.py +2 -0
- letta/providers.py +144 -31
- letta/schemas/agent.py +14 -0
- letta/schemas/llm_config.py +2 -2
- letta/schemas/openai/chat_completion_response.py +3 -0
- letta/schemas/tool.py +3 -3
- letta/server/rest_api/admin/tools.py +0 -1
- letta/server/rest_api/app.py +1 -17
- letta/server/rest_api/routers/openai/assistants/threads.py +10 -7
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +5 -3
- letta/server/rest_api/routers/v1/agents.py +23 -13
- letta/server/rest_api/routers/v1/blocks.py +5 -3
- letta/server/rest_api/routers/v1/jobs.py +5 -3
- letta/server/rest_api/routers/v1/sources.py +25 -13
- letta/server/rest_api/routers/v1/tools.py +12 -7
- letta/server/server.py +33 -37
- letta/settings.py +5 -113
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/METADATA +1 -1
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/RECORD +35 -33
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/LICENSE +0 -0
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/WHEEL +0 -0
- {letta_nightly-0.4.1.dev20241007104134.dist-info → letta_nightly-0.4.1.dev20241009104130.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_ai.py
CHANGED
@@ -1,9 +1,10 @@
 import uuid
-from typing import List, Optional
+from typing import List, Optional, Tuple

 import requests

 from letta.constants import NON_USER_MSG_PREFIX
+from letta.llm_api.helpers import make_post_request
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.schemas.openai.chat_completion_request import Tool
@@ -15,27 +16,41 @@ from letta.schemas.openai.chat_completion_response import (
     ToolCall,
     UsageStatistics,
 )
-from letta.utils import get_tool_call_id, get_utc_time
+from letta.utils import get_tool_call_id, get_utc_time, json_dumps

-# from letta.data_types import ToolCall

+def get_gemini_endpoint_and_headers(
+    base_url: str, model: Optional[str], api_key: str, key_in_header: bool = True, generate_content: bool = False
+) -> Tuple[str, dict]:
+    """
+    Dynamically generate the model endpoint and headers.
+    """
+    url = f"{base_url}/v1beta/models"

-
-
-
-
+    # Add the model
+    if model is not None:
+        url += f"/{model}"

-
-
+    # Add extension for generating content if we're hitting the LM
+    if generate_content:
+        url += ":generateContent"

+    # Decide if api key should be in header or not
     # Two ways to pass the key: https://ai.google.dev/tutorials/setup
     if key_in_header:
-        url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}"
         headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
     else:
-        url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}?key={api_key}"
+        url += f"?key={api_key}"
         headers = {"Content-Type": "application/json"}

+    return url, headers
+
+
+def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]:
+    from letta.utils import printd
+
+    url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)
+
     try:
         response = requests.get(url, headers=headers)
         printd(f"response = {response}")
@@ -66,25 +81,17 @@ def google_ai_get_model_details(service_endpoint: str, api_key: str, model: str,
         raise e


-def google_ai_get_model_context_window(
-    model_details = google_ai_get_model_details(
-        service_endpoint=service_endpoint, api_key=api_key, model=model, key_in_header=key_in_header
-    )
+def google_ai_get_model_context_window(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
+    model_details = google_ai_get_model_details(base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header)
     # TODO should this be:
     # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"]
     return int(model_details["inputTokenLimit"])


-def google_ai_get_model_list(
+def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
     from letta.utils import printd

-
-    if key_in_header:
-        url = f"https://{service_endpoint}.googleapis.com/v1beta/models"
-        headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
-    else:
-        url = f"https://{service_endpoint}.googleapis.com/v1beta/models?key={api_key}"
-        headers = {"Content-Type": "application/json"}
+    url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)

     try:
         response = requests.get(url, headers=headers)
@@ -396,7 +403,7 @@ def convert_google_ai_response_to_chatcompletion(

 # TODO convert 'data' type to pydantic
 def google_ai_chat_completions_request(
-    service_endpoint: str,
+    base_url: str,
     model: str,
     api_key: str,
     data: dict,
@@ -414,55 +421,23 @@ def google_ai_chat_completions_request(
     This service has the following service endpoint and all URIs below are relative to this service endpoint:
     https://xxx.googleapis.com
     """
-    from letta.utils import printd

-    assert service_endpoint is not None, "Missing service_endpoint when calling Google AI"
     assert api_key is not None, "Missing api_key when calling Google AI"
-    assert model in SUPPORTED_MODELS, f"Model '{model}' not in supported models: {', '.join(SUPPORTED_MODELS)}"

-
-    if key_in_header:
-        url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}:generateContent"
-        headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
-    else:
-        url = f"https://{service_endpoint}.googleapis.com/v1beta/models/{model}:generateContent?key={api_key}"
-        headers = {"Content-Type": "application/json"}
+    url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header, generate_content=True)

     # data["contents"][-1]["role"] = "model"
     if add_postfunc_model_messages:
         data["contents"] = add_dummy_model_messages(data["contents"])

-
+    response_json = make_post_request(url, headers, data)
     try:
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
-        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
-        response = response.json()  # convert to dict from string
-        printd(f"response.json = {response}")
-
-        # Convert Google AI response to ChatCompletion style
         return convert_google_ai_response_to_chatcompletion(
-            response_json=response,
-            model=model,
+            response_json=response_json,
+            model=data.get("model"),
             input_messages=data["contents"],
-            pull_inner_thoughts_from_args=inner_thoughts_in_kwargs,
+            pull_inner_thoughts_from_args=data.get("inner_thoughts_in_kwargs", False),
         )
-
-
-    except requests.exceptions.HTTPError as http_err:
-        printd(f"Got HTTPError, exception={http_err}, payload={data}")
-        # Print the HTTP status code
-        print(f"HTTP Error: {http_err.response.status_code}")
-        # Print the response content (error message from server)
-        print(f"Message: {http_err.response.text}")
-        raise http_err
-
-    except requests.exceptions.RequestException as req_err:
-        # Handle other requests-related errors (e.g., connection error)
-        printd(f"Got RequestException, exception={req_err}")
-        raise req_err
-
-    except Exception as e:
-        # Handle other potential errors
-        printd(f"Got unknown Exception, exception={e}")
-        raise e
+    except Exception as conversion_error:
+        print(f"Error during response conversion: {conversion_error}")
+        raise conversion_error
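Taken together, this change replaces three copies of hand-built Google AI URL logic (previously keyed off a service_endpoint host name) with a single helper keyed off a base_url. A minimal sketch of the URL shapes the new helper produces; the base_url, model, and api_key values below are placeholders for illustration, not letta defaults:

from letta.llm_api.google_ai import get_gemini_endpoint_and_headers

# Chat-completions path: model segment plus the :generateContent suffix,
# with the API key carried in the x-goog-api-key header (placeholder values).
url, headers = get_gemini_endpoint_and_headers(
    base_url="https://generativelanguage.googleapis.com",
    model="gemini-pro",
    api_key="example-key",
    key_in_header=True,
    generate_content=True,
)
assert url == "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro:generateContent"
assert headers == {"Content-Type": "application/json", "x-goog-api-key": "example-key"}

# Model-listing path: no model segment, key passed as a query parameter instead.
url, headers = get_gemini_endpoint_and_headers(
    base_url="https://generativelanguage.googleapis.com",
    model=None,
    api_key="example-key",
    key_in_header=False,
)
assert url == "https://generativelanguage.googleapis.com/v1beta/models?key=example-key"
assert headers == {"Content-Type": "application/json"}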
letta/llm_api/helpers.py
ADDED
@@ -0,0 +1,208 @@
+import copy
+import json
+import warnings
+from typing import Any, List, Union
+
+import requests
+
+from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+from letta.schemas.enums import OptionState
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
+from letta.utils import json_dumps, printd
+
+
+def make_post_request(url: str, headers: dict[str, str], data: dict[str, Any]) -> dict[str, Any]:
+    printd(f"Sending request to {url}")
+    try:
+        # Make the POST request
+        response = requests.post(url, headers=headers, json=data)
+        printd(f"Response status code: {response.status_code}")
+
+        # Raise for 4XX/5XX HTTP errors
+        response.raise_for_status()
+
+        # Ensure the content is JSON before parsing
+        if response.headers.get("Content-Type") == "application/json":
+            response_data = response.json()  # Convert to dict from JSON
+            printd(f"Response JSON: {response_data}")
+        else:
+            error_message = f"Unexpected content type returned: {response.headers.get('Content-Type')}"
+            printd(error_message)
+            raise ValueError(error_message)
+
+        # Process the response using the callback function
+        return response_data
+
+    except requests.exceptions.HTTPError as http_err:
+        # HTTP errors (4XX, 5XX)
+        error_message = f"HTTP error occurred: {http_err}"
+        if http_err.response is not None:
+            error_message += f" | Status code: {http_err.response.status_code}, Message: {http_err.response.text}"
+        printd(error_message)
+        raise requests.exceptions.HTTPError(error_message) from http_err
+
+    except requests.exceptions.Timeout as timeout_err:
+        # Handle timeout errors
+        error_message = f"Request timed out: {timeout_err}"
+        printd(error_message)
+        raise requests.exceptions.Timeout(error_message) from timeout_err
+
+    except requests.exceptions.RequestException as req_err:
+        # Non-HTTP errors (e.g., connection, SSL errors)
+        error_message = f"Request failed: {req_err}"
+        printd(error_message)
+        raise requests.exceptions.RequestException(error_message) from req_err
+
+    except ValueError as val_err:
+        # Handle content-type or non-JSON response issues
+        error_message = f"ValueError: {val_err}"
+        printd(error_message)
+        raise ValueError(error_message) from val_err
+
+    except Exception as e:
+        # Catch any other unknown exceptions
+        error_message = f"An unexpected error occurred: {e}"
+        printd(error_message)
+        raise Exception(error_message) from e
+
+
+# TODO update to use better types
+def add_inner_thoughts_to_functions(
+    functions: List[dict],
+    inner_thoughts_key: str,
+    inner_thoughts_description: str,
+    inner_thoughts_required: bool = True,
+    # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict?
+) -> List[dict]:
+    """Add an inner_thoughts kwarg to every function in the provided list"""
+    # return copies
+    new_functions = []
+
+    # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
+    for function_object in functions:
+        function_params = function_object["parameters"]["properties"]
+        required_params = list(function_object["parameters"]["required"])
+
+        # if the inner thoughts arg doesn't exist, add it
+        if inner_thoughts_key not in function_params:
+            function_params[inner_thoughts_key] = {
+                "type": "string",
+                "description": inner_thoughts_description,
+            }
+
+        # make sure it's tagged as required
+        new_function_object = copy.deepcopy(function_object)
+        if inner_thoughts_required and inner_thoughts_key not in required_params:
+            required_params.append(inner_thoughts_key)
+            new_function_object["parameters"]["required"] = required_params
+
+        new_functions.append(new_function_object)
+
+    # return a list of copies
+    return new_functions
+
+
+def unpack_all_inner_thoughts_from_kwargs(
+    response: ChatCompletionResponse,
+    inner_thoughts_key: str,
+) -> ChatCompletionResponse:
+    """Strip the inner thoughts out of the tool call and put it in the message content"""
+    if len(response.choices) == 0:
+        raise ValueError(f"Unpacking inner thoughts from empty response not supported")
+
+    new_choices = []
+    for choice in response.choices:
+        new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))
+
+    # return an updated copy
+    new_response = response.model_copy(deep=True)
+    new_response.choices = new_choices
+    return new_response
+
+
+def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
+    message = choice.message
+    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
+        if len(message.tool_calls) > 1:
+            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
+        # TODO support multiple tool calls
+        tool_call = message.tool_calls[0]
+
+        try:
+            # Sadly we need to parse the JSON since args are in string format
+            func_args = dict(json.loads(tool_call.function.arguments))
+            if inner_thoughts_key in func_args:
+                # extract the inner thoughts
+                inner_thoughts = func_args.pop(inner_thoughts_key)
+
+                # replace the kwargs
+                new_choice = choice.model_copy(deep=True)
+                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
+                # also replace the message content
+                if new_choice.message.content is not None:
+                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
+                new_choice.message.content = inner_thoughts
+
+                return new_choice
+            else:
+                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+
+        except json.JSONDecodeError as e:
+            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+            raise e
+
+
+def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool:
+    """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
+    from letta.utils import printd
+
+    match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+
+    # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration)
+    if match_string in str(exception):
+        printd(f"Found '{match_string}' in str(exception)={(str(exception))}")
+        return True
+
+    # Based on python requests + OpenAI REST API (/v1)
+    elif isinstance(exception, requests.exceptions.HTTPError):
+        if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""):
+            try:
+                error_details = exception.response.json()
+                if "error" not in error_details:
+                    printd(f"HTTPError occurred, but couldn't find error field: {error_details}")
+                    return False
+                else:
+                    error_details = error_details["error"]
+
+                # Check for the specific error code
+                if error_details.get("code") == "context_length_exceeded":
+                    printd(f"HTTPError occurred, caught error code {error_details.get('code')}")
+                    return True
+                # Soft-check for "maximum context length" inside of the message
+                elif error_details.get("message") and "maximum context length" in error_details.get("message"):
+                    printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})")
+                    return True
+                else:
+                    printd(f"HTTPError occurred, but unknown error message: {error_details}")
+                    return False
+            except ValueError:
+                # JSON decoding failed
+                printd(f"HTTPError occurred ({exception}), but no JSON error message.")
+
+    # Generic fail
+    else:
+        return False
+
+
+def derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option: OptionState, model: str):
+    if inner_thoughts_in_kwargs_option == OptionState.DEFAULT:
+        # model that are known to not use `content` fields on tool calls
+        inner_thoughts_in_kwargs = "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model
+    else:
+        inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs_option == OptionState.YES else False
+
+    if not isinstance(inner_thoughts_in_kwargs, bool):
+        warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
+        inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
+
+    return inner_thoughts_in_kwargs
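The new helpers.py pulls shared plumbing into one module, much of it apparently lifted out of llm_api_tools.py (which shrinks by 218 lines in the same release): a uniform POST wrapper, the inner-thoughts packing/unpacking pair, context-overflow detection, and the per-model default for inner_thoughts_in_kwargs. A minimal sketch of the packing side, using a made-up tool schema; the key and description strings are example values chosen for this sketch:

from letta.llm_api.helpers import add_inner_thoughts_to_functions

# One function definition in the OpenAI tool schema (example values).
functions = [
    {
        "name": "send_message",
        "description": "Send a message to the user.",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        },
    }
]

patched = add_inner_thoughts_to_functions(
    functions,
    inner_thoughts_key="inner_thoughts",
    inner_thoughts_description="Private reasoning, not shown to the user.",
)

# The kwarg is injected into the parameter schema and marked required.
assert "inner_thoughts" in patched[0]["parameters"]["properties"]
assert "inner_thoughts" in patched[0]["parameters"]["required"]

On the response side, unpack_all_inner_thoughts_from_kwargs reverses this: it pops the same key out of each tool call's JSON arguments and promotes it to message.content on a copied Choice.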