letta-nightly 0.4.1.dev20241006104046__py3-none-any.whl → 0.4.1.dev20241008104105__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/agent.py +19 -9
- letta/credentials.py +1 -1
- letta/errors.py +1 -1
- letta/llm_api/azure_openai.py +15 -19
- letta/llm_api/helpers.py +153 -0
- letta/llm_api/llm_api_tools.py +39 -215
- letta/llm_api/openai.py +70 -2
- letta/providers.py +5 -1
- letta/schemas/llm_config.py +5 -2
- letta/server/rest_api/admin/tools.py +0 -1
- letta/server/rest_api/app.py +1 -17
- letta/server/rest_api/routers/openai/assistants/threads.py +9 -6
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -2
- letta/server/rest_api/routers/v1/agents.py +23 -13
- letta/server/rest_api/routers/v1/blocks.py +5 -3
- letta/server/rest_api/routers/v1/jobs.py +5 -3
- letta/server/rest_api/routers/v1/sources.py +24 -12
- letta/server/rest_api/routers/v1/tools.py +11 -6
- letta/server/server.py +17 -34
- letta/settings.py +2 -1
- {letta_nightly-0.4.1.dev20241006104046.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/METADATA +1 -1
- {letta_nightly-0.4.1.dev20241006104046.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/RECORD +25 -24
- {letta_nightly-0.4.1.dev20241006104046.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/LICENSE +0 -0
- {letta_nightly-0.4.1.dev20241006104046.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/WHEEL +0 -0
- {letta_nightly-0.4.1.dev20241006104046.dist-info → letta_nightly-0.4.1.dev20241008104105.dist-info}/entry_points.txt +0 -0
letta/agent.py
CHANGED
```diff
@@ -18,7 +18,7 @@ from letta.constants import (
     MESSAGE_SUMMARY_WARNING_FRAC,
 )
 from letta.interface import AgentInterface
-from letta.llm_api.llm_api_tools import create, is_context_overflow_error
+from letta.llm_api.llm_api_tools import create
 from letta.memory import ArchivalMemory, RecallMemory, summarize_messages
 from letta.metadata import MetadataStore
 from letta.persistence_manager import LocalStateManager
@@ -56,6 +56,7 @@ from letta.utils import (
 )
 
 from .errors import LLMError
+from .llm_api.helpers import is_context_overflow_error
 
 
 def compile_memory_metadata_block(
@@ -207,7 +208,7 @@ class BaseAgent(ABC):
         recreate_message_timestamp: bool = True,  # if True, when input is a Message type, recreated the 'created_at' field
         stream: bool = False,  # TODO move to config?
         timestamp: Optional[datetime.datetime] = None,
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """
@@ -223,7 +224,7 @@ class BaseAgent(ABC):
 class Agent(BaseAgent):
     def __init__(
         self,
-        interface: AgentInterface,
+        interface: Optional[AgentInterface],
         # agents can be created from providing agent_state
         agent_state: AgentState,
         tools: List[Tool],
@@ -460,7 +461,7 @@ class Agent(BaseAgent):
         function_call: str = "auto",
         first_message: bool = False,  # hint
         stream: bool = False,  # TODO move to config?
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
     ) -> ChatCompletionResponse:
         """Get response from LLM API"""
         try:
@@ -478,7 +479,7 @@ class Agent(BaseAgent):
                 stream=stream,
                 stream_inferface=self.interface,
                 # putting inner thoughts in func args or not
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
             )
 
             if len(response.choices) == 0:
@@ -560,6 +561,8 @@ class Agent(BaseAgent):
             function_call = (
                 response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
             )
+
+            # Get the name of the function
             function_name = function_call.name
             printd(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
 
@@ -608,6 +611,13 @@ class Agent(BaseAgent):
                 self.interface.function_message(f"Error: {error_msg}", msg_obj=messages[-1])
                 return messages, False, True  # force a heartbeat to allow agent to handle error
 
+            # Check if inner thoughts is in the function call arguments (possible apparently if you are using Azure)
+            if "inner_thoughts" in function_args:
+                response_message.content = function_args.pop("inner_thoughts")
+            # The content if then internal monologue, not chat
+            if response_message.content:
+                self.interface.internal_monologue(response_message.content, msg_obj=messages[-1])
+
             # (Still parsing function args)
             # Handle requests for immediate heartbeat
             heartbeat_request = function_args.pop("request_heartbeat", None)
@@ -716,7 +726,7 @@ class Agent(BaseAgent):
         recreate_message_timestamp: bool = True,  # if True, when input is a Message type, recreated the 'created_at' field
         stream: bool = False,  # TODO move to config?
         timestamp: Optional[datetime.datetime] = None,
-        inner_thoughts_in_kwargs: OptionState = OptionState.DEFAULT,
+        inner_thoughts_in_kwargs_option: OptionState = OptionState.DEFAULT,
         ms: Optional[MetadataStore] = None,
     ) -> AgentStepResponse:
         """Top-level event message handler for the Letta agent"""
@@ -795,7 +805,7 @@ class Agent(BaseAgent):
                     message_sequence=input_message_sequence,
                     first_message=True,  # passed through to the prompt formatter
                     stream=stream,
-                    inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 )
                 if verify_first_message_correctness(response, require_monologue=self.first_message_verify_mono):
                     break
@@ -808,7 +818,7 @@ class Agent(BaseAgent):
                 response = self._get_ai_reply(
                     message_sequence=input_message_sequence,
                     stream=stream,
-                    inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                    inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 )
 
             # Step 3: check if LLM wanted to call a function
@@ -892,7 +902,7 @@ class Agent(BaseAgent):
                 recreate_message_timestamp=recreate_message_timestamp,
                 stream=stream,
                 timestamp=timestamp,
-                inner_thoughts_in_kwargs=inner_thoughts_in_kwargs,
+                inner_thoughts_in_kwargs_option=inner_thoughts_in_kwargs_option,
                 ms=ms,
             )
 
```
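The net effect in agent.py is a rename: the old `inner_thoughts_in_kwargs` keyword becomes `inner_thoughts_in_kwargs_option`, and the actual resolution of that option moves into the new helper module added below. A minimal sketch of how the option resolves, based on `derive_inner_thoughts_in_kwargs` from the new `letta/llm_api/helpers.py` (assumes this wheel is installed):

```python
from letta.schemas.enums import OptionState
from letta.llm_api.helpers import derive_inner_thoughts_in_kwargs

# DEFAULT defers to the model family: gpt-4o / gpt-4-turbo / gpt-3.5-turbo are
# treated as models that drop the `content` field on tool calls.
print(derive_inner_thoughts_in_kwargs(OptionState.DEFAULT, "gpt-4o-mini"))  # True
print(derive_inner_thoughts_in_kwargs(OptionState.DEFAULT, "gpt-4"))        # False
print(derive_inner_thoughts_in_kwargs(OptionState.YES, "gpt-4"))            # True (explicit override)
```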
letta/credentials.py
CHANGED
letta/errors.py
CHANGED
```diff
@@ -56,7 +56,7 @@ class LettaMessageError(LettaError):
             error_msg += f" (Explanation: {explanation})"
 
         # Pretty print out message JSON
-        message_json = json.dumps([message.
+        message_json = json.dumps([message.model_dump() for message in messages], indent=4)
         return f"{error_msg}\n\n{message_json}"
 
 
```
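The one-line errors.py change moves message serialization onto Pydantic v2's `model_dump()` (the removed line is truncated in the diff view above). A self-contained sketch of the pattern, where the `Message` model is a stand-in rather than letta's actual schema:

```python
import json

from pydantic import BaseModel


class Message(BaseModel):  # stand-in for letta's Message schema
    role: str
    text: str


messages = [Message(role="user", text="hello")]
# Pydantic v2: model_dump() returns a plain dict (the v1 equivalent was .dict())
message_json = json.dumps([message.model_dump() for message in messages], indent=4)
print(message_json)
```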
letta/llm_api/azure_openai.py
CHANGED
```diff
@@ -2,8 +2,11 @@ from typing import Union
 
 import requests
 
+from letta.schemas.llm_config import LLMConfig
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.chat_completions import ChatCompletionRequest
 from letta.schemas.openai.embedding_response import EmbeddingResponse
+from letta.settings import ModelSettings
 from letta.utils import smart_urljoin
 
 MODEL_TO_AZURE_ENGINE = {
@@ -13,17 +16,16 @@ MODEL_TO_AZURE_ENGINE = {
     "gpt-3.5": "gpt-35-turbo",
     "gpt-3.5-turbo": "gpt-35-turbo",
     "gpt-3.5-turbo-16k": "gpt-35-turbo-16k",
+    "gpt-4o-mini": "gpt-4o-mini",
 }
 
 
-def clean_azure_endpoint(raw_endpoint_name: str) -> str:
-    """Make sure the endpoint is of format 'https://YOUR_RESOURCE_NAME.openai.azure.com'"""
-    if raw_endpoint_name is None:
-        raise ValueError(raw_endpoint_name)
-    endpoint_address = raw_endpoint_name.strip("/").replace(".openai.azure.com", "")
-    endpoint_address = endpoint_address.replace("http://", "")
-    endpoint_address = endpoint_address.replace("https://", "")
-    return endpoint_address
+def get_azure_endpoint(llm_config: LLMConfig, model_settings: ModelSettings):
+    assert llm_config.api_version, "Missing model version! This field must be provided in the LLM config for Azure."
+    assert llm_config.model in MODEL_TO_AZURE_ENGINE, f"{llm_config.model} not in supported models: {list(MODEL_TO_AZURE_ENGINE.keys())}"
+
+    model = MODEL_TO_AZURE_ENGINE[llm_config.model]
+    return f"{model_settings.azure_base_url}/openai/deployments/{model}/chat/completions?api-version={llm_config.api_version}"
 
 
 def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version: str) -> dict:
@@ -72,19 +74,15 @@ def azure_openai_get_model_list(url: str, api_key: Union[str, None], api_version
 
 
 def azure_openai_chat_completions_request(
-    resource_name: str, deployment_id: str, api_version: str, api_key: str, data: dict
+    model_settings: ModelSettings, llm_config: LLMConfig, api_key: str, chat_completion_request: ChatCompletionRequest
 ) -> ChatCompletionResponse:
     """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions"""
     from letta.utils import printd
 
-    assert resource_name is not None, "Missing required field when calling Azure OpenAI"
-    assert deployment_id is not None, "Missing required field when calling Azure OpenAI"
-    assert api_version is not None, "Missing required field when calling Azure OpenAI"
     assert api_key is not None, "Missing required field when calling Azure OpenAI"
 
-    resource_name = clean_azure_endpoint(resource_name)
-    url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/chat/completions?api-version={api_version}"
     headers = {"Content-Type": "application/json", "api-key": f"{api_key}"}
+    data = chat_completion_request.model_dump(exclude_none=True)
 
     # If functions == None, strip from the payload
     if "functions" in data and data["functions"] is None:
@@ -95,11 +93,10 @@ def azure_openai_chat_completions_request(
         data.pop("tools")
         data.pop("tool_choice", None)  # extra safe, should exist always (default="auto")
 
-    printd(f"Sending request to {url}")
+    model_endpoint = get_azure_endpoint(llm_config, model_settings)
+    printd(f"Sending request to {model_endpoint}")
     try:
-
-        response = requests.post(url, headers=headers, json=data)
-        printd(f"response = {response}")
+        response = requests.post(model_endpoint, headers=headers, json=data)
         response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
         response = response.json()  # convert to dict from string
         printd(f"response.json = {response}")
@@ -128,7 +125,6 @@ def azure_openai_embeddings_request(
     """https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings"""
    from letta.utils import printd
 
-    resource_name = clean_azure_endpoint(resource_name)
     url = f"https://{resource_name}.openai.azure.com/openai/deployments/{deployment_id}/embeddings?api-version={api_version}"
     headers = {"Content-Type": "application/json", "api-key": f"{api_key}"}
 
```
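The refactor collapses the old `resource_name`/`deployment_id`/`api_version` parameters into `LLMConfig` plus `ModelSettings`, and `get_azure_endpoint` now derives the full chat-completions URL. An illustrative sketch of the URL it builds, mirroring the f-string in the diff (all values below are made up):

```python
# Inputs that get_azure_endpoint reads (values here are hypothetical):
azure_base_url = "https://my-resource.openai.azure.com"  # model_settings.azure_base_url
api_version = "2023-05-15"                               # llm_config.api_version
model = "gpt-4o-mini"                                    # MODEL_TO_AZURE_ENGINE[llm_config.model]

url = f"{azure_base_url}/openai/deployments/{model}/chat/completions?api-version={api_version}"
print(url)
# https://my-resource.openai.azure.com/openai/deployments/gpt-4o-mini/chat/completions?api-version=2023-05-15
```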
letta/llm_api/helpers.py
ADDED
```diff
@@ -0,0 +1,153 @@
+import copy
+import json
+import warnings
+from typing import List, Union
+
+import requests
+
+from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+from letta.schemas.enums import OptionState
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice
+from letta.utils import json_dumps
+
+
+# TODO update to use better types
+def add_inner_thoughts_to_functions(
+    functions: List[dict],
+    inner_thoughts_key: str,
+    inner_thoughts_description: str,
+    inner_thoughts_required: bool = True,
+    # inner_thoughts_to_front: bool = True, TODO support sorting somewhere, probably in the to_dict?
+) -> List[dict]:
+    """Add an inner_thoughts kwarg to every function in the provided list"""
+    # return copies
+    new_functions = []
+
+    # functions is a list of dicts in the OpenAI schema (https://platform.openai.com/docs/api-reference/chat/create)
+    for function_object in functions:
+        function_params = function_object["parameters"]["properties"]
+        required_params = list(function_object["parameters"]["required"])
+
+        # if the inner thoughts arg doesn't exist, add it
+        if inner_thoughts_key not in function_params:
+            function_params[inner_thoughts_key] = {
+                "type": "string",
+                "description": inner_thoughts_description,
+            }
+
+        # make sure it's tagged as required
+        new_function_object = copy.deepcopy(function_object)
+        if inner_thoughts_required and inner_thoughts_key not in required_params:
+            required_params.append(inner_thoughts_key)
+            new_function_object["parameters"]["required"] = required_params
+
+        new_functions.append(new_function_object)
+
+    # return a list of copies
+    return new_functions
+
+
+def unpack_all_inner_thoughts_from_kwargs(
+    response: ChatCompletionResponse,
+    inner_thoughts_key: str,
+) -> ChatCompletionResponse:
+    """Strip the inner thoughts out of the tool call and put it in the message content"""
+    if len(response.choices) == 0:
+        raise ValueError(f"Unpacking inner thoughts from empty response not supported")
+
+    new_choices = []
+    for choice in response.choices:
+        new_choices.append(unpack_inner_thoughts_from_kwargs(choice, inner_thoughts_key))
+
+    # return an updated copy
+    new_response = response.model_copy(deep=True)
+    new_response.choices = new_choices
+    return new_response
+
+
+def unpack_inner_thoughts_from_kwargs(choice: Choice, inner_thoughts_key: str) -> Choice:
+    message = choice.message
+    if message.role == "assistant" and message.tool_calls and len(message.tool_calls) >= 1:
+        if len(message.tool_calls) > 1:
+            warnings.warn(f"Unpacking inner thoughts from more than one tool call ({len(message.tool_calls)}) is not supported")
+        # TODO support multiple tool calls
+        tool_call = message.tool_calls[0]
+
+        try:
+            # Sadly we need to parse the JSON since args are in string format
+            func_args = dict(json.loads(tool_call.function.arguments))
+            if inner_thoughts_key in func_args:
+                # extract the inner thoughts
+                inner_thoughts = func_args.pop(inner_thoughts_key)
+
+                # replace the kwargs
+                new_choice = choice.model_copy(deep=True)
+                new_choice.message.tool_calls[0].function.arguments = json_dumps(func_args)
+                # also replace the message content
+                if new_choice.message.content is not None:
+                    warnings.warn(f"Overwriting existing inner monologue ({new_choice.message.content}) with kwarg ({inner_thoughts})")
+                new_choice.message.content = inner_thoughts
+
+                return new_choice
+            else:
+                warnings.warn(f"Did not find inner thoughts in tool call: {str(tool_call)}")
+
+        except json.JSONDecodeError as e:
+            warnings.warn(f"Failed to strip inner thoughts from kwargs: {e}")
+            raise e
+
+
+def is_context_overflow_error(exception: Union[requests.exceptions.RequestException, Exception]) -> bool:
+    """Checks if an exception is due to context overflow (based on common OpenAI response messages)"""
+    from letta.utils import printd
+
+    match_string = OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
+
+    # Backwards compatibility with openai python package/client v0.28 (pre-v1 client migration)
+    if match_string in str(exception):
+        printd(f"Found '{match_string}' in str(exception)={(str(exception))}")
+        return True
+
+    # Based on python requests + OpenAI REST API (/v1)
+    elif isinstance(exception, requests.exceptions.HTTPError):
+        if exception.response is not None and "application/json" in exception.response.headers.get("Content-Type", ""):
+            try:
+                error_details = exception.response.json()
+                if "error" not in error_details:
+                    printd(f"HTTPError occurred, but couldn't find error field: {error_details}")
+                    return False
+                else:
+                    error_details = error_details["error"]
+
+                # Check for the specific error code
+                if error_details.get("code") == "context_length_exceeded":
+                    printd(f"HTTPError occurred, caught error code {error_details.get('code')}")
+                    return True
+                # Soft-check for "maximum context length" inside of the message
+                elif error_details.get("message") and "maximum context length" in error_details.get("message"):
+                    printd(f"HTTPError occurred, found '{match_string}' in error message contents ({error_details})")
+                    return True
+                else:
+                    printd(f"HTTPError occurred, but unknown error message: {error_details}")
+                    return False
+            except ValueError:
+                # JSON decoding failed
+                printd(f"HTTPError occurred ({exception}), but no JSON error message.")
+
+    # Generic fail
+    else:
+        return False
+
+
+def derive_inner_thoughts_in_kwargs(inner_thoughts_in_kwargs_option: OptionState, model: str):
+    if inner_thoughts_in_kwargs_option == OptionState.DEFAULT:
+        # model that are known to not use `content` fields on tool calls
+        inner_thoughts_in_kwargs = "gpt-4o" in model or "gpt-4-turbo" in model or "gpt-3.5-turbo" in model
+    else:
+        inner_thoughts_in_kwargs = True if inner_thoughts_in_kwargs_option == OptionState.YES else False
+
+    if not isinstance(inner_thoughts_in_kwargs, bool):
+        warnings.warn(f"Bad type detected: {type(inner_thoughts_in_kwargs)}")
+        inner_thoughts_in_kwargs = bool(inner_thoughts_in_kwargs)
+
+    return inner_thoughts_in_kwargs
```
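Two usage sketches for the new helpers, assuming this wheel is installed. First, `add_inner_thoughts_to_functions` applied to a made-up OpenAI-style function schema; it returns copies with an extra required string parameter:

```python
from letta.llm_api.helpers import add_inner_thoughts_to_functions

functions = [
    {
        "name": "send_message",  # hypothetical function schema
        "description": "Send a message to the user",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        },
    }
]

new_functions = add_inner_thoughts_to_functions(
    functions,
    inner_thoughts_key="inner_thoughts",
    inner_thoughts_description="Private reasoning, not shown to the user",
)
print(new_functions[0]["parameters"]["required"])  # ['message', 'inner_thoughts']
```

Second, `is_context_overflow_error` as a guard around a raw HTTP call, mirroring how agent.py decides when to summarize; the wrapper function and its error handling here are illustrative, not letta API:

```python
import requests

from letta.llm_api.helpers import is_context_overflow_error


def post_chat_request(url: str, headers: dict, payload: dict) -> dict:
    """Illustrative wrapper: surface context-window overflows distinctly."""
    try:
        response = requests.post(url, headers=headers, json=payload)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        if is_context_overflow_error(err):
            # caller should summarize/trim the message history and retry
            raise RuntimeError("context window exceeded") from err
        raise
```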