letta-nightly 0.7.20.dev20250520104253__py3-none-any.whl → 0.7.21.dev20250521233415__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +290 -3
- letta/agents/base_agent.py +0 -55
- letta/agents/helpers.py +5 -0
- letta/agents/letta_agent.py +314 -64
- letta/agents/letta_agent_batch.py +102 -55
- letta/agents/voice_agent.py +5 -5
- letta/client/client.py +9 -18
- letta/constants.py +55 -1
- letta/functions/function_sets/builtin.py +27 -0
- letta/functions/mcp_client/stdio_client.py +1 -1
- letta/groups/sleeptime_multi_agent_v2.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +10 -1
- letta/interfaces/openai_streaming_interface.py +9 -2
- letta/llm_api/anthropic.py +21 -2
- letta/llm_api/anthropic_client.py +33 -6
- letta/llm_api/google_ai_client.py +136 -423
- letta/llm_api/google_vertex_client.py +173 -22
- letta/llm_api/llm_api_tools.py +27 -0
- letta/llm_api/llm_client.py +1 -1
- letta/llm_api/llm_client_base.py +32 -21
- letta/llm_api/openai.py +57 -0
- letta/llm_api/openai_client.py +7 -11
- letta/memory.py +0 -1
- letta/orm/__init__.py +1 -0
- letta/orm/enums.py +1 -0
- letta/orm/provider_trace.py +26 -0
- letta/orm/step.py +1 -0
- letta/schemas/provider_trace.py +43 -0
- letta/schemas/providers.py +210 -65
- letta/schemas/step.py +1 -0
- letta/schemas/tool.py +4 -0
- letta/server/db.py +37 -19
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +57 -34
- letta/server/rest_api/routers/v1/blocks.py +3 -3
- letta/server/rest_api/routers/v1/identities.py +24 -26
- letta/server/rest_api/routers/v1/jobs.py +3 -3
- letta/server/rest_api/routers/v1/llms.py +13 -8
- letta/server/rest_api/routers/v1/sandbox_configs.py +6 -6
- letta/server/rest_api/routers/v1/tags.py +3 -3
- letta/server/rest_api/routers/v1/telemetry.py +18 -0
- letta/server/rest_api/routers/v1/tools.py +6 -6
- letta/server/rest_api/streaming_response.py +105 -0
- letta/server/rest_api/utils.py +4 -0
- letta/server/server.py +140 -0
- letta/services/agent_manager.py +251 -18
- letta/services/block_manager.py +52 -37
- letta/services/helpers/noop_helper.py +10 -0
- letta/services/identity_manager.py +43 -38
- letta/services/job_manager.py +29 -0
- letta/services/message_manager.py +111 -0
- letta/services/sandbox_config_manager.py +36 -0
- letta/services/step_manager.py +146 -0
- letta/services/telemetry_manager.py +58 -0
- letta/services/tool_executor/tool_execution_manager.py +49 -5
- letta/services/tool_executor/tool_execution_sandbox.py +47 -0
- letta/services/tool_executor/tool_executor.py +236 -7
- letta/services/tool_manager.py +160 -1
- letta/services/tool_sandbox/e2b_sandbox.py +65 -3
- letta/settings.py +10 -2
- letta/tracing.py +5 -5
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/METADATA +3 -2
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/RECORD +67 -60
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.20.dev20250520104253.dist-info → letta_nightly-0.7.21.dev20250521233415.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_vertex_client.py
CHANGED
@@ -5,14 +5,16 @@ from typing import List, Optional
 from google import genai
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig
 
+from letta.constants import NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps, json_loads
-from letta.llm_api.
+from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
+from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
 from letta.settings import model_settings, settings
 from letta.utils import get_tool_call_id
@@ -20,18 +22,21 @@ from letta.utils import get_tool_call_id
 logger = get_logger(__name__)
 
 
-class GoogleVertexClient(
+class GoogleVertexClient(LLMClientBase):
 
-    def
-
-        Performs underlying request to llm and returns raw response.
-        """
-        client = genai.Client(
+    def _get_client(self):
+        return genai.Client(
             vertexai=True,
             project=model_settings.google_cloud_project,
             location=model_settings.google_cloud_location,
             http_options={"api_version": "v1"},
         )
+
+    def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying request to llm and returns raw response.
+        """
+        client = self._get_client()
         response = client.models.generate_content(
             model=llm_config.model,
             contents=request_data["contents"],
@@ -43,12 +48,7 @@ class GoogleVertexClient(GoogleAIClient):
         """
         Performs underlying request to llm and returns raw response.
         """
-        client =
-            vertexai=True,
-            project=model_settings.google_cloud_project,
-            location=model_settings.google_cloud_location,
-            http_options={"api_version": "v1"},
-        )
+        client = self._get_client()
         response = await client.aio.models.generate_content(
             model=llm_config.model,
             contents=request_data["contents"],
@@ -56,6 +56,139 @@ class GoogleVertexClient(GoogleAIClient):
         )
         return response.model_dump()
 
+    def add_dummy_model_messages(self, messages: List[dict]) -> List[dict]:
+        """Google AI API requires all function call returns are immediately followed by a 'model' role message.
+
+        In Letta, the 'model' will often call a function (e.g. send_message) that itself yields to the user,
+        so there is no natural follow-up 'model' role message.
+
+        To satisfy the Google AI API restrictions, we can add a dummy 'yield' message
+        with role == 'model' that is placed in-betweeen and function output
+        (role == 'tool') and user message (role == 'user').
+        """
+        dummy_yield_message = {
+            "role": "model",
+            "parts": [{"text": f"{NON_USER_MSG_PREFIX}Function call returned, waiting for user response."}],
+        }
+        messages_with_padding = []
+        for i, message in enumerate(messages):
+            messages_with_padding.append(message)
+            # Check if the current message role is 'tool' and the next message role is 'user'
+            if message["role"] in ["tool", "function"] and (i + 1 < len(messages) and messages[i + 1]["role"] == "user"):
+                messages_with_padding.append(dummy_yield_message)
+
+        return messages_with_padding
+
+    def _clean_google_ai_schema_properties(self, schema_part: dict):
+        """Recursively clean schema parts to remove unsupported Google AI keywords."""
+        if not isinstance(schema_part, dict):
+            return
+
+        # Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
+        # * Only a subset of the OpenAPI schema is supported.
+        # * Supported parameter types in Python are limited.
+        unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"]
+        keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
+        for key_to_remove in keys_to_remove_at_this_level:
+            logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
+            del schema_part[key_to_remove]
+
+        if schema_part.get("type") == "string" and "format" in schema_part:
+            allowed_formats = ["enum", "date-time"]
+            if schema_part["format"] not in allowed_formats:
+                logger.warning(f"Removing unsupported format '{schema_part['format']}' for string type. Allowed: {allowed_formats}")
+                del schema_part["format"]
+
+        # Check properties within the current level
+        if "properties" in schema_part and isinstance(schema_part["properties"], dict):
+            for prop_name, prop_schema in schema_part["properties"].items():
+                self._clean_google_ai_schema_properties(prop_schema)
+
+        # Check items within arrays
+        if "items" in schema_part and isinstance(schema_part["items"], dict):
+            self._clean_google_ai_schema_properties(schema_part["items"])
+
+        # Check within anyOf, allOf, oneOf lists
+        for key in ["anyOf", "allOf", "oneOf"]:
+            if key in schema_part and isinstance(schema_part[key], list):
+                for item_schema in schema_part[key]:
+                    self._clean_google_ai_schema_properties(item_schema)
+
+    def convert_tools_to_google_ai_format(self, tools: List[Tool], llm_config: LLMConfig) -> List[dict]:
+        """
+        OpenAI style:
+          "tools": [{
+            "type": "function",
+            "function": {
+                "name": "find_movies",
+                "description": "find ....",
+                "parameters": {
+                  "type": "object",
+                  "properties": {
+                     PARAM: {
+                       "type": PARAM_TYPE, # eg "string"
+                       "description": PARAM_DESCRIPTION,
+                     },
+                     ...
+                  },
+                  "required": List[str],
+                }
+            }
+          }
+        ]
+
+        Google AI style:
+          "tools": [{
+            "functionDeclarations": [{
+              "name": "find_movies",
+              "description": "find movie titles currently playing in theaters based on any description, genre, title words, etc.",
+              "parameters": {
+                "type": "OBJECT",
+                "properties": {
+                  "location": {
+                    "type": "STRING",
+                    "description": "The city and state, e.g. San Francisco, CA or a zip code e.g. 95616"
+                  },
+                  "description": {
+                    "type": "STRING",
+                    "description": "Any kind of description including category or genre, title words, attributes, etc."
+                  }
+                },
+                "required": ["description"]
+              }
+            }, {
+              "name": "find_theaters",
+              ...
+        """
+        function_list = [
+            dict(
+                name=t.function.name,
+                description=t.function.description,
+                parameters=t.function.parameters, # TODO need to unpack
+            )
+            for t in tools
+        ]
+
+        # Add inner thoughts if needed
+        for func in function_list:
+            # Note: Google AI API used to have weird casing requirements, but not any more
+
+            # Google AI API only supports a subset of OpenAPI 3.0, so unsupported params must be cleaned
+            if "parameters" in func and isinstance(func["parameters"], dict):
+                self._clean_google_ai_schema_properties(func["parameters"])
+
+            # Add inner thoughts
+            if llm_config.put_inner_thoughts_in_kwargs:
+                from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX
+
+                func["parameters"]["properties"][INNER_THOUGHTS_KWARG_VERTEX] = {
+                    "type": "string",
+                    "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
+                }
+                func["parameters"]["required"].append(INNER_THOUGHTS_KWARG_VERTEX)
+
+        return [{"functionDeclarations": function_list}]
+
     def build_request_data(
         self,
         messages: List[PydanticMessage],
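The padding helper above can be read as follows: whenever a 'tool'/'function' message is immediately followed by a 'user' message, a dummy 'model' turn is inserted between them. A minimal sketch (not part of the diff), assuming vertex_client is an already-constructed GoogleVertexClient and using simplified placeholder message dicts:

    # Illustrative only; message contents are placeholders, not real Gemini payloads.
    messages = [
        {"role": "user", "parts": [{"text": "What is the weather?"}]},
        {"role": "model", "parts": [{"text": "Calling get_weather..."}]},
        {"role": "tool", "parts": [{"text": "get_weather returned: sunny"}]},
        {"role": "user", "parts": [{"text": "Thanks!"}]},
    ]
    padded = vertex_client.add_dummy_model_messages(messages)
    # A dummy {"role": "model", ...} entry is inserted between the 'tool' message and the
    # trailing 'user' message, satisfying the Google AI ordering restriction.
    assert len(padded) == 5 and padded[3]["role"] == "model"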
@@ -66,11 +199,29 @@ class GoogleVertexClient(GoogleAIClient):
         """
         Constructs a request object in the expected data format for this client.
         """
-        request_data = super().build_request_data(messages, llm_config, tools, force_tool_call)
-        request_data["config"] = request_data.pop("generation_config")
-        request_data["config"]["tools"] = request_data.pop("tools")
 
-
+        if tools:
+            tool_objs = [Tool(type="function", function=t) for t in tools]
+            tool_names = [t.function.name for t in tool_objs]
+            # Convert to the exact payload style Google expects
+            formatted_tools = self.convert_tools_to_google_ai_format(tool_objs, llm_config)
+        else:
+            formatted_tools = []
+            tool_names = []
+
+        contents = self.add_dummy_model_messages(
+            [m.to_google_ai_dict() for m in messages],
+        )
+
+        request_data = {
+            "contents": contents,
+            "config": {
+                "temperature": llm_config.temperature,
+                "max_output_tokens": llm_config.max_tokens,
+                "tools": formatted_tools,
+            },
+        }
+
         if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
             request_data["config"]["response_mime_type"] = "application/json"
             request_data["config"]["response_schema"] = self.get_function_call_response_schema(tools[0])
@@ -89,11 +240,11 @@ class GoogleVertexClient(GoogleAIClient):
         # Add thinking_config
         # If enable_reasoner is False, set thinking_budget to 0
         # Otherwise, use the value from max_reasoning_tokens
-
-
-
-
-
+        if llm_config.enable_reasoner:
+            thinking_config = ThinkingConfig(
+                thinking_budget=llm_config.max_reasoning_tokens,
+            )
+            request_data["config"]["thinking_config"] = thinking_config.model_dump()
 
         return request_data
 
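Taken together with the build_request_data rewrite above, the payload handed to the google-genai SDK now has roughly this shape (a sketch with placeholder values; only the keys visible in the diff are assumed):

    request_data = {
        "contents": [...],  # Gemini-style messages, padded by add_dummy_model_messages
        "config": {
            "temperature": 0.7,
            "max_output_tokens": 4096,
            "tools": [{"functionDeclarations": [...]}],
            # "thinking_config" is attached only when llm_config.enable_reasoner is set
        },
    }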
letta/llm_api/llm_api_tools.py
CHANGED
@@ -20,15 +20,19 @@ from letta.llm_api.openai import (
     build_openai_chat_completions_request,
     openai_chat_completions_process_stream,
     openai_chat_completions_request,
+    prepare_openai_payload,
 )
 from letta.local_llm.chat_completion_proxy import get_chat_completion
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
+from letta.orm.user import User
 from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.services.telemetry_manager import TelemetryManager
 from letta.settings import ModelSettings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
 from letta.tracing import log_event, trace_method
@@ -142,6 +146,9 @@ def create(
     model_settings: Optional[dict] = None, # TODO: eventually pass from server
     put_inner_thoughts_first: bool = True,
     name: Optional[str] = None,
+    telemetry_manager: Optional[TelemetryManager] = None,
+    step_id: Optional[str] = None,
+    actor: Optional[User] = None,
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
     from letta.utils import printd
@@ -233,6 +240,16 @@ def create(
         if isinstance(stream_interface, AgentChunkStreamingInterface):
             stream_interface.stream_end()
 
+        telemetry_manager.create_provider_trace(
+            actor=actor,
+            provider_trace_create=ProviderTraceCreate(
+                request_json=prepare_openai_payload(data),
+                response_json=response.model_json_schema(),
+                step_id=step_id,
+                organization_id=actor.organization_id,
+            ),
+        )
+
         if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
@@ -407,6 +424,16 @@ def create(
         if llm_config.put_inner_thoughts_in_kwargs:
             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
 
+        telemetry_manager.create_provider_trace(
+            actor=actor,
+            provider_trace_create=ProviderTraceCreate(
+                request_json=chat_completion_request.model_json_schema(),
+                response_json=response.model_json_schema(),
+                step_id=step_id,
+                organization_id=actor.organization_id,
+            ),
+        )
+
         return response
 
         # elif llm_config.model_endpoint_type == "cohere":
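A minimal sketch of how a caller might thread the new telemetry arguments through create() (illustrative only; the other required arguments and the surrounding objects are assumed to already exist in the caller's scope):

    response = create(
        llm_config=llm_config,
        messages=messages,
        # ... existing arguments unchanged ...
        telemetry_manager=TelemetryManager(),  # persists request/response JSON for the step
        step_id=step_id,                       # links the trace to a Step row
        actor=actor,                           # supplies organization_id for the trace
    )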
letta/llm_api/llm_client.py
CHANGED
letta/llm_api/llm_client_base.py
CHANGED
@@ -9,7 +9,9 @@ from letta.errors import LLMError
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
-from letta.
+from letta.schemas.provider_trace import ProviderTraceCreate
+from letta.services.telemetry_manager import TelemetryManager
+from letta.tracing import log_event, trace_method
 
 if TYPE_CHECKING:
     from letta.orm import User
@@ -31,13 +33,15 @@ class LLMClientBase:
         self.put_inner_thoughts_first = put_inner_thoughts_first
         self.use_tool_naming = use_tool_naming
 
+    @trace_method
     def send_llm_request(
         self,
         messages: List[Message],
         llm_config: LLMConfig,
         tools: Optional[List[dict]] = None, # TODO: change to Tool object
-        stream: bool = False,
         force_tool_call: Optional[str] = None,
+        telemetry_manager: Optional["TelemetryManager"] = None,
+        step_id: Optional[str] = None,
     ) -> Union[ChatCompletionResponse, Stream[ChatCompletionChunk]]:
         """
         Issues a request to the downstream model endpoint and parses response.
@@ -48,37 +52,51 @@ class LLMClientBase:
 
         try:
             log_event(name="llm_request_sent", attributes=request_data)
-
-
-
-
+            response_data = self.request(request_data, llm_config)
+            if step_id and telemetry_manager:
+                telemetry_manager.create_provider_trace(
+                    actor=self.actor,
+                    provider_trace_create=ProviderTraceCreate(
+                        request_json=request_data,
+                        response_json=response_data,
+                        step_id=step_id,
+                        organization_id=self.actor.organization_id,
+                    ),
+                )
             log_event(name="llm_response_received", attributes=response_data)
         except Exception as e:
             raise self.handle_llm_error(e)
 
         return self.convert_response_to_chat_completion(response_data, messages, llm_config)
 
+    @trace_method
     async def send_llm_request_async(
         self,
+        request_data: dict,
         messages: List[Message],
         llm_config: LLMConfig,
-
-
-        force_tool_call: Optional[str] = None,
+        telemetry_manager: "TelemetryManager | None" = None,
+        step_id: str | None = None,
     ) -> Union[ChatCompletionResponse, AsyncStream[ChatCompletionChunk]]:
         """
         Issues a request to the downstream model endpoint.
         If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
         Otherwise returns a ChatCompletionResponse.
         """
-        request_data = self.build_request_data(messages, llm_config, tools, force_tool_call)
 
         try:
             log_event(name="llm_request_sent", attributes=request_data)
-
-
-
-
+            response_data = await self.request_async(request_data, llm_config)
+            await telemetry_manager.create_provider_trace_async(
+                actor=self.actor,
+                provider_trace_create=ProviderTraceCreate(
+                    request_json=request_data,
+                    response_json=response_data,
+                    step_id=step_id,
+                    organization_id=self.actor.organization_id,
+                ),
+            )
+
             log_event(name="llm_response_received", attributes=response_data)
         except Exception as e:
             raise self.handle_llm_error(e)
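With the updated signatures, a concrete client is driven roughly as below (a sketch, not part of the diff, assuming llm_client is an already-constructed LLMClientBase subclass); on the sync path a provider trace is only written when both step_id and telemetry_manager are supplied:

    response = llm_client.send_llm_request(
        messages=messages,
        llm_config=llm_config,
        tools=tools,
        telemetry_manager=TelemetryManager(),
        step_id=step_id,
    )

    # The async variant now expects a pre-built request payload (inside a coroutine):
    request_data = llm_client.build_request_data(messages, llm_config, tools, force_tool_call=None)
    response = await llm_client.send_llm_request_async(
        request_data=request_data,
        messages=messages,
        llm_config=llm_config,
        telemetry_manager=TelemetryManager(),
        step_id=step_id,
    )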
@@ -133,13 +151,6 @@ class LLMClientBase:
         """
         raise NotImplementedError
 
-    @abstractmethod
-    def stream(self, request_data: dict, llm_config: LLMConfig) -> Stream[ChatCompletionChunk]:
-        """
-        Performs underlying streaming request to llm and returns raw response.
-        """
-        raise NotImplementedError(f"Streaming is not supported for {llm_config.model_endpoint_type}")
-
     @abstractmethod
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
         """
letta/llm_api/openai.py
CHANGED
@@ -1,6 +1,7 @@
 import warnings
 from typing import Generator, List, Optional, Union
 
+import httpx
 import requests
 from openai import OpenAI
 
@@ -110,6 +111,62 @@ def openai_get_model_list(url: str, api_key: Optional[str] = None, fix_url: bool
         raise e
 
 
+async def openai_get_model_list_async(
+    url: str,
+    api_key: Optional[str] = None,
+    fix_url: bool = False,
+    extra_params: Optional[dict] = None,
+    client: Optional["httpx.AsyncClient"] = None,
+) -> dict:
+    """https://platform.openai.com/docs/api-reference/models/list"""
+    from letta.utils import printd
+
+    # In some cases we may want to double-check the URL and do basic correction
+    if fix_url and not url.endswith("/v1"):
+        url = smart_urljoin(url, "v1")
+
+    url = smart_urljoin(url, "models")
+
+    headers = {"Content-Type": "application/json"}
+    if api_key is not None:
+        headers["Authorization"] = f"Bearer {api_key}"
+
+    printd(f"Sending request to {url}")
+
+    # Use provided client or create a new one
+    close_client = False
+    if client is None:
+        client = httpx.AsyncClient()
+        close_client = True
+
+    try:
+        response = await client.get(url, headers=headers, params=extra_params)
+        response.raise_for_status()
+        result = response.json()
+        printd(f"response = {result}")
+        return result
+    except httpx.HTTPStatusError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        error_response = None
+        try:
+            error_response = http_err.response.json()
+        except:
+            error_response = {"status_code": http_err.response.status_code, "text": http_err.response.text}
+        printd(f"Got HTTPError, exception={http_err}, response={error_response}")
+        raise http_err
+    except httpx.RequestError as req_err:
+        # Handle other httpx-related errors (e.g., connection error)
+        printd(f"Got RequestException, exception={req_err}")
+        raise req_err
+    except Exception as e:
+        # Handle other potential errors
+        printd(f"Got unknown Exception, exception={e}")
+        raise e
+    finally:
+        if close_client:
+            await client.aclose()
+
+
 def build_openai_chat_completions_request(
     llm_config: LLMConfig,
     messages: List[_Message],
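A small usage sketch for the new async model-list helper (not part of the diff; the URL and key are placeholders, and the asyncio scaffolding is added here purely for illustration):

    import asyncio

    import httpx


    async def main():
        async with httpx.AsyncClient() as http:
            models = await openai_get_model_list_async(
                "https://api.openai.com/v1",
                api_key="sk-...",
                client=http,  # caller-owned client; the helper only closes clients it created itself
            )
        print([m["id"] for m in models.get("data", [])])


    asyncio.run(main())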
letta/llm_api/openai_client.py
CHANGED
@@ -2,7 +2,7 @@ import os
 from typing import List, Optional
 
 import openai
-from openai import AsyncOpenAI, AsyncStream, OpenAI
+from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
@@ -22,7 +22,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_st
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.log import get_logger
-from letta.schemas.enums import ProviderCategory
+from letta.schemas.enums import ProviderCategory, ProviderType
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
@@ -113,6 +113,8 @@ class OpenAIClient(LLMClientBase):
             from letta.services.provider_manager import ProviderManager
 
             api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
+        if llm_config.model_endpoint_type == ProviderType.together:
+            api_key = model_settings.together_api_key or os.environ.get("TOGETHER_API_KEY")
 
         if not api_key:
             api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
@@ -254,20 +256,14 @@ class OpenAIClient(LLMClientBase):
 
         return chat_completion_response
 
-    def stream(self, request_data: dict, llm_config: LLMConfig) -> Stream[ChatCompletionChunk]:
-        """
-        Performs underlying streaming request to OpenAI and returns the stream iterator.
-        """
-        client = OpenAI(**self._prepare_client_kwargs(llm_config))
-        response_stream: Stream[ChatCompletionChunk] = client.chat.completions.create(**request_data, stream=True)
-        return response_stream
-
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[ChatCompletionChunk]:
         """
         Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
         """
         client = AsyncOpenAI(**self._prepare_client_kwargs(llm_config))
-        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(
+            **request_data, stream=True, stream_options={"include_usage": True}
+        )
         return response_stream
 
     def handle_llm_error(self, e: Exception) -> Exception:
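For context on the stream_options change (an illustrative sketch, not part of the diff): with include_usage enabled, OpenAI emits a final chunk that carries usage and has an empty choices list, so consumers typically guard on both fields:

    # Assumes `openai_client` is an OpenAIClient instance and request_data/llm_config are already prepared.
    stream = await openai_client.stream_async(request_data, llm_config)
    async for chunk in stream:
        if chunk.usage is not None:  # usage-only chunk produced because include_usage=True
            print(chunk.usage.total_tokens)
        elif chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")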
letta/memory.py
CHANGED
letta/orm/__init__.py
CHANGED
@@ -19,6 +19,7 @@ from letta.orm.message import Message
 from letta.orm.organization import Organization
 from letta.orm.passage import AgentPassage, BasePassage, SourcePassage
 from letta.orm.provider import Provider
+from letta.orm.provider_trace import ProviderTrace
 from letta.orm.sandbox_config import AgentEnvironmentVariable, SandboxConfig, SandboxEnvironmentVariable
 from letta.orm.source import Source
 from letta.orm.sources_agents import SourcesAgents
letta/orm/enums.py
CHANGED
@@ -8,6 +8,7 @@ class ToolType(str, Enum):
     LETTA_MULTI_AGENT_CORE = "letta_multi_agent_core"
     LETTA_SLEEPTIME_CORE = "letta_sleeptime_core"
     LETTA_VOICE_SLEEPTIME_CORE = "letta_voice_sleeptime_core"
+    LETTA_BUILTIN = "letta_builtin"
     EXTERNAL_COMPOSIO = "external_composio"
     EXTERNAL_LANGCHAIN = "external_langchain"
     # TODO is "external" the right name here? Since as of now, MCP is local / doesn't support remote?
letta/orm/provider_trace.py
ADDED
@@ -0,0 +1,26 @@
+import uuid
+
+from sqlalchemy import JSON, Index, String
+from sqlalchemy.orm import Mapped, mapped_column, relationship
+
+from letta.orm.mixins import OrganizationMixin
+from letta.orm.sqlalchemy_base import SqlalchemyBase
+from letta.schemas.provider_trace import ProviderTrace as PydanticProviderTrace
+
+
+class ProviderTrace(SqlalchemyBase, OrganizationMixin):
+    """Defines data model for storing provider trace information"""
+
+    __tablename__ = "provider_traces"
+    __pydantic_model__ = PydanticProviderTrace
+    __table_args__ = (Index("ix_step_id", "step_id"),)
+
+    id: Mapped[str] = mapped_column(
+        primary_key=True, doc="Unique provider trace identifier", default=lambda: f"provider_trace-{uuid.uuid4()}"
+    )
+    request_json: Mapped[dict] = mapped_column(JSON, doc="JSON content of the provider request")
+    response_json: Mapped[dict] = mapped_column(JSON, doc="JSON content of the provider response")
+    step_id: Mapped[str] = mapped_column(String, nullable=True, doc="ID of the step that this trace is associated with")
+
+    # Relationships
+    organization: Mapped["Organization"] = relationship("Organization", lazy="selectin")
letta/orm/step.py
CHANGED
@@ -35,6 +35,7 @@ class Step(SqlalchemyBase):
     )
     agent_id: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
     provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.")
+    provider_category: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The category of the provider used for this step.")
     model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
     model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.")
     context_window_limit: Mapped[Optional[int]] = mapped_column(
letta/schemas/provider_trace.py
ADDED
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+from datetime import datetime
+from typing import Any, Dict, Optional
+
+from pydantic import BaseModel, Field
+
+from letta.helpers.datetime_helpers import get_utc_time
+from letta.schemas.letta_base import OrmMetadataBase
+
+
+class BaseProviderTrace(OrmMetadataBase):
+    __id_prefix__ = "provider_trace"
+
+
+class ProviderTraceCreate(BaseModel):
+    """Request to create a provider trace"""
+
+    request_json: dict[str, Any] = Field(..., description="JSON content of the provider request")
+    response_json: dict[str, Any] = Field(..., description="JSON content of the provider response")
+    step_id: str = Field(None, description="ID of the step that this trace is associated with")
+    organization_id: str = Field(..., description="The unique identifier of the organization.")
+
+
+class ProviderTrace(BaseProviderTrace):
+    """
+    Letta's internal representation of a provider trace.
+
+    Attributes:
+        id (str): The unique identifier of the provider trace.
+        request_json (Dict[str, Any]): JSON content of the provider request.
+        response_json (Dict[str, Any]): JSON content of the provider response.
+        step_id (str): ID of the step that this trace is associated with.
+        organization_id (str): The unique identifier of the organization.
+        created_at (datetime): The timestamp when the object was created.
+    """
+
+    id: str = BaseProviderTrace.generate_id_field()
+    request_json: Dict[str, Any] = Field(..., description="JSON content of the provider request")
+    response_json: Dict[str, Any] = Field(..., description="JSON content of the provider response")
+    step_id: Optional[str] = Field(None, description="ID of the step that this trace is associated with")
+    organization_id: str = Field(..., description="The unique identifier of the organization.")
+    created_at: datetime = Field(default_factory=get_utc_time, description="The timestamp when the object was created.")