letta-nightly 0.6.32.dev20250224104119__py3-none-any.whl → 0.6.33.dev20250226015402__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +10 -5
- letta/constants.py +7 -0
- letta/functions/ast_parsers.py +13 -0
- letta/functions/schema_generator.py +14 -2
- letta/helpers/composio_helpers.py +3 -2
- letta/helpers/tool_execution_helper.py +171 -0
- letta/llm_api/anthropic.py +37 -1
- letta/orm/source.py +2 -2
- letta/schemas/openai/chat_completion_request.py +1 -1
- letta/schemas/providers.py +43 -4
- letta/schemas/tool.py +1 -13
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +225 -35
- letta/server/rest_api/routers/v1/agents.py +57 -55
- letta/server/rest_api/routers/v1/blocks.py +12 -12
- letta/server/rest_api/routers/v1/identities.py +12 -12
- letta/server/rest_api/routers/v1/jobs.py +8 -8
- letta/server/rest_api/routers/v1/providers.py +11 -5
- letta/server/rest_api/routers/v1/runs.py +12 -12
- letta/server/rest_api/routers/v1/sandbox_configs.py +24 -24
- letta/server/rest_api/routers/v1/sources.py +20 -20
- letta/server/rest_api/routers/v1/steps.py +8 -7
- letta/server/rest_api/routers/v1/tags.py +2 -2
- letta/server/rest_api/routers/v1/tools.py +20 -20
- letta/server/rest_api/utils.py +2 -3
- letta/services/agent_manager.py +36 -0
- letta/services/block_manager.py +4 -2
- letta/services/identity_manager.py +6 -0
- letta/services/message_manager.py +4 -1
- letta/services/provider_manager.py +10 -9
- letta/services/step_manager.py +2 -2
- {letta_nightly-0.6.32.dev20250224104119.dist-info → letta_nightly-0.6.33.dev20250226015402.dist-info}/METADATA +1 -1
- {letta_nightly-0.6.32.dev20250224104119.dist-info → letta_nightly-0.6.33.dev20250226015402.dist-info}/RECORD +36 -35
- {letta_nightly-0.6.32.dev20250224104119.dist-info → letta_nightly-0.6.33.dev20250226015402.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.32.dev20250224104119.dist-info → letta_nightly-0.6.33.dev20250226015402.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.32.dev20250224104119.dist-info → letta_nightly-0.6.33.dev20250226015402.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py

```diff
@@ -1,19 +1,39 @@
 import asyncio
+import json
+import uuid
 from typing import TYPE_CHECKING, List, Optional, Union
 
 import httpx
 import openai
 from fastapi import APIRouter, Body, Depends, Header, HTTPException
 from fastapi.responses import StreamingResponse
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta
 from openai.types.chat.completion_create_params import CompletionCreateParams
 from starlette.concurrency import run_in_threadpool
 
 from letta.agent import Agent
-from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, LETTA_TOOL_SET, NON_USER_MSG_PREFIX, PRE_EXECUTION_MESSAGE_ARG
+from letta.helpers.tool_execution_helper import (
+    add_pre_execution_message,
+    enable_strict_mode,
+    execute_external_tool,
+    remove_request_heartbeat,
+)
 from letta.log import get_logger
+from letta.orm.enums import ToolType
 from letta.schemas.message import Message, MessageCreate
+from letta.schemas.openai.chat_completion_request import (
+    AssistantMessage,
+    ChatCompletionRequest,
+    Tool,
+    ToolCall,
+    ToolCallFunction,
+    ToolMessage,
+    UserMessage,
+)
 from letta.schemas.user import User
 from letta.server.rest_api.chat_completions_interface import ChatCompletionsStreamingInterface
+from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
 
 # TODO this belongs in a controller!
 from letta.server.rest_api.utils import (
@@ -52,20 +72,53 @@ async def create_fast_chat_completions(
     server: "SyncServer" = Depends(get_letta_server),
     user_id: Optional[str] = Header(None, alias="user_id"),
 ):
-
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+
     agent_id = str(completion_request.get("user", None))
     if agent_id is None:
-
-        logger.error(error_msg)
-        raise HTTPException(status_code=400, detail=error_msg)
-    model = completion_request.get("model")
+        raise HTTPException(status_code=400, detail="Must pass agent_id in the 'user' field")
 
-
+    agent_state = server.agent_manager.get_agent_by_id(agent_id=agent_id, actor=actor)
+    if agent_state.llm_config.model_endpoint_type != "openai":
+        raise HTTPException(status_code=400, detail="Only OpenAI models are supported by this endpoint.")
+
+    # Convert Letta messages to OpenAI messages
+    in_context_messages = server.message_manager.get_messages_by_ids(message_ids=agent_state.message_ids, actor=actor)
+    openai_messages = convert_letta_messages_to_openai(in_context_messages)
+
+    # Also parse user input from completion_request and append
+    input_message = get_messages_from_completion_request(completion_request)[-1]
+    openai_messages.append(input_message)
+
+    # Tools we allow this agent to call
+    tools = [t for t in agent_state.tools if t.name not in LETTA_TOOL_SET and t.tool_type in {ToolType.EXTERNAL_COMPOSIO, ToolType.CUSTOM}]
+
+    # Initial request
+    openai_request = ChatCompletionRequest(
+        model=agent_state.llm_config.model,
+        messages=openai_messages,
+        # TODO: This nested thing here is so ugly, need to refactor
+        tools=(
+            [
+                Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema))))
+                for t in tools
+            ]
+            if tools
+            else None
+        ),
+        tool_choice="auto",
+        user=user_id,
+        max_completion_tokens=agent_state.llm_config.max_tokens,
+        temperature=agent_state.llm_config.temperature,
+        stream=True,
+    )
+
+    # Create the OpenAI async client
     client = openai.AsyncClient(
         api_key=model_settings.openai_api_key,
         max_retries=0,
         http_client=httpx.AsyncClient(
-            timeout=httpx.Timeout(connect=15.0, read=
+            timeout=httpx.Timeout(connect=15.0, read=30.0, write=15.0, pool=15.0),
             follow_redirects=True,
             limits=httpx.Limits(
                 max_connections=50,
@@ -75,38 +128,175 @@ async def create_fast_chat_completions(
         ),
     )
 
-    #
-    input_message =
-
-
-    # Get in context messages
-    in_context_messages = server.agent_manager.get_in_context_messages(agent_id=agent_id, actor=actor)
-    openai_dict_in_context_messages = convert_letta_messages_to_openai(in_context_messages)
-    openai_dict_in_context_messages.append(input_message)
+    # The messages we want to persist to the Letta agent
+    user_message = create_user_message(input_message=input_message, agent_id=agent_id, actor=actor)
+    message_db_queue = [user_message]
 
     async def event_stream():
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        """
+        A function-calling loop:
+          - We stream partial tokens.
+          - If we detect a tool call (finish_reason="tool_calls"), we parse it,
+            add two messages to the conversation:
+              (a) assistant message with tool_calls referencing the same ID
+              (b) a tool message referencing that ID, containing the tool result.
+          - Re-invoke the OpenAI request with updated conversation, streaming again.
+          - End when finish_reason="stop" or no more tool calls.
+        """
+
+        # We'll keep updating this conversation in a loop
+        conversation = openai_messages[:]
+
+        while True:
+            # Make the streaming request to OpenAI
+            stream = await client.chat.completions.create(**openai_request.model_dump(exclude_unset=True))
+
+            content_buffer = []
+            tool_call_name = None
+            tool_call_args_str = ""
+            tool_call_id = None
+            tool_call_happened = False
+            finish_reason_stop = False
+            optimistic_json_parser = OptimisticJSONParser(strict=True)
+            current_parsed_json_result = {}
+
+            async with stream:
+                async for chunk in stream:
+                    choice = chunk.choices[0]
+                    delta = choice.delta
+                    finish_reason = choice.finish_reason  # "tool_calls", "stop", or None
+
+                    if delta.content:
+                        content_buffer.append(delta.content)
+                        yield f"data: {chunk.model_dump_json()}\n\n"
+
+                    # CASE B: Partial tool call info
+                    if delta.tool_calls:
+                        # Typically there's only one in delta.tool_calls
+                        tc = delta.tool_calls[0]
+                        if tc.function.name:
+                            tool_call_name = tc.function.name
+                        if tc.function.arguments:
+                            tool_call_args_str += tc.function.arguments
+
+                            # See if we can stream out the pre-execution message
+                            parsed_args = optimistic_json_parser.parse(tool_call_args_str)
+                            if parsed_args.get(
+                                PRE_EXECUTION_MESSAGE_ARG
+                            ) and current_parsed_json_result.get(  # Ensure key exists and is not None/empty
+                                PRE_EXECUTION_MESSAGE_ARG
+                            ) != parsed_args.get(
+                                PRE_EXECUTION_MESSAGE_ARG
+                            ):
+                                # Only stream if there's something new to stream
+                                # We do this way to avoid hanging JSON at the end of the stream, e.g. '}'
+                                if parsed_args != current_parsed_json_result:
+                                    current_parsed_json_result = parsed_args
+                                    synthetic_chunk = ChatCompletionChunk(
+                                        id=chunk.id,
+                                        object=chunk.object,
+                                        created=chunk.created,
+                                        model=chunk.model,
+                                        choices=[
+                                            Choice(
+                                                index=choice.index,
+                                                delta=ChoiceDelta(content=tc.function.arguments, role="assistant"),
+                                                finish_reason=None,
+                                            )
+                                        ],
+                                    )
+
+                                    yield f"data: {synthetic_chunk.model_dump_json()}\n\n"
+
+                        # We might generate a unique ID for the tool call
+                        if tc.id:
+                            tool_call_id = tc.id
+
+                    # Check finish_reason
+                    if finish_reason == "tool_calls":
+                        tool_call_happened = True
+                        break
+                    elif finish_reason == "stop":
+                        finish_reason_stop = True
+                        break
+
+            if content_buffer:
+                # We treat that partial text as an assistant message
+                content = "".join(content_buffer)
+                conversation.append({"role": "assistant", "content": content})
+
+                # Create an assistant message here to persist later
+                assistant_message = create_assistant_message_from_openai_response(
+                    response_text=content, agent_id=agent_id, model=agent_state.llm_config.model, actor=actor
+                )
+                message_db_queue.append(assistant_message)
+
+            if tool_call_happened:
+                # Parse the tool call arguments
+                try:
+                    tool_args = json.loads(tool_call_args_str)
+                except json.JSONDecodeError:
+                    tool_args = {}
+
+                if not tool_call_id:
+                    # If no tool_call_id given by the model, generate one
+                    tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
+
+                # 1) Insert the "assistant" message with the tool_calls field
+                #    referencing the same tool_call_id
+                assistant_tool_call_msg = AssistantMessage(
+                    content=None,
+                    tool_calls=[ToolCall(id=tool_call_id, function=ToolCallFunction(name=tool_call_name, arguments=tool_call_args_str))],
+                )
+
+                conversation.append(assistant_tool_call_msg.model_dump())
+
+                # 2) Execute the tool
+                target_tool = next((x for x in tools if x.name == tool_call_name), None)
+                if not target_tool:
+                    # Tool not found, handle error
+                    yield f"data: {json.dumps({'error': 'Tool not found', 'tool': tool_call_name})}\n\n"
+                    break
+
+                try:
+                    tool_result, _ = execute_external_tool(
+                        agent_state=agent_state,
+                        function_name=tool_call_name,
+                        function_args=tool_args,
+                        target_letta_tool=target_tool,
+                        actor=actor,
+                        allow_agent_state_modifications=False,
+                    )
+                except Exception as e:
+                    tool_result = f"Failed to call tool. Error: {e}"
+
+                # 3) Insert the "tool" message referencing the same tool_call_id
+                tool_message = ToolMessage(content=json.dumps({"result": tool_result}), tool_call_id=tool_call_id)
+
+                conversation.append(tool_message.model_dump())
+
+                # 4) Add a user message prompting the tool call result summarization
+                heartbeat_user_message = UserMessage(
+                    content=f"{NON_USER_MSG_PREFIX} Tool finished executing. Summarize the result for the user.",
+                )
+                conversation.append(heartbeat_user_message.model_dump())
+
+                # Now, re-invoke OpenAI with the updated conversation
+                openai_request.messages = conversation
+
+                continue  # Start the while loop again
+
+            if finish_reason_stop:
+                # Model is done, no more calls
+                break
+
+            # If we reach here, no tool call, no "stop", but we've ended streaming
+            # Possibly a model error or some other finish reason. We'll just end.
+            break
 
-    # Persist both in one synchronous DB call, done in a threadpool
     await run_in_threadpool(
        server.agent_manager.append_to_in_context_messages,
-
+        message_db_queue,
        agent_id=agent_id,
        actor=actor,
    )
```