letta-nightly 0.6.32.dev20250225104119__py3-none-any.whl → 0.6.33.dev20250226104113__py3-none-any.whl

This diff shows the changes between the two publicly released package versions as they appear in their public registry, and is provided for informational purposes only.


Files changed (36)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +10 -5
  3. letta/constants.py +7 -0
  4. letta/functions/ast_parsers.py +13 -0
  5. letta/functions/schema_generator.py +14 -2
  6. letta/helpers/composio_helpers.py +3 -2
  7. letta/helpers/tool_execution_helper.py +171 -0
  8. letta/llm_api/anthropic.py +37 -1
  9. letta/orm/source.py +2 -2
  10. letta/schemas/openai/chat_completion_request.py +1 -1
  11. letta/schemas/providers.py +43 -4
  12. letta/schemas/tool.py +1 -13
  13. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +225 -35
  14. letta/server/rest_api/routers/v1/agents.py +57 -55
  15. letta/server/rest_api/routers/v1/blocks.py +12 -12
  16. letta/server/rest_api/routers/v1/identities.py +12 -12
  17. letta/server/rest_api/routers/v1/jobs.py +8 -8
  18. letta/server/rest_api/routers/v1/providers.py +11 -5
  19. letta/server/rest_api/routers/v1/runs.py +12 -12
  20. letta/server/rest_api/routers/v1/sandbox_configs.py +24 -24
  21. letta/server/rest_api/routers/v1/sources.py +20 -20
  22. letta/server/rest_api/routers/v1/steps.py +8 -7
  23. letta/server/rest_api/routers/v1/tags.py +2 -2
  24. letta/server/rest_api/routers/v1/tools.py +20 -20
  25. letta/server/rest_api/utils.py +2 -3
  26. letta/services/agent_manager.py +36 -0
  27. letta/services/block_manager.py +4 -2
  28. letta/services/identity_manager.py +6 -0
  29. letta/services/message_manager.py +4 -1
  30. letta/services/provider_manager.py +10 -9
  31. letta/services/step_manager.py +2 -2
  32. {letta_nightly-0.6.32.dev20250225104119.dist-info → letta_nightly-0.6.33.dev20250226104113.dist-info}/METADATA +1 -1
  33. {letta_nightly-0.6.32.dev20250225104119.dist-info → letta_nightly-0.6.33.dev20250226104113.dist-info}/RECORD +36 -35
  34. {letta_nightly-0.6.32.dev20250225104119.dist-info → letta_nightly-0.6.33.dev20250226104113.dist-info}/LICENSE +0 -0
  35. {letta_nightly-0.6.32.dev20250225104119.dist-info → letta_nightly-0.6.33.dev20250226104113.dist-info}/WHEEL +0 -0
  36. {letta_nightly-0.6.32.dev20250225104119.dist-info → letta_nightly-0.6.33.dev20250226104113.dist-info}/entry_points.txt +0 -0
@@ -1,19 +1,39 @@
 import asyncio
+import json
+import uuid
 from typing import TYPE_CHECKING, List, Optional, Union
 
 import httpx
 import openai
 from fastapi import APIRouter, Body, Depends, Header, HTTPException
 from fastapi.responses import StreamingResponse
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk, Choice, ChoiceDelta
 from openai.types.chat.completion_create_params import CompletionCreateParams
 from starlette.concurrency import run_in_threadpool
 
 from letta.agent import Agent
-from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
+from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, LETTA_TOOL_SET, NON_USER_MSG_PREFIX, PRE_EXECUTION_MESSAGE_ARG
+from letta.helpers.tool_execution_helper import (
+    add_pre_execution_message,
+    enable_strict_mode,
+    execute_external_tool,
+    remove_request_heartbeat,
+)
 from letta.log import get_logger
+from letta.orm.enums import ToolType
 from letta.schemas.message import Message, MessageCreate
+from letta.schemas.openai.chat_completion_request import (
+    AssistantMessage,
+    ChatCompletionRequest,
+    Tool,
+    ToolCall,
+    ToolCallFunction,
+    ToolMessage,
+    UserMessage,
+)
 from letta.schemas.user import User
 from letta.server.rest_api.chat_completions_interface import ChatCompletionsStreamingInterface
+from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
 
 # TODO this belongs in a controller!
 from letta.server.rest_api.utils import (
@@ -52,20 +72,53 @@ async def create_fast_chat_completions(
     server: "SyncServer" = Depends(get_letta_server),
     user_id: Optional[str] = Header(None, alias="user_id"),
 ):
-    # TODO: This is necessary, we need to factor out CompletionCreateParams due to weird behavior
+    actor = server.user_manager.get_user_or_default(user_id=user_id)
+
     agent_id = str(completion_request.get("user", None))
     if agent_id is None:
-        error_msg = "Must pass agent_id in the 'user' field"
-        logger.error(error_msg)
-        raise HTTPException(status_code=400, detail=error_msg)
-    model = completion_request.get("model")
+        raise HTTPException(status_code=400, detail="Must pass agent_id in the 'user' field")
 
-    actor = server.user_manager.get_user_or_default(user_id=user_id)
+    agent_state = server.agent_manager.get_agent_by_id(agent_id=agent_id, actor=actor)
+    if agent_state.llm_config.model_endpoint_type != "openai":
+        raise HTTPException(status_code=400, detail="Only OpenAI models are supported by this endpoint.")
+
+    # Convert Letta messages to OpenAI messages
+    in_context_messages = server.message_manager.get_messages_by_ids(message_ids=agent_state.message_ids, actor=actor)
+    openai_messages = convert_letta_messages_to_openai(in_context_messages)
+
+    # Also parse user input from completion_request and append
+    input_message = get_messages_from_completion_request(completion_request)[-1]
+    openai_messages.append(input_message)
+
+    # Tools we allow this agent to call
+    tools = [t for t in agent_state.tools if t.name not in LETTA_TOOL_SET and t.tool_type in {ToolType.EXTERNAL_COMPOSIO, ToolType.CUSTOM}]
+
+    # Initial request
+    openai_request = ChatCompletionRequest(
+        model=agent_state.llm_config.model,
+        messages=openai_messages,
+        # TODO: This nested thing here is so ugly, need to refactor
+        tools=(
+            [
+                Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema))))
+                for t in tools
+            ]
+            if tools
+            else None
+        ),
+        tool_choice="auto",
+        user=user_id,
+        max_completion_tokens=agent_state.llm_config.max_tokens,
+        temperature=agent_state.llm_config.temperature,
+        stream=True,
+    )
+
+    # Create the OpenAI async client
     client = openai.AsyncClient(
         api_key=model_settings.openai_api_key,
         max_retries=0,
         http_client=httpx.AsyncClient(
-            timeout=httpx.Timeout(connect=15.0, read=5.0, write=5.0, pool=5.0),
+            timeout=httpx.Timeout(connect=15.0, read=30.0, write=15.0, pool=15.0),
             follow_redirects=True,
             limits=httpx.Limits(
                 max_connections=50,
@@ -75,38 +128,175 @@ async def create_fast_chat_completions(
         ),
     )
 
-    # Magic message manipulating
-    input_message = get_messages_from_completion_request(completion_request)[-1]
-    completion_request.pop("messages")
-
-    # Get in context messages
-    in_context_messages = server.agent_manager.get_in_context_messages(agent_id=agent_id, actor=actor)
-    openai_dict_in_context_messages = convert_letta_messages_to_openai(in_context_messages)
-    openai_dict_in_context_messages.append(input_message)
+    # The messages we want to persist to the Letta agent
+    user_message = create_user_message(input_message=input_message, agent_id=agent_id, actor=actor)
+    message_db_queue = [user_message]
 
     async def event_stream():
-        # TODO: Factor this out into separate interface
-        response_accumulator = []
-
-        stream = await client.chat.completions.create(**completion_request, messages=openai_dict_in_context_messages)
-
-        async with stream:
-            async for chunk in stream:
-                if chunk.choices and chunk.choices[0].delta.content:
-                    # TODO: This does not support tool calling right now
-                    response_accumulator.append(chunk.choices[0].delta.content)
-                yield f"data: {chunk.model_dump_json()}\n\n"
-
-        # Construct messages
-        user_message = create_user_message(input_message=input_message, agent_id=agent_id, actor=actor)
-        assistant_message = create_assistant_message_from_openai_response(
-            response_text="".join(response_accumulator), agent_id=agent_id, model=str(model), actor=actor
-        )
+        """
+        A function-calling loop:
+          - We stream partial tokens.
+          - If we detect a tool call (finish_reason="tool_calls"), we parse it,
+            add two messages to the conversation:
+              (a) assistant message with tool_calls referencing the same ID
+              (b) a tool message referencing that ID, containing the tool result.
+          - Re-invoke the OpenAI request with updated conversation, streaming again.
+          - End when finish_reason="stop" or no more tool calls.
+        """
+
+        # We'll keep updating this conversation in a loop
+        conversation = openai_messages[:]
+
+        while True:
+            # Make the streaming request to OpenAI
+            stream = await client.chat.completions.create(**openai_request.model_dump(exclude_unset=True))
+
+            content_buffer = []
+            tool_call_name = None
+            tool_call_args_str = ""
+            tool_call_id = None
+            tool_call_happened = False
+            finish_reason_stop = False
+            optimistic_json_parser = OptimisticJSONParser(strict=True)
+            current_parsed_json_result = {}
+
+            async with stream:
+                async for chunk in stream:
+                    choice = chunk.choices[0]
+                    delta = choice.delta
+                    finish_reason = choice.finish_reason  # "tool_calls", "stop", or None
+
+                    if delta.content:
+                        content_buffer.append(delta.content)
+                        yield f"data: {chunk.model_dump_json()}\n\n"
+
+                    # CASE B: Partial tool call info
+                    if delta.tool_calls:
+                        # Typically there's only one in delta.tool_calls
+                        tc = delta.tool_calls[0]
+                        if tc.function.name:
+                            tool_call_name = tc.function.name
+                        if tc.function.arguments:
+                            tool_call_args_str += tc.function.arguments
+
+                            # See if we can stream out the pre-execution message
+                            parsed_args = optimistic_json_parser.parse(tool_call_args_str)
+                            if parsed_args.get(
+                                PRE_EXECUTION_MESSAGE_ARG
+                            ) and current_parsed_json_result.get(  # Ensure key exists and is not None/empty
+                                PRE_EXECUTION_MESSAGE_ARG
+                            ) != parsed_args.get(
+                                PRE_EXECUTION_MESSAGE_ARG
+                            ):
+                                # Only stream if there's something new to stream
+                                # We do this way to avoid hanging JSON at the end of the stream, e.g. '}'
+                                if parsed_args != current_parsed_json_result:
+                                    current_parsed_json_result = parsed_args
+                                    synthetic_chunk = ChatCompletionChunk(
+                                        id=chunk.id,
+                                        object=chunk.object,
+                                        created=chunk.created,
+                                        model=chunk.model,
+                                        choices=[
+                                            Choice(
+                                                index=choice.index,
+                                                delta=ChoiceDelta(content=tc.function.arguments, role="assistant"),
+                                                finish_reason=None,
+                                            )
+                                        ],
+                                    )
+
+                                    yield f"data: {synthetic_chunk.model_dump_json()}\n\n"
+
+                        # We might generate a unique ID for the tool call
+                        if tc.id:
+                            tool_call_id = tc.id
+
+                    # Check finish_reason
+                    if finish_reason == "tool_calls":
+                        tool_call_happened = True
+                        break
+                    elif finish_reason == "stop":
+                        finish_reason_stop = True
+                        break
+
+            if content_buffer:
+                # We treat that partial text as an assistant message
+                content = "".join(content_buffer)
+                conversation.append({"role": "assistant", "content": content})
+
+                # Create an assistant message here to persist later
+                assistant_message = create_assistant_message_from_openai_response(
+                    response_text=content, agent_id=agent_id, model=agent_state.llm_config.model, actor=actor
+                )
+                message_db_queue.append(assistant_message)
+
+            if tool_call_happened:
+                # Parse the tool call arguments
+                try:
+                    tool_args = json.loads(tool_call_args_str)
+                except json.JSONDecodeError:
+                    tool_args = {}
+
+                if not tool_call_id:
+                    # If no tool_call_id given by the model, generate one
+                    tool_call_id = f"call_{uuid.uuid4().hex[:8]}"
+
+                # 1) Insert the "assistant" message with the tool_calls field
+                #    referencing the same tool_call_id
+                assistant_tool_call_msg = AssistantMessage(
+                    content=None,
+                    tool_calls=[ToolCall(id=tool_call_id, function=ToolCallFunction(name=tool_call_name, arguments=tool_call_args_str))],
+                )
+
+                conversation.append(assistant_tool_call_msg.model_dump())
+
+                # 2) Execute the tool
+                target_tool = next((x for x in tools if x.name == tool_call_name), None)
+                if not target_tool:
+                    # Tool not found, handle error
+                    yield f"data: {json.dumps({'error': 'Tool not found', 'tool': tool_call_name})}\n\n"
+                    break
+
+                try:
+                    tool_result, _ = execute_external_tool(
+                        agent_state=agent_state,
+                        function_name=tool_call_name,
+                        function_args=tool_args,
+                        target_letta_tool=target_tool,
+                        actor=actor,
+                        allow_agent_state_modifications=False,
+                    )
+                except Exception as e:
+                    tool_result = f"Failed to call tool. Error: {e}"
+
+                # 3) Insert the "tool" message referencing the same tool_call_id
+                tool_message = ToolMessage(content=json.dumps({"result": tool_result}), tool_call_id=tool_call_id)
+
+                conversation.append(tool_message.model_dump())
+
+                # 4) Add a user message prompting the tool call result summarization
+                heartbeat_user_message = UserMessage(
+                    content=f"{NON_USER_MSG_PREFIX} Tool finished executing. Summarize the result for the user.",
+                )
+                conversation.append(heartbeat_user_message.model_dump())
+
+                # Now, re-invoke OpenAI with the updated conversation
+                openai_request.messages = conversation
+
+                continue  # Start the while loop again
+
+            if finish_reason_stop:
+                # Model is done, no more calls
+                break
+
+            # If we reach here, no tool call, no "stop", but we've ended streaming
+            # Possibly a model error or some other finish reason. We'll just end.
+            break
 
-    # Persist both in one synchronous DB call, done in a threadpool
     await run_in_threadpool(
        server.agent_manager.append_to_in_context_messages,
-        [user_message, assistant_message],
+        message_db_queue,
         agent_id=agent_id,
         actor=actor,
    )
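
Reading the new handler end to end: it takes a standard OpenAI chat-completions request, uses the request's 'user' field to look up the Letta agent, and streams back OpenAI-compatible chat.completion.chunk events (including synthetic chunks carrying the tool's pre-execution message). A minimal client-side sketch of how such an endpoint could be consumed with the stock OpenAI SDK follows; the base URL and route prefix are illustrative assumptions, not taken from this diff.

# Minimal consumer sketch (illustrative only). Assumed: a Letta server reachable at
# http://localhost:8283 with this router mounted under /openai/v1; adjust for your deployment.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8283/openai/v1",      # assumed mount point for this router
    api_key="unused",                                # the handler uses the server-side OpenAI key
    default_headers={"user_id": "<letta_user_id>"},  # optional; matches Header(None, alias="user_id") above
)

stream = client.chat.completions.create(
    model="gpt-4o-mini",  # effectively ignored: the handler substitutes the agent's llm_config.model
    messages=[{"role": "user", "content": "What is on my calendar today?"}],
    user="<agent_id>",    # the handler reads the target agent id from the 'user' field
    stream=True,
)

for chunk in stream:
    delta = chunk.choices[0].delta
    if delta.content:  # assistant tokens and streamed pre-execution messages arrive as plain content deltas
        print(delta.content, end="", flush=True)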