letta-nightly 0.6.27.dev20250220104103__py3-none-any.whl → 0.6.29.dev20250221033538__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +19 -2
- letta/client/client.py +2 -0
- letta/constants.py +2 -0
- letta/functions/schema_generator.py +6 -6
- letta/helpers/converters.py +153 -0
- letta/helpers/tool_rule_solver.py +11 -1
- letta/llm_api/anthropic.py +10 -5
- letta/llm_api/aws_bedrock.py +1 -1
- letta/llm_api/deepseek.py +303 -0
- letta/llm_api/helpers.py +20 -10
- letta/llm_api/llm_api_tools.py +85 -2
- letta/llm_api/openai.py +16 -1
- letta/local_llm/chat_completion_proxy.py +15 -2
- letta/local_llm/lmstudio/api.py +75 -1
- letta/orm/__init__.py +2 -0
- letta/orm/agent.py +11 -4
- letta/orm/custom_columns.py +31 -110
- letta/orm/identities_agents.py +13 -0
- letta/orm/identity.py +60 -0
- letta/orm/organization.py +2 -0
- letta/orm/sqlalchemy_base.py +4 -0
- letta/schemas/agent.py +11 -1
- letta/schemas/identity.py +67 -0
- letta/schemas/llm_config.py +2 -0
- letta/schemas/message.py +1 -1
- letta/schemas/openai/chat_completion_response.py +2 -0
- letta/schemas/providers.py +72 -1
- letta/schemas/tool_rule.py +9 -1
- letta/serialize_schemas/__init__.py +1 -0
- letta/serialize_schemas/agent.py +36 -0
- letta/serialize_schemas/base.py +12 -0
- letta/serialize_schemas/custom_fields.py +69 -0
- letta/serialize_schemas/message.py +15 -0
- letta/server/db.py +111 -0
- letta/server/rest_api/app.py +8 -0
- letta/server/rest_api/chat_completions_interface.py +45 -21
- letta/server/rest_api/interface.py +114 -9
- letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +98 -24
- letta/server/rest_api/routers/v1/__init__.py +2 -0
- letta/server/rest_api/routers/v1/agents.py +14 -3
- letta/server/rest_api/routers/v1/identities.py +121 -0
- letta/server/rest_api/utils.py +183 -4
- letta/server/server.py +23 -117
- letta/services/agent_manager.py +53 -6
- letta/services/block_manager.py +1 -1
- letta/services/identity_manager.py +156 -0
- letta/services/job_manager.py +1 -1
- letta/services/message_manager.py +1 -1
- letta/services/organization_manager.py +1 -1
- letta/services/passage_manager.py +1 -1
- letta/services/provider_manager.py +1 -1
- letta/services/sandbox_config_manager.py +1 -1
- letta/services/source_manager.py +1 -1
- letta/services/step_manager.py +1 -1
- letta/services/tool_manager.py +1 -1
- letta/services/user_manager.py +1 -1
- letta/settings.py +3 -0
- letta/streaming_interface.py +6 -2
- letta/tracing.py +205 -0
- letta/utils.py +4 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/METADATA +9 -2
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/RECORD +66 -52
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.27.dev20250220104103.dist-info → letta_nightly-0.6.29.dev20250221033538.dist-info}/entry_points.txt +0 -0
letta/llm_api/deepseek.py
ADDED

@@ -0,0 +1,303 @@
+import json
+import re
+import warnings
+from typing import List, Optional
+
+from letta.schemas.llm_config import LLMConfig
+from letta.schemas.message import Message as _Message
+from letta.schemas.openai.chat_completion_request import AssistantMessage, ChatCompletionRequest, ChatMessage
+from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall
+from letta.schemas.openai.chat_completion_request import Tool, ToolFunctionChoice, ToolMessage, UserMessage, cast_message_to_subtype
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.openai import Function, ToolCall
+from letta.utils import get_tool_call_id
+
+
+def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage:
+    """
+    Merge `ToolMessage` objects into the previous message.
+    """
+    previous_message.content += (
+        f"<ToolMessage> content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}</ToolMessage>"
+    )
+    return previous_message
+
+
+def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage:
+    """
+    For `AssistantMessage` objects, remove the `tool_calls` field and add them to the `content` field.
+    """
+
+    if "tool_calls" in assistant_message.dict().keys():
+        assistant_message.content = "".join(
+            [
+                # f"<ToolCall> name: {tool_call.function.name}, function: {tool_call.function}</ToolCall>"
+                f"<ToolCall> {json.dumps(tool_call.function.dict())} </ToolCall>"
+                for tool_call in assistant_message.tool_calls
+            ]
+        )
+        del assistant_message.tool_calls
+    return assistant_message
+
+
+def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]:
+    """
+    Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message.
+    Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.
+
+    This function merges ToolMessages into AssistantMessages and removes ToolCalls from AssistantMessages, and adds a dummy user message
+    at the end.
+
+    """
+    deepseek_messages = []
+    for idx, message in enumerate(messages):
+        # First message is the system prompt, add it
+        if idx == 0 and message.role == "system":
+            deepseek_messages.append(message)
+            continue
+        if message.role == "user":
+            if deepseek_messages[-1].role == "assistant" or deepseek_messages[-1].role == "system":
+                # User message, add it
+                deepseek_messages.append(UserMessage(content=message.content))
+            else:
+                # add to the content of the previous message
+                deepseek_messages[-1].content += message.content
+        elif message.role == "assistant":
+            if deepseek_messages[-1].role == "user":
+                # Assistant message, remove tool calls and add them to the content
+                deepseek_messages.append(handle_assistant_message(message))
+            else:
+                # add to the content of the previous message
+                deepseek_messages[-1].content += message.content
+        elif message.role == "tool" and deepseek_messages[-1].role == "assistant":
+            # Tool message, add it to the last assistant message
+            merged_message = merge_tool_message(deepseek_messages[-1], message)
+            deepseek_messages[-1] = merged_message
+        else:
+            print(f"Skipping message: {message}")
+
+    # This needs to end on a user message, add a dummy message if the last was assistant
+    if deepseek_messages[-1].role == "assistant":
+        deepseek_messages.append(UserMessage(content=""))
+    return deepseek_messages
+
+
+def build_deepseek_chat_completions_request(
+    llm_config: LLMConfig,
+    messages: List[_Message],
+    user_id: Optional[str],
+    functions: Optional[list],
+    function_call: Optional[str],
+    use_tool_naming: bool,
+    max_tokens: Optional[int],
+) -> ChatCompletionRequest:
+    # if functions and llm_config.put_inner_thoughts_in_kwargs:
+    #     # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
+    #     # TODO(fix)
+    #     inner_thoughts_desc = (
+    #         INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+    #     )
+    #     functions = add_inner_thoughts_to_functions(
+    #         functions=functions,
+    #         inner_thoughts_key=INNER_THOUGHTS_KWARG,
+    #         inner_thoughts_description=inner_thoughts_desc,
+    #     )
+
+    openai_message_list = [cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=False)) for m in messages]
+
+    if llm_config.model:
+        model = llm_config.model
+    else:
+        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        model = None
+    if use_tool_naming:
+        if function_call is None:
+            tool_choice = None
+        elif function_call not in ["none", "auto", "required"]:
+            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
+        else:
+            tool_choice = function_call
+
+        def add_functions_to_system_message(system_message: ChatMessage):
+            system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+            system_message.content += f'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
+
+        if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
+            add_functions_to_system_message(
+                openai_message_list[0]
+            )  # Inject additional instructions to the system prompt with the available functions
+
+            openai_message_list = map_messages_to_deepseek_format(openai_message_list)
+
+            data = ChatCompletionRequest(
+                model=model,
+                messages=openai_message_list,
+                user=str(user_id),
+                max_completion_tokens=max_tokens,
+                temperature=llm_config.temperature,
+            )
+        else:
+            data = ChatCompletionRequest(
+                model=model,
+                messages=openai_message_list,
+                tools=[Tool(type="function", function=f) for f in functions] if functions else None,
+                tool_choice=tool_choice,
+                user=str(user_id),
+                max_completion_tokens=max_tokens,
+                temperature=llm_config.temperature,
+            )
+    else:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            functions=functions,
+            function_call=function_call,
+            user=str(user_id),
+            max_completion_tokens=max_tokens,
+            temperature=llm_config.temperature,
+        )
+
+    return data
+
+
+def convert_deepseek_response_to_chatcompletion(
+    response: ChatCompletionResponse,
+) -> ChatCompletionResponse:
+    """
+    Example response from DeepSeek:
+
+    ChatCompletion(
+        id='bc7f7d25-82e4-443a-b217-dfad2b66da8e',
+        choices=[
+            Choice(
+                finish_reason='stop',
+                index=0,
+                logprobs=None,
+                message=ChatCompletionMessage(
+                    content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}',
+                    refusal=None,
+                    role='assistant',
+                    audio=None,
+                    function_call=None,
+                    tool_calls=None,
+                    reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. Let me ask them how they\'re doing and mention whales to keep the conversation going.'
+                )
+            )
+        ],
+        created=1738266449,
+        model='deepseek-reasoner',
+        object='chat.completion',
+        service_tier=None,
+        system_fingerprint='fp_7e73fd9a08',
+        usage=CompletionUsage(
+            completion_tokens=111,
+            prompt_tokens=1270,
+            total_tokens=1381,
+            completion_tokens_details=CompletionTokensDetails(
+                accepted_prediction_tokens=None,
+                audio_tokens=None,
+                reasoning_tokens=72,
+                rejected_prediction_tokens=None
+            ),
+            prompt_tokens_details=PromptTokensDetails(
+                audio_tokens=None,
+                cached_tokens=1088
+            ),
+            prompt_cache_hit_tokens=1088,
+            prompt_cache_miss_tokens=182
+        )
+    )
+    """
+
+    def convert_dict_quotes(input_dict: dict):
+        """
+        Convert a dictionary with single-quoted keys to double-quoted keys,
+        properly handling boolean values and nested structures.
+
+        Args:
+            input_dict (dict): Input dictionary with single-quoted keys
+
+        Returns:
+            str: JSON string with double-quoted keys
+        """
+        # First convert the dictionary to a JSON string to handle booleans properly
+        json_str = json.dumps(input_dict)
+
+        # Function to handle complex string replacements
+        def replace_quotes(match):
+            key = match.group(1)
+            # Escape any existing double quotes in the key
+            key = key.replace('"', '\\"')
+            return f'"{key}":'
+
+        # Replace single-quoted keys with double-quoted keys
+        # This regex looks for single-quoted keys followed by a colon
+        def strip_json_block(text):
+            # Check if text starts with ```json or similar
+            if text.strip().startswith("```"):
+                # Split by \n to remove the first and last lines
+                lines = text.split("\n")[1:-1]
+                return "\n".join(lines)
+            return text
+
+        pattern = r"'([^']*)':"
+        converted_str = re.sub(pattern, replace_quotes, strip_json_block(json_str))
+
+        # Parse the string back to ensure valid JSON format
+        try:
+            json.loads(converted_str)
+            return converted_str
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Failed to create valid JSON with double quotes: {str(e)}")
+
+    def extract_json_block(text):
+        # Find the first {
+        start = text.find("{")
+        if start == -1:
+            return text
+
+        # Track nested braces to find the matching closing brace
+        brace_count = 0
+        end = start
+
+        for i in range(start, len(text)):
+            if text[i] == "{":
+                brace_count += 1
+            elif text[i] == "}":
+                brace_count -= 1
+                if brace_count == 0:
+                    end = i + 1
+                    break
+
+        return text[start:end]
+
+    content = response.choices[0].message.content
+    try:
+        content_dict = json.loads(extract_json_block(content))
+
+        if type(content_dict["arguments"]) == str:
+            content_dict["arguments"] = json.loads(content_dict["arguments"])
+
+        tool_calls = [
+            ToolCall(
+                id=get_tool_call_id(),
+                type="function",
+                function=Function(
+                    name=content_dict["name"],
+                    arguments=convert_dict_quotes(content_dict["arguments"]),
+                ),
+            )
+        ]
+    except (json.JSONDecodeError, TypeError, KeyError) as e:
+        print(e)
+        tool_calls = response.choices[0].message.tool_calls
+        raise ValueError(f"Failed to create valid JSON {content}")
+
+    # Move the "reasoning_content" into the "content" field
+    response.choices[0].message.content = response.choices[0].message.reasoning_content
+    response.choices[0].message.tool_calls = tool_calls
+
+    # Remove the "reasoning_content" field
+    response.choices[0].message.reasoning_content = None
+
+    return response
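For readers skimming the new module: convert_deepseek_response_to_chatcompletion depends on pulling the first balanced JSON object out of free-form R1 output. Below is a minimal standalone sketch of that brace-matching idea, re-implemented outside letta so it can be run directly; the helper name and the sample content string are illustrative, not taken from the package.

    import json

    def extract_first_json_object(text: str) -> str:
        # Walk from the first '{', tracking brace depth, so any prose
        # before or after the JSON object is ignored (mirrors the diff above).
        start = text.find("{")
        if start == -1:
            return text
        depth = 0
        end = start
        for i in range(start, len(text)):
            if text[i] == "{":
                depth += 1
            elif text[i] == "}":
                depth -= 1
                if depth == 0:
                    end = i + 1
                    break
        return text[start:end]

    # Invented example of R1-style output: a function-call JSON block wrapped in extra prose.
    content = 'Sure. {"name": "send_message", "arguments": {"message": "hi"}} Done.'
    call = json.loads(extract_first_json_object(content))
    print(call["name"], call["arguments"])  # send_message {'message': 'hi'}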
letta/llm_api/helpers.py
CHANGED

@@ -202,21 +202,29 @@ def add_inner_thoughts_to_functions(
     inner_thoughts_key: str,
     inner_thoughts_description: str,
     inner_thoughts_required: bool = True,
+    put_inner_thoughts_first: bool = True,
 ) -> List[dict]:
     """Add an inner_thoughts kwarg to every function in the provided list, ensuring it's the first parameter"""
     new_functions = []
     for function_object in functions:
         new_function_object = copy.deepcopy(function_object)
-
-        # Create a new OrderedDict with inner_thoughts as the first item
         new_properties = OrderedDict()
-        new_properties[inner_thoughts_key] = {
-            "type": "string",
-            "description": inner_thoughts_description,
-        }
 
-        #
-
+        # For chat completions, we want inner thoughts to come later
+        if put_inner_thoughts_first:
+            # Create with inner_thoughts as the first item
+            new_properties[inner_thoughts_key] = {
+                "type": "string",
+                "description": inner_thoughts_description,
+            }
+            # Add the rest of the properties
+            new_properties.update(function_object["parameters"]["properties"])
+        else:
+            new_properties.update(function_object["parameters"]["properties"])
+            new_properties[inner_thoughts_key] = {
+                "type": "string",
+                "description": inner_thoughts_description,
+            }
 
         # Cast OrderedDict back to a regular dict
         new_function_object["parameters"]["properties"] = dict(new_properties)

@@ -225,9 +233,11 @@ def add_inner_thoughts_to_functions(
         if inner_thoughts_required:
             required_params = new_function_object["parameters"].get("required", [])
             if inner_thoughts_key not in required_params:
-                required_params.insert(0, inner_thoughts_key)
+                if put_inner_thoughts_first:
+                    required_params.insert(0, inner_thoughts_key)
+                else:
+                    required_params.append(inner_thoughts_key)
                 new_function_object["parameters"]["required"] = required_params
-
         new_functions.append(new_function_object)
 
     return new_functions
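The new put_inner_thoughts_first flag only changes where the inner-thoughts parameter lands in a tool's JSON-schema properties (and in its required list). Here is a small standalone sketch of that ordering behavior, with an invented schema and helper name rather than letta's actual function:

    from collections import OrderedDict

    def place_inner_thoughts(properties: dict, key: str, description: str, first: bool) -> dict:
        # Build an OrderedDict with the inner-thoughts key either before or
        # after the existing parameters, then cast back to a plain dict.
        new_properties = OrderedDict()
        if first:
            new_properties[key] = {"type": "string", "description": description}
            new_properties.update(properties)
        else:
            new_properties.update(properties)
            new_properties[key] = {"type": "string", "description": description}
        return dict(new_properties)

    params = {"message": {"type": "string", "description": "What to send"}}
    print(list(place_inner_thoughts(params, "inner_thoughts", "private reasoning", True)))
    # ['inner_thoughts', 'message']
    print(list(place_inner_thoughts(params, "inner_thoughts", "private reasoning", False)))
    # ['message', 'inner_thoughts']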
letta/llm_api/llm_api_tools.py
CHANGED

@@ -1,3 +1,4 @@
+import json
 import random
 import time
 from typing import List, Optional, Union

@@ -13,6 +14,7 @@ from letta.llm_api.anthropic import (
 )
 from letta.llm_api.aws_bedrock import has_valid_aws_credentials
 from letta.llm_api.azure_openai import azure_openai_chat_completions_request
+from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
 from letta.llm_api.google_ai import convert_tools_to_google_ai_format, google_ai_chat_completions_request
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.openai import (

@@ -29,8 +31,9 @@ from letta.schemas.openai.chat_completion_request import ChatCompletionRequest,
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.settings import ModelSettings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
+from letta.tracing import log_event, trace_method
 
-LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
+LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq", "deepseek"]
 
 
 def retry_with_exponential_backoff(

@@ -68,9 +71,28 @@ def retry_with_exponential_backoff(
                 if http_err.response.status_code in error_codes:
                     # Increment retries
                     num_retries += 1
+                    log_event(
+                        "llm_retry_attempt",
+                        {
+                            "attempt": num_retries,
+                            "delay": delay,
+                            "status_code": http_err.response.status_code,
+                            "error_type": type(http_err).__name__,
+                            "error": str(http_err),
+                        },
+                    )
 
                     # Check if max retries has been reached
                     if num_retries > max_retries:
+                        log_event(
+                            "llm_max_retries_exceeded",
+                            {
+                                "max_retries": max_retries,
+                                "status_code": http_err.response.status_code,
+                                "error_type": type(http_err).__name__,
+                                "error": str(http_err),
+                            },
+                        )
                         raise RateLimitExceededError("Maximum number of retries exceeded", max_retries=max_retries)
 
                     # Increment the delay

@@ -84,15 +106,21 @@ def retry_with_exponential_backoff(
                     time.sleep(delay)
                 else:
                     # For other HTTP errors, re-raise the exception
+                    log_event(
+                        "llm_non_retryable_error",
+                        {"status_code": http_err.response.status_code, "error_type": type(http_err).__name__, "error": str(http_err)},
+                    )
                     raise
 
             # Raise exceptions for any errors not specified
             except Exception as e:
+                log_event("llm_unexpected_error", {"error_type": type(e).__name__, "error": str(e)})
                 raise e
 
     return wrapper
 
 
+@trace_method("LLM Request")
 @retry_with_exponential_backoff
 def create(
     # agent_state: AgentState,

@@ -112,6 +140,7 @@ def create(
     stream: bool = False,
     stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
+    put_inner_thoughts_first: bool = True,
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
     from letta.utils import printd

@@ -157,7 +186,9 @@ def create(
         else:
             function_call = "required"
 
-        data = build_openai_chat_completions_request(llm_config, messages, user_id, functions, function_call, use_tool_naming)
+        data = build_openai_chat_completions_request(
+            llm_config, messages, user_id, functions, function_call, use_tool_naming, put_inner_thoughts_first=put_inner_thoughts_first
+        )
         if stream:  # Client requested token streaming
             data.stream = True
             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(

@@ -453,10 +484,62 @@ def create(
             ),
         )
 
+    elif llm_config.model_endpoint_type == "deepseek":
+        if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
+            # only is a problem if we are *not* using an openai proxy
+            raise LettaConfigurationError(message="DeepSeek key is missing from letta config file", missing_fields=["deepseek_api_key"])
+
+        data = build_deepseek_chat_completions_request(
+            llm_config,
+            messages,
+            user_id,
+            functions,
+            function_call,
+            use_tool_naming,
+            llm_config.max_tokens,
+        )
+        if stream:  # Client requested token streaming
+            data.stream = True
+            assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
+                stream_interface, AgentRefreshStreamingInterface
+            ), type(stream_interface)
+            response = openai_chat_completions_process_stream(
+                url=llm_config.model_endpoint,
+                api_key=model_settings.deepseek_api_key,
+                chat_completion_request=data,
+                stream_interface=stream_interface,
+            )
+        else:  # Client did not request token streaming (expect a blocking backend response)
+            data.stream = False
+            if isinstance(stream_interface, AgentChunkStreamingInterface):
+                stream_interface.stream_start()
+            try:
+                response = openai_chat_completions_request(
+                    url=llm_config.model_endpoint,
+                    api_key=model_settings.deepseek_api_key,
+                    chat_completion_request=data,
+                )
+            finally:
+                if isinstance(stream_interface, AgentChunkStreamingInterface):
+                    stream_interface.stream_end()
+        """
+        if llm_config.put_inner_thoughts_in_kwargs:
+            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
+        """
+        response = convert_deepseek_response_to_chatcompletion(response)
+        return response
+
     # local model
     else:
         if stream:
             raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
+
+        if "DeepSeek-R1".lower() in llm_config.model.lower():  # TODO: move this to the llm_config.
+            messages[0].content[0].text += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+            messages[0].content[
+                0
+            ].text += f'Select best function to call simply by responding with a single json block with the keys "function" and "params". Use double quotes around the arguments.'
+
         return get_chat_completion(
             model=llm_config.model,
             messages=messages,
letta/llm_api/openai.py
CHANGED

@@ -94,6 +94,7 @@ def build_openai_chat_completions_request(
     functions: Optional[list],
     function_call: Optional[str],
     use_tool_naming: bool,
+    put_inner_thoughts_first: bool = True,
 ) -> ChatCompletionRequest:
     if functions and llm_config.put_inner_thoughts_in_kwargs:
         # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first

@@ -105,6 +106,7 @@ def build_openai_chat_completions_request(
             functions=functions,
             inner_thoughts_key=INNER_THOUGHTS_KWARG,
             inner_thoughts_description=inner_thoughts_desc,
+            put_inner_thoughts_first=put_inner_thoughts_first,
         )
 
     openai_message_list = [

@@ -166,6 +168,11 @@ def openai_chat_completions_process_stream(
     create_message_id: bool = True,
     create_message_datetime: bool = True,
     override_tool_call_id: bool = True,
+    # if we expect reasoning content in the response,
+    # then we should emit reasoning_content as "inner_thoughts"
+    # however, we don't necessarily want to put these
+    # expect_reasoning_content: bool = False,
+    expect_reasoning_content: bool = True,
 ) -> ChatCompletionResponse:
     """Process a streaming completion response, and return a ChatCompletionRequest at the end.
 

@@ -250,6 +257,7 @@ def openai_chat_completions_process_stream(
                     chat_completion_chunk,
                     message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
                     message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
+                    expect_reasoning_content=expect_reasoning_content,
                 )
             elif isinstance(stream_interface, AgentRefreshStreamingInterface):
                 stream_interface.process_refresh(chat_completion_response)

@@ -290,6 +298,13 @@ def openai_chat_completions_process_stream(
                        else:
                            accum_message.content += content_delta
 
+                    if expect_reasoning_content and message_delta.reasoning_content is not None:
+                        reasoning_content_delta = message_delta.reasoning_content
+                        if accum_message.reasoning_content is None:
+                            accum_message.reasoning_content = reasoning_content_delta
+                        else:
+                            accum_message.reasoning_content += reasoning_content_delta
+
                     # TODO(charles) make sure this works for parallel tool calling?
                     if message_delta.tool_calls is not None:
                         tool_calls_delta = message_delta.tool_calls

@@ -377,7 +392,7 @@ def openai_chat_completions_process_stream(
     chat_completion_response.usage.completion_tokens = n_chunks
     chat_completion_response.usage.total_tokens = prompt_tokens + n_chunks
 
-    assert len(chat_completion_response.choices) > 0, chat_completion_response
+    assert len(chat_completion_response.choices) > 0, f"No response from provider {chat_completion_response}"
 
     # printd(chat_completion_response)
     return chat_completion_response
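The streaming change above boils down to accumulating an optional reasoning_content delta alongside the ordinary content delta. A standalone sketch of that accumulation pattern, using invented delta objects rather than the OpenAI client types:

    from dataclasses import dataclass
    from typing import List, Optional, Tuple

    @dataclass
    class Delta:
        content: Optional[str] = None
        reasoning_content: Optional[str] = None

    def accumulate(deltas: List[Delta]) -> Tuple[Optional[str], Optional[str]]:
        # Concatenate content and reasoning_content separately, starting from None,
        # as the stream-processing loop in the diff above does.
        content, reasoning = None, None
        for d in deltas:
            if d.content is not None:
                content = d.content if content is None else content + d.content
            if d.reasoning_content is not None:
                reasoning = d.reasoning_content if reasoning is None else reasoning + d.reasoning_content
        return content, reasoning

    chunks = [
        Delta(reasoning_content="Thinking about whales... "),
        Delta(reasoning_content="they are mammals."),
        Delta(content="Whales are mammals, not fish."),
    ]
    print(accumulate(chunks))
    # ('Whales are mammals, not fish.', 'Thinking about whales... they are mammals.')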
letta/local_llm/chat_completion_proxy.py
CHANGED

@@ -14,7 +14,7 @@ from letta.local_llm.grammars.gbnf_grammar_generator import create_dynamic_model
 from letta.local_llm.koboldcpp.api import get_koboldcpp_completion
 from letta.local_llm.llamacpp.api import get_llamacpp_completion
 from letta.local_llm.llm_chat_completion_wrappers import simple_summary_wrapper
-from letta.local_llm.lmstudio.api import get_lmstudio_completion
+from letta.local_llm.lmstudio.api import get_lmstudio_completion, get_lmstudio_completion_chatcompletions
 from letta.local_llm.ollama.api import get_ollama_completion
 from letta.local_llm.utils import count_tokens, get_available_wrappers
 from letta.local_llm.vllm.api import get_vllm_completion

@@ -141,11 +141,24 @@ def get_chat_completion(
             f"Failed to convert ChatCompletion messages into prompt string with wrapper {str(llm_wrapper)} - error: {str(e)}"
         )
 
+    # get the schema for the model
+
+    """
+    if functions_python is not None:
+        model_schema = generate_schema(functions)
+    else:
+        model_schema = None
+    """
+
+    # Run the LLM
     try:
+        result_reasoning = None
         if endpoint_type == "webui":
             result, usage = get_webui_completion(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
         elif endpoint_type == "webui-legacy":
             result, usage = get_webui_completion_legacy(endpoint, auth_type, auth_key, prompt, context_window, grammar=grammar)
+        elif endpoint_type == "lmstudio-chatcompletions":
+            result, usage, result_reasoning = get_lmstudio_completion_chatcompletions(endpoint, auth_type, auth_key, model, messages)
         elif endpoint_type == "lmstudio":
             result, usage = get_lmstudio_completion(endpoint, auth_type, auth_key, prompt, context_window, api="completions")
         elif endpoint_type == "lmstudio-legacy":

@@ -214,7 +227,7 @@ def get_chat_completion(
             index=0,
             message=Message(
                 role=chat_completion_result["role"],
-                content=chat_completion_result["content"],
+                content=result_reasoning if result_reasoning is not None else chat_completion_result["content"],
                 tool_calls=(
                     [ToolCall(id=get_tool_call_id(), type="function", function=chat_completion_result["function_call"])]
                     if "function_call" in chat_completion_result