letta-nightly 0.11.6.dev20250903104037__py3-none-any.whl → 0.11.7.dev20250904104046__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to the supported public registries. It is provided for informational purposes only and reflects the packages as they appear in those registries.
- letta/__init__.py +1 -1
- letta/agent.py +10 -14
- letta/agents/base_agent.py +18 -0
- letta/agents/helpers.py +32 -7
- letta/agents/letta_agent.py +953 -762
- letta/agents/voice_agent.py +1 -1
- letta/client/streaming.py +0 -1
- letta/constants.py +11 -8
- letta/errors.py +9 -0
- letta/functions/function_sets/base.py +77 -69
- letta/functions/function_sets/builtin.py +41 -22
- letta/functions/function_sets/multi_agent.py +1 -2
- letta/functions/schema_generator.py +0 -1
- letta/helpers/converters.py +8 -3
- letta/helpers/datetime_helpers.py +5 -4
- letta/helpers/message_helper.py +1 -2
- letta/helpers/pinecone_utils.py +0 -1
- letta/helpers/tool_rule_solver.py +10 -0
- letta/helpers/tpuf_client.py +848 -0
- letta/interface.py +8 -8
- letta/interfaces/anthropic_streaming_interface.py +7 -0
- letta/interfaces/openai_streaming_interface.py +29 -6
- letta/llm_api/anthropic_client.py +188 -18
- letta/llm_api/azure_client.py +0 -1
- letta/llm_api/bedrock_client.py +1 -2
- letta/llm_api/deepseek_client.py +319 -5
- letta/llm_api/google_vertex_client.py +75 -17
- letta/llm_api/groq_client.py +0 -1
- letta/llm_api/helpers.py +2 -2
- letta/llm_api/llm_api_tools.py +1 -50
- letta/llm_api/llm_client.py +6 -8
- letta/llm_api/mistral.py +1 -1
- letta/llm_api/openai.py +16 -13
- letta/llm_api/openai_client.py +31 -16
- letta/llm_api/together_client.py +0 -1
- letta/llm_api/xai_client.py +0 -1
- letta/local_llm/chat_completion_proxy.py +7 -6
- letta/local_llm/settings/settings.py +1 -1
- letta/orm/__init__.py +1 -0
- letta/orm/agent.py +8 -6
- letta/orm/archive.py +9 -1
- letta/orm/block.py +3 -4
- letta/orm/block_history.py +3 -1
- letta/orm/group.py +2 -3
- letta/orm/identity.py +1 -2
- letta/orm/job.py +1 -2
- letta/orm/llm_batch_items.py +1 -2
- letta/orm/message.py +8 -4
- letta/orm/mixins.py +18 -0
- letta/orm/organization.py +2 -0
- letta/orm/passage.py +8 -1
- letta/orm/passage_tag.py +55 -0
- letta/orm/sandbox_config.py +1 -3
- letta/orm/step.py +1 -2
- letta/orm/tool.py +1 -0
- letta/otel/resource.py +2 -2
- letta/plugins/plugins.py +1 -1
- letta/prompts/prompt_generator.py +10 -2
- letta/schemas/agent.py +11 -0
- letta/schemas/archive.py +4 -0
- letta/schemas/block.py +13 -0
- letta/schemas/embedding_config.py +0 -1
- letta/schemas/enums.py +24 -7
- letta/schemas/group.py +12 -0
- letta/schemas/letta_message.py +55 -1
- letta/schemas/letta_message_content.py +28 -0
- letta/schemas/letta_request.py +21 -4
- letta/schemas/letta_stop_reason.py +9 -1
- letta/schemas/llm_config.py +24 -8
- letta/schemas/mcp.py +0 -3
- letta/schemas/memory.py +14 -0
- letta/schemas/message.py +245 -141
- letta/schemas/openai/chat_completion_request.py +2 -1
- letta/schemas/passage.py +1 -0
- letta/schemas/providers/bedrock.py +1 -1
- letta/schemas/providers/openai.py +2 -2
- letta/schemas/tool.py +11 -5
- letta/schemas/tool_execution_result.py +0 -1
- letta/schemas/tool_rule.py +71 -0
- letta/serialize_schemas/marshmallow_agent.py +1 -2
- letta/server/rest_api/app.py +3 -3
- letta/server/rest_api/auth/index.py +0 -1
- letta/server/rest_api/interface.py +3 -11
- letta/server/rest_api/redis_stream_manager.py +3 -4
- letta/server/rest_api/routers/v1/agents.py +143 -84
- letta/server/rest_api/routers/v1/blocks.py +1 -1
- letta/server/rest_api/routers/v1/folders.py +1 -1
- letta/server/rest_api/routers/v1/groups.py +23 -22
- letta/server/rest_api/routers/v1/internal_templates.py +68 -0
- letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
- letta/server/rest_api/routers/v1/sources.py +1 -1
- letta/server/rest_api/routers/v1/tools.py +167 -15
- letta/server/rest_api/streaming_response.py +4 -3
- letta/server/rest_api/utils.py +75 -18
- letta/server/server.py +24 -35
- letta/services/agent_manager.py +359 -45
- letta/services/agent_serialization_manager.py +23 -3
- letta/services/archive_manager.py +72 -3
- letta/services/block_manager.py +1 -2
- letta/services/context_window_calculator/token_counter.py +11 -6
- letta/services/file_manager.py +1 -3
- letta/services/files_agents_manager.py +2 -4
- letta/services/group_manager.py +73 -12
- letta/services/helpers/agent_manager_helper.py +5 -5
- letta/services/identity_manager.py +8 -3
- letta/services/job_manager.py +2 -14
- letta/services/llm_batch_manager.py +1 -3
- letta/services/mcp/base_client.py +1 -2
- letta/services/mcp_manager.py +5 -6
- letta/services/message_manager.py +536 -15
- letta/services/organization_manager.py +1 -2
- letta/services/passage_manager.py +287 -12
- letta/services/provider_manager.py +1 -3
- letta/services/sandbox_config_manager.py +12 -7
- letta/services/source_manager.py +1 -2
- letta/services/step_manager.py +0 -1
- letta/services/summarizer/summarizer.py +4 -2
- letta/services/telemetry_manager.py +1 -3
- letta/services/tool_executor/builtin_tool_executor.py +136 -316
- letta/services/tool_executor/core_tool_executor.py +231 -74
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/mcp_tool_executor.py +0 -1
- letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
- letta/services/tool_executor/sandbox_tool_executor.py +0 -1
- letta/services/tool_executor/tool_execution_sandbox.py +2 -3
- letta/services/tool_manager.py +181 -64
- letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
- letta/services/user_manager.py +1 -2
- letta/settings.py +5 -3
- letta/streaming_interface.py +3 -3
- letta/system.py +1 -1
- letta/utils.py +0 -1
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904104046.dist-info}/METADATA +11 -7
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904104046.dist-info}/RECORD +137 -135
- letta/llm_api/deepseek.py +0 -303
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904104046.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904104046.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904104046.dist-info}/licenses/LICENSE +0 -0
letta/llm_api/deepseek_client.py
CHANGED
@@ -1,21 +1,327 @@
+import json
 import os
+import re
+import warnings
 from typing import List, Optional

 from openai import AsyncOpenAI, AsyncStream, OpenAI
 from openai.types.chat.chat_completion import ChatCompletion
 from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

-from letta.llm_api.deepseek import convert_deepseek_response_to_chatcompletion, map_messages_to_deepseek_format
 from letta.llm_api.openai_client import OpenAIClient
 from letta.otel.tracing import trace_method
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
+from letta.schemas.openai.chat_completion_request import (
+    AssistantMessage,
+    ChatCompletionRequest,
+    ChatMessage,
+    FunctionCall as ToolFunctionChoiceFunctionCall,
+    Tool,
+    ToolFunctionChoice,
+    ToolMessage,
+    UserMessage,
+    cast_message_to_subtype,
+)
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.schemas.openai.openai import Function, ToolCall
 from letta.settings import model_settings
+from letta.utils import get_tool_call_id


-
+def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage:
+    """
+    Merge `ToolMessage` objects into the previous message.
+    """
+    previous_message.content += (
+        f"<ToolMessage> content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}</ToolMessage>"
+    )
+    return previous_message
+
+
+def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage:
+    """
+    For `AssistantMessage` objects, remove the `tool_calls` field and add them to the `content` field.
+    """
+
+    if "tool_calls" in assistant_message.dict().keys():
+        assistant_message.content = "".join(
+            [
+                # f"<ToolCall> name: {tool_call.function.name}, function: {tool_call.function}</ToolCall>"
+                f"<ToolCall> {json.dumps(tool_call.function.dict())} </ToolCall>"
+                for tool_call in assistant_message.tool_calls
+            ]
+        )
+        del assistant_message.tool_calls
+    return assistant_message
+
+
+def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]:
+    """
+    Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message.
+    Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.
+
+    This function merges ToolMessages into AssistantMessages and removes ToolCalls from AssistantMessages, and adds a dummy user message
+    at the end.
+
+    """
+    deepseek_messages = []
+    for idx, message in enumerate(messages):
+        # First message is the system prompt, add it
+        if idx == 0 and message.role == "system":
+            deepseek_messages.append(message)
+            continue
+        if message.role == "user":
+            if deepseek_messages[-1].role == "assistant" or deepseek_messages[-1].role == "system":
+                # User message, add it
+                deepseek_messages.append(UserMessage(content=message.content))
+            else:
+                # add to the content of the previous message
+                deepseek_messages[-1].content += message.content
+        elif message.role == "assistant":
+            if deepseek_messages[-1].role == "user":
+                # Assistant message, remove tool calls and add them to the content
+                deepseek_messages.append(handle_assistant_message(message))
+            else:
+                # add to the content of the previous message
+                deepseek_messages[-1].content += message.content
+        elif message.role == "tool" and deepseek_messages[-1].role == "assistant":
+            # Tool message, add it to the last assistant message
+            merged_message = merge_tool_message(deepseek_messages[-1], message)
+            deepseek_messages[-1] = merged_message
+        else:
+            print(f"Skipping message: {message}")
+
+    # This needs to end on a user message, add a dummy message if the last was assistant
+    if deepseek_messages[-1].role == "assistant":
+        deepseek_messages.append(UserMessage(content=""))
+    return deepseek_messages
+
+
+def build_deepseek_chat_completions_request(
+    llm_config: LLMConfig,
+    messages: List[_Message],
+    user_id: Optional[str],
+    functions: Optional[list],
+    function_call: Optional[str],
+    use_tool_naming: bool,
+    max_tokens: Optional[int],
+) -> ChatCompletionRequest:
+    # if functions and llm_config.put_inner_thoughts_in_kwargs:
+    #     # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
+    #     # TODO(fix)
+    #     inner_thoughts_desc = (
+    #         INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+    #     )
+    #     functions = add_inner_thoughts_to_functions(
+    #         functions=functions,
+    #         inner_thoughts_key=INNER_THOUGHTS_KWARG,
+    #         inner_thoughts_description=inner_thoughts_desc,
+    #     )
+
+    openai_message_list = [
+        cast_message_to_subtype(m) for m in PydanticMessage.to_openai_dicts_from_list(messages, put_inner_thoughts_in_kwargs=False)
+    ]
+
+    if llm_config.model:
+        model = llm_config.model
+    else:
+        warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+        model = None
+    if use_tool_naming:
+        if function_call is None:
+            tool_choice = None
+        elif function_call not in ["none", "auto", "required"]:
+            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
+        else:
+            tool_choice = function_call
+
+        def add_functions_to_system_message(system_message: ChatMessage):
+            system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+            system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
+
+        if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
+            add_functions_to_system_message(
+                openai_message_list[0]
+            )  # Inject additional instructions to the system prompt with the available functions
+
+            openai_message_list = map_messages_to_deepseek_format(openai_message_list)
+
+            data = ChatCompletionRequest(
+                model=model,
+                messages=openai_message_list,
+                user=str(user_id),
+                max_completion_tokens=max_tokens,
+                temperature=llm_config.temperature,
+            )
+        else:
+            data = ChatCompletionRequest(
+                model=model,
+                messages=openai_message_list,
+                tools=[Tool(type="function", function=f) for f in functions] if functions else None,
+                tool_choice=tool_choice,
+                user=str(user_id),
+                max_completion_tokens=max_tokens,
+                temperature=llm_config.temperature,
+            )
+    else:
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            functions=functions,
+            function_call=function_call,
+            user=str(user_id),
+            max_completion_tokens=max_tokens,
+            temperature=llm_config.temperature,
+        )
+
+    return data
+

+def convert_deepseek_response_to_chatcompletion(
+    response: ChatCompletionResponse,
+) -> ChatCompletionResponse:
+    """
+    Example response from DeepSeek (NOTE: as of 8/28/25, deepseek api does populate tool call in response):
+
+    ChatCompletion(
+        id='bc7f7d25-82e4-443a-b217-dfad2b66da8e',
+        choices=[
+            Choice(
+                finish_reason='stop',
+                index=0,
+                logprobs=None,
+                message=ChatCompletionMessage(
+                    content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}',
+                    refusal=None,
+                    role='assistant',
+                    audio=None,
+                    function_call=None,
+                    tool_calls=None,
+                    reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. Let me ask them how they\'re doing and mention whales to keep the conversation going.'
+                )
+            )
+        ],
+        created=1738266449,
+        model='deepseek-reasoner',
+        object='chat.completion',
+        service_tier=None,
+        system_fingerprint='fp_7e73fd9a08',
+        usage=CompletionUsage(
+            completion_tokens=111,
+            prompt_tokens=1270,
+            total_tokens=1381,
+            completion_tokens_details=CompletionTokensDetails(
+                accepted_prediction_tokens=None,
+                audio_tokens=None,
+                reasoning_tokens=72,
+                rejected_prediction_tokens=None
+            ),
+            prompt_tokens_details=PromptTokensDetails(
+                audio_tokens=None,
+                cached_tokens=1088
+            ),
+            prompt_cache_hit_tokens=1088,
+            prompt_cache_miss_tokens=182
+        )
+    )
+    """
+
+    def convert_dict_quotes(input_dict: dict):
+        """
+        Convert a dictionary with single-quoted keys to double-quoted keys,
+        properly handling boolean values and nested structures.
+
+        Args:
+            input_dict (dict): Input dictionary with single-quoted keys
+
+        Returns:
+            str: JSON string with double-quoted keys
+        """
+        # First convert the dictionary to a JSON string to handle booleans properly
+        json_str = json.dumps(input_dict)
+
+        # Function to handle complex string replacements
+        def replace_quotes(match):
+            key = match.group(1)
+            # Escape any existing double quotes in the key
+            key = key.replace('"', '\\"')
+            return f'"{key}":'
+
+        # Replace single-quoted keys with double-quoted keys
+        # This regex looks for single-quoted keys followed by a colon
+        def strip_json_block(text):
+            # Check if text starts with ```json or similar
+            if text.strip().startswith("```"):
+                # Split by \n to remove the first and last lines
+                lines = text.split("\n")[1:-1]
+                return "\n".join(lines)
+            return text
+
+        pattern = r"'([^']*)':"
+        converted_str = re.sub(pattern, replace_quotes, strip_json_block(json_str))
+
+        # Parse the string back to ensure valid JSON format
+        try:
+            json.loads(converted_str)
+            return converted_str
+        except json.JSONDecodeError as e:
+            raise ValueError(f"Failed to create valid JSON with double quotes: {str(e)}")
+
+    def extract_json_block(text):
+        # Find the first {
+        start = text.find("{")
+        if start == -1:
+            return text
+
+        # Track nested braces to find the matching closing brace
+        brace_count = 0
+        end = start
+
+        for i in range(start, len(text)):
+            if text[i] == "{":
+                brace_count += 1
+            elif text[i] == "}":
+                brace_count -= 1
+                if brace_count == 0:
+                    end = i + 1
+                    break
+
+        return text[start:end]
+
+    content = response.choices[0].message.content
+    try:
+        content_dict = json.loads(extract_json_block(content))
+
+        if type(content_dict["arguments"]) == str:
+            content_dict["arguments"] = json.loads(content_dict["arguments"])
+
+        tool_calls = [
+            ToolCall(
+                id=get_tool_call_id(),
+                type="function",
+                function=Function(
+                    name=content_dict["name"],
+                    arguments=convert_dict_quotes(content_dict["arguments"]),
+                ),
+            )
+        ]
+    except (json.JSONDecodeError, TypeError, KeyError) as e:
+        print(e)
+        tool_calls = response.choices[0].message.tool_calls
+        raise ValueError(f"Failed to create valid JSON {content}")
+
+    # Move the "reasoning_content" into the "content" field
+    response.choices[0].message.content = response.choices[0].message.reasoning_content
+    response.choices[0].message.tool_calls = tool_calls
+
+    # Remove the "reasoning_content" field
+    response.choices[0].message.reasoning_content = None
+
+    return response
+
+
+class DeepseekClient(OpenAIClient):
     def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
         return False

@@ -36,15 +342,21 @@ class DeepseekClient(OpenAIClient):
         data = super().build_request_data(messages, llm_config, tools, force_tool_call)

         def add_functions_to_system_message(system_message: ChatMessage):
-            system_message.content += f"<available functions> {''.join(json.dumps(f) for f in
+            system_message.content += f"<available functions> {''.join(json.dumps(f) for f in tools)} </available functions>"
             system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'

+        openai_message_list = [
+            cast_message_to_subtype(m) for m in PydanticMessage.to_openai_dicts_from_list(messages, put_inner_thoughts_in_kwargs=False)
+        ]
+
         if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
             add_functions_to_system_message(
-
+                openai_message_list[0]
             )  # Inject additional instructions to the system prompt with the available functions

-
+            openai_message_list = map_messages_to_deepseek_format(openai_message_list)
+
+            data["messages"] = [m.dict() for m in openai_message_list]

         return data

@@ -94,4 +406,6 @@ class DeepseekClient(OpenAIClient):
         Handles potential extraction of inner thoughts if they were added via kwargs.
         """
         response = ChatCompletionResponse(**response_data)
+        if response.choices[0].message.tool_calls:
+            return super().convert_response_to_chat_completion(response_data, input_messages, llm_config)
         return convert_deepseek_response_to_chatcompletion(response)
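Note: the alternation rule that the new `map_messages_to_deepseek_format` helper enforces can be illustrated with a small standalone sketch. This is not code from the package; it uses plain dicts and a hypothetical `to_alternating_turns` helper that only mirrors the behavior described in the docstring above (fold tool results into the preceding assistant turn, concatenate consecutive same-role turns, and end on a user turn).

# Standalone illustration (not package code): DeepSeek requires strict
# user/assistant alternation ending on a user message.
def to_alternating_turns(messages: list[dict]) -> list[dict]:
    out: list[dict] = []
    for msg in messages:
        role, content = msg["role"], msg.get("content", "")
        if role == "system" and not out:
            out.append({"role": "system", "content": content})
        elif role == "tool" and out and out[-1]["role"] == "assistant":
            # fold the tool result into the preceding assistant message
            out[-1]["content"] += f"<ToolMessage>{content}</ToolMessage>"
        elif out and out[-1]["role"] == role:
            # two turns with the same role in a row: concatenate them
            out[-1]["content"] += content
        else:
            out.append({"role": role, "content": content})
    if out and out[-1]["role"] == "assistant":
        out.append({"role": "user", "content": ""})  # must end on a user turn
    return out


history = [
    {"role": "system", "content": "You are a helpful agent."},
    {"role": "user", "content": "hello whales"},
    {"role": "assistant", "content": "<ToolCall>send_message</ToolCall>"},
    {"role": "tool", "content": "ok"},
]
print(to_alternating_turns(history))

Running this prints a four-item history whose last entry is an empty user turn, which is the shape the DeepSeek endpoint accepts.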
letta/llm_api/google_vertex_client.py
CHANGED
@@ -3,6 +3,7 @@ import uuid
 from typing import List, Optional

 from google import genai
+from google.genai import errors
 from google.genai.types import (
     FunctionCallingConfig,
     FunctionCallingConfigMode,
@@ -31,6 +32,7 @@ logger = get_logger(__name__)


 class GoogleVertexClient(LLMClientBase):
+    MAX_RETRIES = model_settings.gemini_max_retries

     def _get_client(self):
         timeout_ms = int(settings.llm_request_timeout_seconds * 1000)
@@ -60,12 +62,59 @@ class GoogleVertexClient(LLMClientBase):
         Performs underlying request to llm and returns raw response.
         """
         client = self._get_client()
-
-
-
-
-
-
+
+        # Gemini 2.5 models will often return MALFORMED_FUNCTION_CALL, force a retry
+        # https://github.com/googleapis/python-aiplatform/issues/4472
+        retry_count = 1
+        should_retry = True
+        while should_retry and retry_count <= self.MAX_RETRIES:
+            try:
+                response = await client.aio.models.generate_content(
+                    model=llm_config.model,
+                    contents=request_data["contents"],
+                    config=request_data["config"],
+                )
+            except errors.APIError as e:
+                # Retry on 503 and 500 errors as well, usually ephemeral from Gemini
+                if e.code == 503 or e.code == 500:
+                    logger.warning(f"Received {e}, retrying {retry_count}/{self.MAX_RETRIES}")
+                    retry_count += 1
+                    continue
+                raise e
+            except Exception as e:
+                raise e
+            response_data = response.model_dump()
+            is_malformed_function_call = self.is_malformed_function_call(response_data)
+            if is_malformed_function_call:
+                logger.warning(
+                    f"Received FinishReason.MALFORMED_FUNCTION_CALL in response for {llm_config.model}, retrying {retry_count}/{self.MAX_RETRIES}"
+                )
+                # Modify the last message if it's a heartbeat to include warning about special characters
+                if request_data["contents"] and len(request_data["contents"]) > 0:
+                    last_message = request_data["contents"][-1]
+                    if last_message.get("role") == "user" and last_message.get("parts"):
+                        for part in last_message["parts"]:
+                            if "text" in part:
+                                try:
+                                    # Try to parse as JSON to check if it's a heartbeat
+                                    message_json = json_loads(part["text"])
+                                    if message_json.get("type") == "heartbeat" and "reason" in message_json:
+                                        # Append warning to the reason
+                                        warning = f" RETRY {retry_count}/{self.MAX_RETRIES} ***DO NOT USE SPECIAL CHARACTERS OR QUOTATIONS INSIDE FUNCTION CALL ARGUMENTS. IF YOU MUST, MAKE SURE TO ESCAPE THEM PROPERLY***"
+                                        message_json["reason"] = message_json["reason"] + warning
+                                        # Update the text with modified JSON
+                                        part["text"] = json_dumps(message_json)
+                                        logger.warning(
+                                            f"Modified heartbeat message with special character warning for retry {retry_count}/{self.MAX_RETRIES}"
+                                        )
+                                except (json.JSONDecodeError, TypeError):
+                                    # Not a JSON message or not a heartbeat, skip modification
+                                    pass
+
+            should_retry = is_malformed_function_call
+            retry_count += 1
+
+        return response_data

     @staticmethod
     def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
@@ -230,10 +279,12 @@ class GoogleVertexClient(LLMClientBase):
             "contents": contents,
             "config": {
                 "temperature": llm_config.temperature,
-                "max_output_tokens": llm_config.max_tokens,
                 "tools": formatted_tools,
             },
         }
+        # Make tokens is optional
+        if llm_config.max_tokens:
+            request_data["config"]["max_output_tokens"] = llm_config.max_tokens

         if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
             request_data["config"]["response_mime_type"] = "application/json"
@@ -298,7 +349,6 @@ class GoogleVertexClient(LLMClientBase):
         }
         }
         """
-
         response = GenerateContentResponse(**response_data)
         try:
             choices = []
@@ -310,7 +360,7 @@ class GoogleVertexClient(LLMClientBase):
                 # This means the response is malformed like MALFORMED_FUNCTION_CALL
                 # NOTE: must be a ValueError to trigger a retry
                 if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
-                    raise ValueError(f"Error in response data from LLM: {candidate.finish_reason}
+                    raise ValueError(f"Error in response data from LLM: {candidate.finish_reason}")
                 else:
                     raise ValueError(f"Error in response data from LLM: {candidate.model_dump()}")

@@ -344,9 +394,9 @@ class GoogleVertexClient(LLMClientBase):
                 if llm_config.put_inner_thoughts_in_kwargs:
                     from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

-                    assert (
-
-                    )
+                    assert INNER_THOUGHTS_KWARG_VERTEX in function_args, (
+                        f"Couldn't find inner thoughts in function args:\n{function_call}"
+                    )
                     inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                     assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                 else:
@@ -380,9 +430,9 @@ class GoogleVertexClient(LLMClientBase):
                 if llm_config.put_inner_thoughts_in_kwargs:
                     from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

-                    assert (
-
-                    )
+                    assert INNER_THOUGHTS_KWARG_VERTEX in function_args, (
+                        f"Couldn't find inner thoughts in function args:\n{function_call}"
+                    )
                     inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                     assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                 else:
@@ -406,7 +456,7 @@ class GoogleVertexClient(LLMClientBase):

         except json.decoder.JSONDecodeError:
             if candidate.finish_reason == "MAX_TOKENS":
-                raise ValueError(
+                raise ValueError("Could not parse response data from LLM: exceeded max token limit")
             # Inner thoughts are the content by default
             inner_thoughts = response_message.text

@@ -463,7 +513,7 @@ class GoogleVertexClient(LLMClientBase):
             )
         else:
             # Count it ourselves
-            assert input_messages is not None,
+            assert input_messages is not None, "Didn't get UsageMetadata from the API response, so input_messages is required"
             prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
             completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
             total_tokens = prompt_tokens + completion_tokens
@@ -516,6 +566,14 @@ class GoogleVertexClient(LLMClientBase):
     def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
         return llm_config.model.startswith("gemini-2.5-flash") or llm_config.model.startswith("gemini-2.5-pro")

+    def is_malformed_function_call(self, response_data: dict) -> dict:
+        response = GenerateContentResponse(**response_data)
+        for candidate in response.candidates:
+            content = candidate.content
+            if content is None or content.role is None or content.parts is None:
+                return candidate.finish_reason == "MALFORMED_FUNCTION_CALL"
+        return False
+
     @trace_method
     def handle_llm_error(self, e: Exception) -> Exception:
         # Fallback to base implementation
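Note: the retry loop added above guards against two failure modes: transient 500/503 API errors from the Gemini backend and responses that finish with MALFORMED_FUNCTION_CALL. A minimal self-contained sketch of the same pattern follows; the names are hypothetical stand-ins, and the real logic lives in `GoogleVertexClient.request_async` and `is_malformed_function_call` as shown in the diff.

# Standalone illustration (not package code) of the bounded retry pattern.
import asyncio

MAX_RETRIES = 3  # stands in for model_settings.gemini_max_retries


class TransientServerError(Exception):
    """Stand-in for a google.genai APIError with HTTP code 500/503."""


async def call_with_retries(send_request, is_malformed):
    # Retry on transient server errors and on responses flagged as malformed,
    # up to MAX_RETRIES attempts; return the last response seen.
    response_data = None
    retry_count = 1
    while retry_count <= MAX_RETRIES:
        try:
            response_data = await send_request()
        except TransientServerError:
            retry_count += 1
            continue
        if not is_malformed(response_data):
            break
        retry_count += 1
    return response_data


attempts = {"count": 0}


async def flaky_request():
    # Toy request: fails once with a transient error, then succeeds.
    attempts["count"] += 1
    if attempts["count"] == 1:
        raise TransientServerError("503 Service Unavailable")
    return {"finish_reason": "STOP"}


result = asyncio.run(
    call_with_retries(flaky_request, lambda r: r.get("finish_reason") == "MALFORMED_FUNCTION_CALL")
)
print(result)  # {'finish_reason': 'STOP'}

Capping attempts at MAX_RETRIES keeps a persistently malformed response from looping forever while still absorbing one-off server hiccups.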
letta/llm_api/groq_client.py
CHANGED
letta/llm_api/helpers.py
CHANGED
@@ -310,7 +310,7 @@ def calculate_summarizer_cutoff(in_context_messages: List[Message], token_counts
             f"Given in_context_messages has different length from given token_counts: {len(in_context_messages)} != {len(token_counts)}"
         )

-    in_context_messages_openai =
+    in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)

     if summarizer_settings.evict_all_messages:
         logger.info("Evicting all messages...")
@@ -351,7 +351,7 @@ def calculate_summarizer_cutoff(in_context_messages: List[Message], token_counts


 def get_token_counts_for_messages(in_context_messages: List[Message]) -> List[int]:
-    in_context_messages_openai =
+    in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)
     token_counts = [count_tokens(str(msg)) for msg in in_context_messages_openai]
     return token_counts

letta/llm_api/llm_api_tools.py
CHANGED
@@ -7,7 +7,6 @@ import requests

 from letta.constants import CLI_WARNING_PREFIX
 from letta.errors import LettaConfigurationError, RateLimitExceededError
-from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
 from letta.llm_api.helpers import unpack_all_inner_thoughts_from_kwargs
 from letta.llm_api.openai import (
     build_openai_chat_completions_request,
@@ -146,7 +145,7 @@ def create(

     # Count the tokens first, if there's an overflow exit early by throwing an error up the stack
     # NOTE: we want to include a specific substring in the error message to trigger summarization
-    messages_oai_format =
+    messages_oai_format = Message.to_openai_dicts_from_list(messages)
     prompt_tokens = num_tokens_from_messages(messages=messages_oai_format, model=llm_config.model)
     function_tokens = num_tokens_from_functions(functions=functions, model=llm_config.model) if functions else 0
     if prompt_tokens + function_tokens > llm_config.context_window:
@@ -245,54 +244,6 @@ def create(

         return response

-    elif llm_config.model_endpoint_type == "deepseek":
-        if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
-            # only is a problem if we are *not* using an openai proxy
-            raise LettaConfigurationError(message="DeepSeek key is missing from letta config file", missing_fields=["deepseek_api_key"])
-
-        data = build_deepseek_chat_completions_request(
-            llm_config,
-            messages,
-            user_id,
-            functions,
-            function_call,
-            use_tool_naming,
-            llm_config.max_tokens,
-        )
-        if stream:  # Client requested token streaming
-            data.stream = True
-            assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
-                stream_interface, AgentRefreshStreamingInterface
-            ), type(stream_interface)
-            response = openai_chat_completions_process_stream(
-                url=llm_config.model_endpoint,
-                api_key=model_settings.deepseek_api_key,
-                chat_completion_request=data,
-                stream_interface=stream_interface,
-                name=name,
-                # TODO should we toggle for R1 vs V3?
-                expect_reasoning_content=True,
-            )
-        else:  # Client did not request token streaming (expect a blocking backend response)
-            data.stream = False
-            if isinstance(stream_interface, AgentChunkStreamingInterface):
-                stream_interface.stream_start()
-            try:
-                response = openai_chat_completions_request(
-                    url=llm_config.model_endpoint,
-                    api_key=model_settings.deepseek_api_key,
-                    chat_completion_request=data,
-                )
-            finally:
-                if isinstance(stream_interface, AgentChunkStreamingInterface):
-                    stream_interface.stream_end()
-        """
-        if llm_config.put_inner_thoughts_in_kwargs:
-            response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
-        """
-        response = convert_deepseek_response_to_chatcompletion(response)
-        return response
-
     # local model
     else:
         if stream:
letta/llm_api/llm_client.py
CHANGED
@@ -58,13 +58,6 @@ class LLMClient:
                     put_inner_thoughts_first=put_inner_thoughts_first,
                     actor=actor,
                 )
-            case ProviderType.openai | ProviderType.ollama:
-                from letta.llm_api.openai_client import OpenAIClient
-
-                return OpenAIClient(
-                    put_inner_thoughts_first=put_inner_thoughts_first,
-                    actor=actor,
-                )
             case ProviderType.together:
                 from letta.llm_api.together_client import TogetherClient

@@ -101,4 +94,9 @@ class LLMClient:
                     actor=actor,
                 )
             case _:
-
+                from letta.llm_api.openai_client import OpenAIClient
+
+                return OpenAIClient(
+                    put_inner_thoughts_first=put_inner_thoughts_first,
+                    actor=actor,
+                )
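Note: with the explicit `ProviderType.openai | ProviderType.ollama` case removed, any provider type not claimed by an earlier match arm now falls through to `OpenAIClient`. A simplified sketch of the resulting dispatch is below (hypothetical enum members and string return values, for illustration only).

# Standalone illustration (not package code) of the fallthrough dispatch.
from enum import Enum


class ProviderType(Enum):
    anthropic = "anthropic"
    google_vertex = "google_vertex"
    together = "together"
    openai = "openai"
    ollama = "ollama"


def pick_client(provider: ProviderType) -> str:
    match provider:
        case ProviderType.anthropic:
            return "AnthropicClient"
        case ProviderType.google_vertex:
            return "GoogleVertexClient"
        case ProviderType.together:
            return "TogetherClient"
        case _:
            # openai, ollama, and any other OpenAI-compatible provider fall
            # through to the OpenAI client instead of needing their own case
            return "OpenAIClient"


print(pick_client(ProviderType.ollama))  # -> OpenAIClient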
letta/llm_api/mistral.py
CHANGED
@@ -13,7 +13,7 @@ async def mistral_get_model_list_async(url: str, api_key: str) -> dict:
     if api_key is not None:
         headers["Authorization"] = f"Bearer {api_key}"

-    logger.debug(
+    logger.debug("Sending request to %s", url)

     async with aiohttp.ClientSession() as session:
         # TODO add query param "tool" to be true
|