letta-nightly 0.11.6.dev20250902104140__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +10 -14
  3. letta/agents/base_agent.py +18 -0
  4. letta/agents/helpers.py +32 -7
  5. letta/agents/letta_agent.py +953 -762
  6. letta/agents/voice_agent.py +1 -1
  7. letta/client/streaming.py +0 -1
  8. letta/constants.py +11 -8
  9. letta/errors.py +9 -0
  10. letta/functions/function_sets/base.py +77 -69
  11. letta/functions/function_sets/builtin.py +41 -22
  12. letta/functions/function_sets/multi_agent.py +1 -2
  13. letta/functions/schema_generator.py +0 -1
  14. letta/helpers/converters.py +8 -3
  15. letta/helpers/datetime_helpers.py +5 -4
  16. letta/helpers/message_helper.py +1 -2
  17. letta/helpers/pinecone_utils.py +0 -1
  18. letta/helpers/tool_rule_solver.py +10 -0
  19. letta/helpers/tpuf_client.py +848 -0
  20. letta/interface.py +8 -8
  21. letta/interfaces/anthropic_streaming_interface.py +7 -0
  22. letta/interfaces/openai_streaming_interface.py +29 -6
  23. letta/llm_api/anthropic_client.py +188 -18
  24. letta/llm_api/azure_client.py +0 -1
  25. letta/llm_api/bedrock_client.py +1 -2
  26. letta/llm_api/deepseek_client.py +319 -5
  27. letta/llm_api/google_vertex_client.py +75 -17
  28. letta/llm_api/groq_client.py +0 -1
  29. letta/llm_api/helpers.py +2 -2
  30. letta/llm_api/llm_api_tools.py +1 -50
  31. letta/llm_api/llm_client.py +6 -8
  32. letta/llm_api/mistral.py +1 -1
  33. letta/llm_api/openai.py +16 -13
  34. letta/llm_api/openai_client.py +31 -16
  35. letta/llm_api/together_client.py +0 -1
  36. letta/llm_api/xai_client.py +0 -1
  37. letta/local_llm/chat_completion_proxy.py +7 -6
  38. letta/local_llm/settings/settings.py +1 -1
  39. letta/orm/__init__.py +1 -0
  40. letta/orm/agent.py +8 -6
  41. letta/orm/archive.py +9 -1
  42. letta/orm/block.py +3 -4
  43. letta/orm/block_history.py +3 -1
  44. letta/orm/group.py +2 -3
  45. letta/orm/identity.py +1 -2
  46. letta/orm/job.py +1 -2
  47. letta/orm/llm_batch_items.py +1 -2
  48. letta/orm/message.py +8 -4
  49. letta/orm/mixins.py +18 -0
  50. letta/orm/organization.py +2 -0
  51. letta/orm/passage.py +8 -1
  52. letta/orm/passage_tag.py +55 -0
  53. letta/orm/sandbox_config.py +1 -3
  54. letta/orm/step.py +1 -2
  55. letta/orm/tool.py +1 -0
  56. letta/otel/resource.py +2 -2
  57. letta/plugins/plugins.py +1 -1
  58. letta/prompts/prompt_generator.py +10 -2
  59. letta/schemas/agent.py +11 -0
  60. letta/schemas/archive.py +4 -0
  61. letta/schemas/block.py +13 -0
  62. letta/schemas/embedding_config.py +0 -1
  63. letta/schemas/enums.py +24 -7
  64. letta/schemas/group.py +12 -0
  65. letta/schemas/letta_message.py +55 -1
  66. letta/schemas/letta_message_content.py +28 -0
  67. letta/schemas/letta_request.py +21 -4
  68. letta/schemas/letta_stop_reason.py +9 -1
  69. letta/schemas/llm_config.py +24 -8
  70. letta/schemas/mcp.py +0 -3
  71. letta/schemas/memory.py +14 -0
  72. letta/schemas/message.py +245 -141
  73. letta/schemas/openai/chat_completion_request.py +2 -1
  74. letta/schemas/passage.py +1 -0
  75. letta/schemas/providers/bedrock.py +1 -1
  76. letta/schemas/providers/openai.py +2 -2
  77. letta/schemas/tool.py +11 -5
  78. letta/schemas/tool_execution_result.py +0 -1
  79. letta/schemas/tool_rule.py +71 -0
  80. letta/serialize_schemas/marshmallow_agent.py +1 -2
  81. letta/server/rest_api/app.py +3 -3
  82. letta/server/rest_api/auth/index.py +0 -1
  83. letta/server/rest_api/interface.py +3 -11
  84. letta/server/rest_api/redis_stream_manager.py +3 -4
  85. letta/server/rest_api/routers/v1/agents.py +143 -84
  86. letta/server/rest_api/routers/v1/blocks.py +1 -1
  87. letta/server/rest_api/routers/v1/folders.py +1 -1
  88. letta/server/rest_api/routers/v1/groups.py +23 -22
  89. letta/server/rest_api/routers/v1/internal_templates.py +68 -0
  90. letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
  91. letta/server/rest_api/routers/v1/sources.py +1 -1
  92. letta/server/rest_api/routers/v1/tools.py +167 -15
  93. letta/server/rest_api/streaming_response.py +4 -3
  94. letta/server/rest_api/utils.py +75 -18
  95. letta/server/server.py +24 -35
  96. letta/services/agent_manager.py +359 -45
  97. letta/services/agent_serialization_manager.py +23 -3
  98. letta/services/archive_manager.py +72 -3
  99. letta/services/block_manager.py +1 -2
  100. letta/services/context_window_calculator/token_counter.py +11 -6
  101. letta/services/file_manager.py +1 -3
  102. letta/services/files_agents_manager.py +2 -4
  103. letta/services/group_manager.py +73 -12
  104. letta/services/helpers/agent_manager_helper.py +5 -5
  105. letta/services/identity_manager.py +8 -3
  106. letta/services/job_manager.py +2 -14
  107. letta/services/llm_batch_manager.py +1 -3
  108. letta/services/mcp/base_client.py +1 -2
  109. letta/services/mcp_manager.py +5 -6
  110. letta/services/message_manager.py +536 -15
  111. letta/services/organization_manager.py +1 -2
  112. letta/services/passage_manager.py +287 -12
  113. letta/services/provider_manager.py +1 -3
  114. letta/services/sandbox_config_manager.py +12 -7
  115. letta/services/source_manager.py +1 -2
  116. letta/services/step_manager.py +0 -1
  117. letta/services/summarizer/summarizer.py +4 -2
  118. letta/services/telemetry_manager.py +1 -3
  119. letta/services/tool_executor/builtin_tool_executor.py +136 -316
  120. letta/services/tool_executor/core_tool_executor.py +231 -74
  121. letta/services/tool_executor/files_tool_executor.py +2 -2
  122. letta/services/tool_executor/mcp_tool_executor.py +0 -1
  123. letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
  124. letta/services/tool_executor/sandbox_tool_executor.py +0 -1
  125. letta/services/tool_executor/tool_execution_sandbox.py +2 -3
  126. letta/services/tool_manager.py +181 -64
  127. letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
  128. letta/services/user_manager.py +1 -2
  129. letta/settings.py +5 -3
  130. letta/streaming_interface.py +3 -3
  131. letta/system.py +1 -1
  132. letta/utils.py +0 -1
  133. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
  134. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
  135. letta/llm_api/deepseek.py +0 -303
  136. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
  137. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
  138. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
letta/llm_api/deepseek_client.py CHANGED
@@ -1,21 +1,327 @@
+ import json
  import os
+ import re
+ import warnings
  from typing import List, Optional

  from openai import AsyncOpenAI, AsyncStream, OpenAI
  from openai.types.chat.chat_completion import ChatCompletion
  from openai.types.chat.chat_completion_chunk import ChatCompletionChunk

- from letta.llm_api.deepseek import convert_deepseek_response_to_chatcompletion, map_messages_to_deepseek_format
  from letta.llm_api.openai_client import OpenAIClient
  from letta.otel.tracing import trace_method
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
+ from letta.schemas.openai.chat_completion_request import (
+     AssistantMessage,
+     ChatCompletionRequest,
+     ChatMessage,
+     FunctionCall as ToolFunctionChoiceFunctionCall,
+     Tool,
+     ToolFunctionChoice,
+     ToolMessage,
+     UserMessage,
+     cast_message_to_subtype,
+ )
  from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+ from letta.schemas.openai.openai import Function, ToolCall
  from letta.settings import model_settings
+ from letta.utils import get_tool_call_id


- class DeepseekClient(OpenAIClient):
+ def merge_tool_message(previous_message: ChatMessage, tool_message: ToolMessage) -> ChatMessage:
+     """
+     Merge `ToolMessage` objects into the previous message.
+     """
+     previous_message.content += (
+         f"<ToolMessage> content: {tool_message.content}, role: {tool_message.role}, tool_call_id: {tool_message.tool_call_id}</ToolMessage>"
+     )
+     return previous_message
+
+
+ def handle_assistant_message(assistant_message: AssistantMessage) -> AssistantMessage:
+     """
+     For `AssistantMessage` objects, remove the `tool_calls` field and add them to the `content` field.
+     """
+
+     if "tool_calls" in assistant_message.dict().keys():
+         assistant_message.content = "".join(
+             [
+                 # f"<ToolCall> name: {tool_call.function.name}, function: {tool_call.function}</ToolCall>"
+                 f"<ToolCall> {json.dumps(tool_call.function.dict())} </ToolCall>"
+                 for tool_call in assistant_message.tool_calls
+             ]
+         )
+         del assistant_message.tool_calls
+     return assistant_message
+
+
+ def map_messages_to_deepseek_format(messages: List[ChatMessage]) -> List[_Message]:
+     """
+     Deepeek API has the following constraints: messages must be interleaved between user and assistant messages, ending on a user message.
+     Tools are currently unstable for V3 and not supported for R1 in the API: https://api-docs.deepseek.com/guides/function_calling.
+
+     This function merges ToolMessages into AssistantMessages and removes ToolCalls from AssistantMessages, and adds a dummy user message
+     at the end.
+
+     """
+     deepseek_messages = []
+     for idx, message in enumerate(messages):
+         # First message is the system prompt, add it
+         if idx == 0 and message.role == "system":
+             deepseek_messages.append(message)
+             continue
+         if message.role == "user":
+             if deepseek_messages[-1].role == "assistant" or deepseek_messages[-1].role == "system":
+                 # User message, add it
+                 deepseek_messages.append(UserMessage(content=message.content))
+             else:
+                 # add to the content of the previous message
+                 deepseek_messages[-1].content += message.content
+         elif message.role == "assistant":
+             if deepseek_messages[-1].role == "user":
+                 # Assistant message, remove tool calls and add them to the content
+                 deepseek_messages.append(handle_assistant_message(message))
+             else:
+                 # add to the content of the previous message
+                 deepseek_messages[-1].content += message.content
+         elif message.role == "tool" and deepseek_messages[-1].role == "assistant":
+             # Tool message, add it to the last assistant message
+             merged_message = merge_tool_message(deepseek_messages[-1], message)
+             deepseek_messages[-1] = merged_message
+         else:
+             print(f"Skipping message: {message}")
+
+     # This needs to end on a user message, add a dummy message if the last was assistant
+     if deepseek_messages[-1].role == "assistant":
+         deepseek_messages.append(UserMessage(content=""))
+     return deepseek_messages
+
+
+ def build_deepseek_chat_completions_request(
+     llm_config: LLMConfig,
+     messages: List[_Message],
+     user_id: Optional[str],
+     functions: Optional[list],
+     function_call: Optional[str],
+     use_tool_naming: bool,
+     max_tokens: Optional[int],
+ ) -> ChatCompletionRequest:
+     # if functions and llm_config.put_inner_thoughts_in_kwargs:
+     #     # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
+     #     # TODO(fix)
+     #     inner_thoughts_desc = (
+     #         INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+     #     )
+     #     functions = add_inner_thoughts_to_functions(
+     #         functions=functions,
+     #         inner_thoughts_key=INNER_THOUGHTS_KWARG,
+     #         inner_thoughts_description=inner_thoughts_desc,
+     #     )
+
+     openai_message_list = [
+         cast_message_to_subtype(m) for m in PydanticMessage.to_openai_dicts_from_list(messages, put_inner_thoughts_in_kwargs=False)
+     ]
+
+     if llm_config.model:
+         model = llm_config.model
+     else:
+         warnings.warn(f"Model type not set in llm_config: {llm_config.model_dump_json(indent=4)}")
+         model = None
+     if use_tool_naming:
+         if function_call is None:
+             tool_choice = None
+         elif function_call not in ["none", "auto", "required"]:
+             tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=function_call))
+         else:
+             tool_choice = function_call
+
+         def add_functions_to_system_message(system_message: ChatMessage):
+             system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+             system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'
+
+         if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
+             add_functions_to_system_message(
+                 openai_message_list[0]
+             )  # Inject additional instructions to the system prompt with the available functions
+
+             openai_message_list = map_messages_to_deepseek_format(openai_message_list)
+
+             data = ChatCompletionRequest(
+                 model=model,
+                 messages=openai_message_list,
+                 user=str(user_id),
+                 max_completion_tokens=max_tokens,
+                 temperature=llm_config.temperature,
+             )
+         else:
+             data = ChatCompletionRequest(
+                 model=model,
+                 messages=openai_message_list,
+                 tools=[Tool(type="function", function=f) for f in functions] if functions else None,
+                 tool_choice=tool_choice,
+                 user=str(user_id),
+                 max_completion_tokens=max_tokens,
+                 temperature=llm_config.temperature,
+             )
+     else:
+         data = ChatCompletionRequest(
+             model=model,
+             messages=openai_message_list,
+             functions=functions,
+             function_call=function_call,
+             user=str(user_id),
+             max_completion_tokens=max_tokens,
+             temperature=llm_config.temperature,
+         )
+
+     return data
+

+ def convert_deepseek_response_to_chatcompletion(
+     response: ChatCompletionResponse,
+ ) -> ChatCompletionResponse:
+     """
+     Example response from DeepSeek (NOTE: as of 8/28/25, deepseek api does populate tool call in response):
+
+     ChatCompletion(
+         id='bc7f7d25-82e4-443a-b217-dfad2b66da8e',
+         choices=[
+             Choice(
+                 finish_reason='stop',
+                 index=0,
+                 logprobs=None,
+                 message=ChatCompletionMessage(
+                     content='{"function": "send_message", "arguments": {"message": "Hey! Whales are such majestic creatures, aren\'t they? How\'s your day going? 🌊 "}}',
+                     refusal=None,
+                     role='assistant',
+                     audio=None,
+                     function_call=None,
+                     tool_calls=None,
+                     reasoning_content='Okay, the user said "hello whales". Hmm, that\'s an interesting greeting. Maybe they meant "hello there" or are they actually talking about whales? Let me check if I misheard. Whales are fascinating creatures. I should respond in a friendly way. Let me ask them how they\'re doing and mention whales to keep the conversation going.'
+                 )
+             )
+         ],
+         created=1738266449,
+         model='deepseek-reasoner',
+         object='chat.completion',
+         service_tier=None,
+         system_fingerprint='fp_7e73fd9a08',
+         usage=CompletionUsage(
+             completion_tokens=111,
+             prompt_tokens=1270,
+             total_tokens=1381,
+             completion_tokens_details=CompletionTokensDetails(
+                 accepted_prediction_tokens=None,
+                 audio_tokens=None,
+                 reasoning_tokens=72,
+                 rejected_prediction_tokens=None
+             ),
+             prompt_tokens_details=PromptTokensDetails(
+                 audio_tokens=None,
+                 cached_tokens=1088
+             ),
+             prompt_cache_hit_tokens=1088,
+             prompt_cache_miss_tokens=182
+         )
+     )
+     """
+
+     def convert_dict_quotes(input_dict: dict):
+         """
+         Convert a dictionary with single-quoted keys to double-quoted keys,
+         properly handling boolean values and nested structures.
+
+         Args:
+             input_dict (dict): Input dictionary with single-quoted keys
+
+         Returns:
+             str: JSON string with double-quoted keys
+         """
+         # First convert the dictionary to a JSON string to handle booleans properly
+         json_str = json.dumps(input_dict)
+
+         # Function to handle complex string replacements
+         def replace_quotes(match):
+             key = match.group(1)
+             # Escape any existing double quotes in the key
+             key = key.replace('"', '\\"')
+             return f'"{key}":'
+
+         # Replace single-quoted keys with double-quoted keys
+         # This regex looks for single-quoted keys followed by a colon
+         def strip_json_block(text):
+             # Check if text starts with ```json or similar
+             if text.strip().startswith("```"):
+                 # Split by \n to remove the first and last lines
+                 lines = text.split("\n")[1:-1]
+                 return "\n".join(lines)
+             return text
+
+         pattern = r"'([^']*)':"
+         converted_str = re.sub(pattern, replace_quotes, strip_json_block(json_str))
+
+         # Parse the string back to ensure valid JSON format
+         try:
+             json.loads(converted_str)
+             return converted_str
+         except json.JSONDecodeError as e:
+             raise ValueError(f"Failed to create valid JSON with double quotes: {str(e)}")
+
+     def extract_json_block(text):
+         # Find the first {
+         start = text.find("{")
+         if start == -1:
+             return text
+
+         # Track nested braces to find the matching closing brace
+         brace_count = 0
+         end = start
+
+         for i in range(start, len(text)):
+             if text[i] == "{":
+                 brace_count += 1
+             elif text[i] == "}":
+                 brace_count -= 1
+                 if brace_count == 0:
+                     end = i + 1
+                     break
+
+         return text[start:end]
+
+     content = response.choices[0].message.content
+     try:
+         content_dict = json.loads(extract_json_block(content))
+
+         if type(content_dict["arguments"]) == str:
+             content_dict["arguments"] = json.loads(content_dict["arguments"])
+
+         tool_calls = [
+             ToolCall(
+                 id=get_tool_call_id(),
+                 type="function",
+                 function=Function(
+                     name=content_dict["name"],
+                     arguments=convert_dict_quotes(content_dict["arguments"]),
+                 ),
+             )
+         ]
+     except (json.JSONDecodeError, TypeError, KeyError) as e:
+         print(e)
+         tool_calls = response.choices[0].message.tool_calls
+         raise ValueError(f"Failed to create valid JSON {content}")
+
+     # Move the "reasoning_content" into the "content" field
+     response.choices[0].message.content = response.choices[0].message.reasoning_content
+     response.choices[0].message.tool_calls = tool_calls
+
+     # Remove the "reasoning_content" field
+     response.choices[0].message.reasoning_content = None
+
+     return response
+
+
+ class DeepseekClient(OpenAIClient):
      def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
          return False

@@ -36,15 +342,21 @@ class DeepseekClient(OpenAIClient):
          data = super().build_request_data(messages, llm_config, tools, force_tool_call)

          def add_functions_to_system_message(system_message: ChatMessage):
-             system_message.content += f"<available functions> {''.join(json.dumps(f) for f in functions)} </available functions>"
+             system_message.content += f"<available functions> {''.join(json.dumps(f) for f in tools)} </available functions>"
              system_message.content += 'Select best function to call simply respond with a single json block with the fields "name" and "arguments". Use double quotes around the arguments.'

+         openai_message_list = [
+             cast_message_to_subtype(m) for m in PydanticMessage.to_openai_dicts_from_list(messages, put_inner_thoughts_in_kwargs=False)
+         ]
+
          if llm_config.model == "deepseek-reasoner":  # R1 currently doesn't support function calling natively
              add_functions_to_system_message(
-                 data["messages"][0]
+                 openai_message_list[0]
              )  # Inject additional instructions to the system prompt with the available functions

-             data["messages"] = map_messages_to_deepseek_format(data["messages"])
+             openai_message_list = map_messages_to_deepseek_format(openai_message_list)
+
+             data["messages"] = [m.dict() for m in openai_message_list]

          return data

@@ -94,4 +406,6 @@ class DeepseekClient(OpenAIClient):
          Handles potential extraction of inner thoughts if they were added via kwargs.
          """
          response = ChatCompletionResponse(**response_data)
+         if response.choices[0].message.tool_calls:
+             return super().convert_response_to_chat_completion(response_data, input_messages, llm_config)
          return convert_deepseek_response_to_chatcompletion(response)
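For context, a minimal standalone sketch of the message-flattening idea that `map_messages_to_deepseek_format` above implements: tool results are folded into the preceding assistant turn, consecutive same-role messages are merged, and the list is forced to end on a user turn. It uses plain dicts and an illustrative `flatten_for_deepseek` name rather than letta's ChatMessage types, so treat it as an approximation of the behavior, not the package's code.

# Simplified, self-contained sketch of the DeepSeek message-flattening idea.
# Messages are plain {"role": ..., "content": ...} dicts; the real helper
# operates on letta's ChatMessage subtypes instead.
from typing import Dict, List


def flatten_for_deepseek(messages: List[Dict[str, str]]) -> List[Dict[str, str]]:
    """Force a system -> user/assistant alternation that ends on a user turn."""
    out: List[Dict[str, str]] = []
    for msg in messages:
        role, content = msg["role"], msg.get("content", "")
        if not out and role == "system":
            out.append({"role": "system", "content": content})
        elif role == "tool" and out and out[-1]["role"] == "assistant":
            # Fold the tool result into the previous assistant turn
            out[-1]["content"] += f"<ToolMessage>{content}</ToolMessage>"
        elif out and out[-1]["role"] == role:
            # Merge consecutive same-role messages
            out[-1]["content"] += content
        else:
            out.append({"role": role, "content": content})
    if out and out[-1]["role"] == "assistant":
        # DeepSeek expects the conversation to end on a user message
        out.append({"role": "user", "content": ""})
    return out


if __name__ == "__main__":
    history = [
        {"role": "system", "content": "You are a helpful agent."},
        {"role": "user", "content": "hello whales"},
        {"role": "assistant", "content": '<ToolCall>{"name": "send_message"}</ToolCall>'},
        {"role": "tool", "content": '{"status": "OK"}'},
    ]
    print(flatten_for_deepseek(history))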
letta/llm_api/google_vertex_client.py CHANGED
@@ -3,6 +3,7 @@ import uuid
  from typing import List, Optional

  from google import genai
+ from google.genai import errors
  from google.genai.types import (
      FunctionCallingConfig,
      FunctionCallingConfigMode,
@@ -31,6 +32,7 @@ logger = get_logger(__name__)


  class GoogleVertexClient(LLMClientBase):
+     MAX_RETRIES = model_settings.gemini_max_retries

      def _get_client(self):
          timeout_ms = int(settings.llm_request_timeout_seconds * 1000)
@@ -60,12 +62,59 @@ class GoogleVertexClient(LLMClientBase):
          Performs underlying request to llm and returns raw response.
          """
          client = self._get_client()
-         response = await client.aio.models.generate_content(
-             model=llm_config.model,
-             contents=request_data["contents"],
-             config=request_data["config"],
-         )
-         return response.model_dump()
+
+         # Gemini 2.5 models will often return MALFORMED_FUNCTION_CALL, force a retry
+         # https://github.com/googleapis/python-aiplatform/issues/4472
+         retry_count = 1
+         should_retry = True
+         while should_retry and retry_count <= self.MAX_RETRIES:
+             try:
+                 response = await client.aio.models.generate_content(
+                     model=llm_config.model,
+                     contents=request_data["contents"],
+                     config=request_data["config"],
+                 )
+             except errors.APIError as e:
+                 # Retry on 503 and 500 errors as well, usually ephemeral from Gemini
+                 if e.code == 503 or e.code == 500:
+                     logger.warning(f"Received {e}, retrying {retry_count}/{self.MAX_RETRIES}")
+                     retry_count += 1
+                     continue
+                 raise e
+             except Exception as e:
+                 raise e
+             response_data = response.model_dump()
+             is_malformed_function_call = self.is_malformed_function_call(response_data)
+             if is_malformed_function_call:
+                 logger.warning(
+                     f"Received FinishReason.MALFORMED_FUNCTION_CALL in response for {llm_config.model}, retrying {retry_count}/{self.MAX_RETRIES}"
+                 )
+                 # Modify the last message if it's a heartbeat to include warning about special characters
+                 if request_data["contents"] and len(request_data["contents"]) > 0:
+                     last_message = request_data["contents"][-1]
+                     if last_message.get("role") == "user" and last_message.get("parts"):
+                         for part in last_message["parts"]:
+                             if "text" in part:
+                                 try:
+                                     # Try to parse as JSON to check if it's a heartbeat
+                                     message_json = json_loads(part["text"])
+                                     if message_json.get("type") == "heartbeat" and "reason" in message_json:
+                                         # Append warning to the reason
+                                         warning = f" RETRY {retry_count}/{self.MAX_RETRIES} ***DO NOT USE SPECIAL CHARACTERS OR QUOTATIONS INSIDE FUNCTION CALL ARGUMENTS. IF YOU MUST, MAKE SURE TO ESCAPE THEM PROPERLY***"
+                                         message_json["reason"] = message_json["reason"] + warning
+                                         # Update the text with modified JSON
+                                         part["text"] = json_dumps(message_json)
+                                         logger.warning(
+                                             f"Modified heartbeat message with special character warning for retry {retry_count}/{self.MAX_RETRIES}"
+                                         )
+                                 except (json.JSONDecodeError, TypeError):
+                                     # Not a JSON message or not a heartbeat, skip modification
+                                     pass
+
+             should_retry = is_malformed_function_call
+             retry_count += 1
+
+         return response_data

      @staticmethod
      def add_dummy_model_messages(messages: List[dict]) -> List[dict]:
@@ -230,10 +279,12 @@ class GoogleVertexClient(LLMClientBase):
              "contents": contents,
              "config": {
                  "temperature": llm_config.temperature,
-                 "max_output_tokens": llm_config.max_tokens,
                  "tools": formatted_tools,
              },
          }
+         # Make tokens is optional
+         if llm_config.max_tokens:
+             request_data["config"]["max_output_tokens"] = llm_config.max_tokens

          if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
              request_data["config"]["response_mime_type"] = "application/json"
@@ -298,7 +349,6 @@ class GoogleVertexClient(LLMClientBase):
              }
          }
          """
-
          response = GenerateContentResponse(**response_data)
          try:
              choices = []
@@ -310,7 +360,7 @@ class GoogleVertexClient(LLMClientBase):
                  # This means the response is malformed like MALFORMED_FUNCTION_CALL
                  # NOTE: must be a ValueError to trigger a retry
                  if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
-                     raise ValueError(f"Error in response data from LLM: {candidate.finish_reason}...")
+                     raise ValueError(f"Error in response data from LLM: {candidate.finish_reason}")
                  else:
                      raise ValueError(f"Error in response data from LLM: {candidate.model_dump()}")

@@ -344,9 +394,9 @@ class GoogleVertexClient(LLMClientBase):
                      if llm_config.put_inner_thoughts_in_kwargs:
                          from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

-                         assert (
-                             INNER_THOUGHTS_KWARG_VERTEX in function_args
-                         ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+                         assert INNER_THOUGHTS_KWARG_VERTEX in function_args, (
+                             f"Couldn't find inner thoughts in function args:\n{function_call}"
+                         )
                          inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                          assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                      else:
@@ -380,9 +430,9 @@ class GoogleVertexClient(LLMClientBase):
                      if llm_config.put_inner_thoughts_in_kwargs:
                          from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

-                         assert (
-                             INNER_THOUGHTS_KWARG_VERTEX in function_args
-                         ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+                         assert INNER_THOUGHTS_KWARG_VERTEX in function_args, (
+                             f"Couldn't find inner thoughts in function args:\n{function_call}"
+                         )
                          inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                          assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                      else:
@@ -406,7 +456,7 @@ class GoogleVertexClient(LLMClientBase):

              except json.decoder.JSONDecodeError:
                  if candidate.finish_reason == "MAX_TOKENS":
-                     raise ValueError(f"Could not parse response data from LLM: exceeded max token limit")
+                     raise ValueError("Could not parse response data from LLM: exceeded max token limit")
                  # Inner thoughts are the content by default
                  inner_thoughts = response_message.text

@@ -463,7 +513,7 @@ class GoogleVertexClient(LLMClientBase):
              )
          else:
              # Count it ourselves
-             assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+             assert input_messages is not None, "Didn't get UsageMetadata from the API response, so input_messages is required"
              prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
              completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
              total_tokens = prompt_tokens + completion_tokens
@@ -516,6 +566,14 @@ class GoogleVertexClient(LLMClientBase):
      def is_reasoning_model(self, llm_config: LLMConfig) -> bool:
          return llm_config.model.startswith("gemini-2.5-flash") or llm_config.model.startswith("gemini-2.5-pro")

+     def is_malformed_function_call(self, response_data: dict) -> dict:
+         response = GenerateContentResponse(**response_data)
+         for candidate in response.candidates:
+             content = candidate.content
+             if content is None or content.role is None or content.parts is None:
+                 return candidate.finish_reason == "MALFORMED_FUNCTION_CALL"
+         return False
+
      @trace_method
      def handle_llm_error(self, e: Exception) -> Exception:
          # Fallback to base implementation
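The request_async change above wraps the Gemini call in a bounded retry loop that retries on transient 500/503 errors and on MALFORMED_FUNCTION_CALL responses. A minimal standalone sketch of that general pattern follows; names such as `call_with_retries` and `is_malformed` are illustrative and not letta's API.

# Self-contained sketch of a bounded retry loop around an async LLM call.
# It retries on transient errors and on responses flagged as malformed,
# mirroring the shape of the change above.
import asyncio
import random
from typing import Awaitable, Callable, Dict

MAX_RETRIES = 3


def is_malformed(response: Dict) -> bool:
    # Stand-in check; the real client inspects candidate.finish_reason in the response dump
    return response.get("finish_reason") == "MALFORMED_FUNCTION_CALL"


async def call_with_retries(make_request: Callable[[], Awaitable[Dict]]) -> Dict:
    last_response: Dict = {}
    for attempt in range(1, MAX_RETRIES + 1):
        try:
            last_response = await make_request()
        except RuntimeError as exc:  # stand-in for transient 500/503 API errors
            print(f"Transient error: {exc}, retrying {attempt}/{MAX_RETRIES}")
            continue
        if not is_malformed(last_response):
            return last_response
        print(f"Malformed function call, retrying {attempt}/{MAX_RETRIES}")
    return last_response  # whatever the last attempt produced


async def fake_request() -> Dict:
    # Simulated backend that sometimes returns a malformed function call
    if random.random() < 0.5:
        return {"finish_reason": "MALFORMED_FUNCTION_CALL"}
    return {"finish_reason": "STOP", "text": "ok"}


if __name__ == "__main__":
    print(asyncio.run(call_with_retries(fake_request)))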
letta/llm_api/groq_client.py CHANGED
@@ -14,7 +14,6 @@ from letta.settings import model_settings


  class GroqClient(OpenAIClient):
-
      def requires_auto_tool_choice(self, llm_config: LLMConfig) -> bool:
          return False

letta/llm_api/helpers.py CHANGED
@@ -310,7 +310,7 @@ def calculate_summarizer_cutoff(in_context_messages: List[Message], token_counts
              f"Given in_context_messages has different length from given token_counts: {len(in_context_messages)} != {len(token_counts)}"
          )

-     in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
+     in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)

      if summarizer_settings.evict_all_messages:
          logger.info("Evicting all messages...")
@@ -351,7 +351,7 @@ def calculate_summarizer_cutoff(in_context_messages: List[Message], token_counts


  def get_token_counts_for_messages(in_context_messages: List[Message]) -> List[int]:
-     in_context_messages_openai = [m.to_openai_dict() for m in in_context_messages]
+     in_context_messages_openai = Message.to_openai_dicts_from_list(in_context_messages)
      token_counts = [count_tokens(str(msg)) for msg in in_context_messages_openai]
      return token_counts

letta/llm_api/llm_api_tools.py CHANGED
@@ -7,7 +7,6 @@ import requests

  from letta.constants import CLI_WARNING_PREFIX
  from letta.errors import LettaConfigurationError, RateLimitExceededError
- from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
  from letta.llm_api.helpers import unpack_all_inner_thoughts_from_kwargs
  from letta.llm_api.openai import (
      build_openai_chat_completions_request,
@@ -146,7 +145,7 @@ def create(

      # Count the tokens first, if there's an overflow exit early by throwing an error up the stack
      # NOTE: we want to include a specific substring in the error message to trigger summarization
-     messages_oai_format = [m.to_openai_dict() for m in messages]
+     messages_oai_format = Message.to_openai_dicts_from_list(messages)
      prompt_tokens = num_tokens_from_messages(messages=messages_oai_format, model=llm_config.model)
      function_tokens = num_tokens_from_functions(functions=functions, model=llm_config.model) if functions else 0
      if prompt_tokens + function_tokens > llm_config.context_window:
@@ -245,54 +244,6 @@ def create(

          return response

-     elif llm_config.model_endpoint_type == "deepseek":
-         if model_settings.deepseek_api_key is None and llm_config.model_endpoint == "":
-             # only is a problem if we are *not* using an openai proxy
-             raise LettaConfigurationError(message="DeepSeek key is missing from letta config file", missing_fields=["deepseek_api_key"])
-
-         data = build_deepseek_chat_completions_request(
-             llm_config,
-             messages,
-             user_id,
-             functions,
-             function_call,
-             use_tool_naming,
-             llm_config.max_tokens,
-         )
-         if stream:  # Client requested token streaming
-             data.stream = True
-             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
-                 stream_interface, AgentRefreshStreamingInterface
-             ), type(stream_interface)
-             response = openai_chat_completions_process_stream(
-                 url=llm_config.model_endpoint,
-                 api_key=model_settings.deepseek_api_key,
-                 chat_completion_request=data,
-                 stream_interface=stream_interface,
-                 name=name,
-                 # TODO should we toggle for R1 vs V3?
-                 expect_reasoning_content=True,
-             )
-         else:  # Client did not request token streaming (expect a blocking backend response)
-             data.stream = False
-             if isinstance(stream_interface, AgentChunkStreamingInterface):
-                 stream_interface.stream_start()
-             try:
-                 response = openai_chat_completions_request(
-                     url=llm_config.model_endpoint,
-                     api_key=model_settings.deepseek_api_key,
-                     chat_completion_request=data,
-                 )
-             finally:
-                 if isinstance(stream_interface, AgentChunkStreamingInterface):
-                     stream_interface.stream_end()
-         """
-         if llm_config.put_inner_thoughts_in_kwargs:
-             response = unpack_all_inner_thoughts_from_kwargs(response=response, inner_thoughts_key=INNER_THOUGHTS_KWARG)
-         """
-         response = convert_deepseek_response_to_chatcompletion(response)
-         return response
-
      # local model
      else:
          if stream:
letta/llm_api/llm_client.py CHANGED
@@ -58,13 +58,6 @@ class LLMClient:
                      put_inner_thoughts_first=put_inner_thoughts_first,
                      actor=actor,
                  )
-             case ProviderType.openai | ProviderType.ollama:
-                 from letta.llm_api.openai_client import OpenAIClient
-
-                 return OpenAIClient(
-                     put_inner_thoughts_first=put_inner_thoughts_first,
-                     actor=actor,
-                 )
              case ProviderType.together:
                  from letta.llm_api.together_client import TogetherClient

@@ -101,4 +94,9 @@ class LLMClient:
                      actor=actor,
                  )
              case _:
-                 return None
+                 from letta.llm_api.openai_client import OpenAIClient
+
+                 return OpenAIClient(
+                     put_inner_thoughts_first=put_inner_thoughts_first,
+                     actor=actor,
+                 )
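With this llm_client.py change, the explicit openai/ollama cases are gone and the catch-all `case _:` now returns the OpenAI client, making OpenAI-compatible handling the default for any provider type without a dedicated client. A minimal standalone sketch of that dispatch-with-fallback pattern (illustrative names only, not letta's classes):

# Self-contained sketch of a provider-dispatch factory that falls back to an
# OpenAI-compatible client for any unrecognized provider type (Python 3.10+).
from enum import Enum


class ProviderType(str, Enum):
    anthropic = "anthropic"
    google_vertex = "google_vertex"
    openai = "openai"
    ollama = "ollama"


class OpenAICompatibleClient:
    def __init__(self, provider: ProviderType) -> None:
        self.provider = provider


class AnthropicClient(OpenAICompatibleClient):
    pass


def create_client(provider: ProviderType) -> OpenAICompatibleClient:
    match provider:
        case ProviderType.anthropic:
            return AnthropicClient(provider)
        case _:
            # openai, ollama, and any future provider use the OpenAI-compatible path
            return OpenAICompatibleClient(provider)


if __name__ == "__main__":
    print(type(create_client(ProviderType.ollama)).__name__)  # -> OpenAICompatibleClient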
letta/llm_api/mistral.py CHANGED
@@ -13,7 +13,7 @@ async def mistral_get_model_list_async(url: str, api_key: str) -> dict:
      if api_key is not None:
          headers["Authorization"] = f"Bearer {api_key}"

-     logger.debug(f"Sending request to %s", url)
+     logger.debug("Sending request to %s", url)

      async with aiohttp.ClientSession() as session:
          # TODO add query param "tool" to be true
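The mistral.py change drops the f-string prefix so that the `%s` placeholder is substituted by the logging module itself rather than left verbatim in the message. A quick self-contained illustration of the difference, using only the standard library (the URL below is just a placeholder):

# With %-style placeholders, the logging module performs the substitution only
# when the record is actually emitted; the old f"... %s ..." form never
# substituted `url` at all, since f-strings do not interpret %s.
import logging

logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger("example")

url = "https://api.mistral.ai/v1/models"
logger.debug("Sending request to %s", url)   # lazy, substituted by logging
logger.debug(f"Sending request to {url}")    # eager f-string alternative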