letta-nightly 0.11.6.dev20250902104140__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +10 -14
  3. letta/agents/base_agent.py +18 -0
  4. letta/agents/helpers.py +32 -7
  5. letta/agents/letta_agent.py +953 -762
  6. letta/agents/voice_agent.py +1 -1
  7. letta/client/streaming.py +0 -1
  8. letta/constants.py +11 -8
  9. letta/errors.py +9 -0
  10. letta/functions/function_sets/base.py +77 -69
  11. letta/functions/function_sets/builtin.py +41 -22
  12. letta/functions/function_sets/multi_agent.py +1 -2
  13. letta/functions/schema_generator.py +0 -1
  14. letta/helpers/converters.py +8 -3
  15. letta/helpers/datetime_helpers.py +5 -4
  16. letta/helpers/message_helper.py +1 -2
  17. letta/helpers/pinecone_utils.py +0 -1
  18. letta/helpers/tool_rule_solver.py +10 -0
  19. letta/helpers/tpuf_client.py +848 -0
  20. letta/interface.py +8 -8
  21. letta/interfaces/anthropic_streaming_interface.py +7 -0
  22. letta/interfaces/openai_streaming_interface.py +29 -6
  23. letta/llm_api/anthropic_client.py +188 -18
  24. letta/llm_api/azure_client.py +0 -1
  25. letta/llm_api/bedrock_client.py +1 -2
  26. letta/llm_api/deepseek_client.py +319 -5
  27. letta/llm_api/google_vertex_client.py +75 -17
  28. letta/llm_api/groq_client.py +0 -1
  29. letta/llm_api/helpers.py +2 -2
  30. letta/llm_api/llm_api_tools.py +1 -50
  31. letta/llm_api/llm_client.py +6 -8
  32. letta/llm_api/mistral.py +1 -1
  33. letta/llm_api/openai.py +16 -13
  34. letta/llm_api/openai_client.py +31 -16
  35. letta/llm_api/together_client.py +0 -1
  36. letta/llm_api/xai_client.py +0 -1
  37. letta/local_llm/chat_completion_proxy.py +7 -6
  38. letta/local_llm/settings/settings.py +1 -1
  39. letta/orm/__init__.py +1 -0
  40. letta/orm/agent.py +8 -6
  41. letta/orm/archive.py +9 -1
  42. letta/orm/block.py +3 -4
  43. letta/orm/block_history.py +3 -1
  44. letta/orm/group.py +2 -3
  45. letta/orm/identity.py +1 -2
  46. letta/orm/job.py +1 -2
  47. letta/orm/llm_batch_items.py +1 -2
  48. letta/orm/message.py +8 -4
  49. letta/orm/mixins.py +18 -0
  50. letta/orm/organization.py +2 -0
  51. letta/orm/passage.py +8 -1
  52. letta/orm/passage_tag.py +55 -0
  53. letta/orm/sandbox_config.py +1 -3
  54. letta/orm/step.py +1 -2
  55. letta/orm/tool.py +1 -0
  56. letta/otel/resource.py +2 -2
  57. letta/plugins/plugins.py +1 -1
  58. letta/prompts/prompt_generator.py +10 -2
  59. letta/schemas/agent.py +11 -0
  60. letta/schemas/archive.py +4 -0
  61. letta/schemas/block.py +13 -0
  62. letta/schemas/embedding_config.py +0 -1
  63. letta/schemas/enums.py +24 -7
  64. letta/schemas/group.py +12 -0
  65. letta/schemas/letta_message.py +55 -1
  66. letta/schemas/letta_message_content.py +28 -0
  67. letta/schemas/letta_request.py +21 -4
  68. letta/schemas/letta_stop_reason.py +9 -1
  69. letta/schemas/llm_config.py +24 -8
  70. letta/schemas/mcp.py +0 -3
  71. letta/schemas/memory.py +14 -0
  72. letta/schemas/message.py +245 -141
  73. letta/schemas/openai/chat_completion_request.py +2 -1
  74. letta/schemas/passage.py +1 -0
  75. letta/schemas/providers/bedrock.py +1 -1
  76. letta/schemas/providers/openai.py +2 -2
  77. letta/schemas/tool.py +11 -5
  78. letta/schemas/tool_execution_result.py +0 -1
  79. letta/schemas/tool_rule.py +71 -0
  80. letta/serialize_schemas/marshmallow_agent.py +1 -2
  81. letta/server/rest_api/app.py +3 -3
  82. letta/server/rest_api/auth/index.py +0 -1
  83. letta/server/rest_api/interface.py +3 -11
  84. letta/server/rest_api/redis_stream_manager.py +3 -4
  85. letta/server/rest_api/routers/v1/agents.py +143 -84
  86. letta/server/rest_api/routers/v1/blocks.py +1 -1
  87. letta/server/rest_api/routers/v1/folders.py +1 -1
  88. letta/server/rest_api/routers/v1/groups.py +23 -22
  89. letta/server/rest_api/routers/v1/internal_templates.py +68 -0
  90. letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
  91. letta/server/rest_api/routers/v1/sources.py +1 -1
  92. letta/server/rest_api/routers/v1/tools.py +167 -15
  93. letta/server/rest_api/streaming_response.py +4 -3
  94. letta/server/rest_api/utils.py +75 -18
  95. letta/server/server.py +24 -35
  96. letta/services/agent_manager.py +359 -45
  97. letta/services/agent_serialization_manager.py +23 -3
  98. letta/services/archive_manager.py +72 -3
  99. letta/services/block_manager.py +1 -2
  100. letta/services/context_window_calculator/token_counter.py +11 -6
  101. letta/services/file_manager.py +1 -3
  102. letta/services/files_agents_manager.py +2 -4
  103. letta/services/group_manager.py +73 -12
  104. letta/services/helpers/agent_manager_helper.py +5 -5
  105. letta/services/identity_manager.py +8 -3
  106. letta/services/job_manager.py +2 -14
  107. letta/services/llm_batch_manager.py +1 -3
  108. letta/services/mcp/base_client.py +1 -2
  109. letta/services/mcp_manager.py +5 -6
  110. letta/services/message_manager.py +536 -15
  111. letta/services/organization_manager.py +1 -2
  112. letta/services/passage_manager.py +287 -12
  113. letta/services/provider_manager.py +1 -3
  114. letta/services/sandbox_config_manager.py +12 -7
  115. letta/services/source_manager.py +1 -2
  116. letta/services/step_manager.py +0 -1
  117. letta/services/summarizer/summarizer.py +4 -2
  118. letta/services/telemetry_manager.py +1 -3
  119. letta/services/tool_executor/builtin_tool_executor.py +136 -316
  120. letta/services/tool_executor/core_tool_executor.py +231 -74
  121. letta/services/tool_executor/files_tool_executor.py +2 -2
  122. letta/services/tool_executor/mcp_tool_executor.py +0 -1
  123. letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
  124. letta/services/tool_executor/sandbox_tool_executor.py +0 -1
  125. letta/services/tool_executor/tool_execution_sandbox.py +2 -3
  126. letta/services/tool_manager.py +181 -64
  127. letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
  128. letta/services/user_manager.py +1 -2
  129. letta/settings.py +5 -3
  130. letta/streaming_interface.py +3 -3
  131. letta/system.py +1 -1
  132. letta/utils.py +0 -1
  133. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
  134. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
  135. letta/llm_api/deepseek.py +0 -303
  136. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
  137. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
  138. {letta_nightly-0.11.6.dev20250902104140.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
letta/interface.py CHANGED
@@ -198,23 +198,23 @@ class CLIInterface(AgentInterface):
  try:
  msg_dict = eval(function_args)
  if function_name == "archival_memory_search":
- output = f'\tquery: {msg_dict["query"]}, page: {msg_dict["page"]}'
+ output = f"\tquery: {msg_dict['query']}, page: {msg_dict['page']}"
  if STRIP_UI:
  print(output)
  else:
  print(f"{Fore.RED}{output}{Style.RESET_ALL}")
  elif function_name == "archival_memory_insert":
- output = f'\t→ {msg_dict["content"]}'
+ output = f"\t→ {msg_dict['content']}"
  if STRIP_UI:
  print(output)
  else:
  print(f"{Style.BRIGHT}{Fore.RED}{output}{Style.RESET_ALL}")
  else:
  if STRIP_UI:
- print(f'\t {msg_dict["old_content"]}\n\t→ {msg_dict["new_content"]}')
+ print(f"\t {msg_dict['old_content']}\n\t→ {msg_dict['new_content']}")
  else:
  print(
- f'{Style.BRIGHT}\t{Fore.RED} {msg_dict["old_content"]}\n\t{Fore.GREEN}→ {msg_dict["new_content"]}{Style.RESET_ALL}'
+ f"{Style.BRIGHT}\t{Fore.RED} {msg_dict['old_content']}\n\t{Fore.GREEN}→ {msg_dict['new_content']}{Style.RESET_ALL}"
  )
  except Exception as e:
  printd(str(e))
@@ -223,7 +223,7 @@ class CLIInterface(AgentInterface):
  print_function_message("🧠", f"searching memory with {function_name}")
  try:
  msg_dict = eval(function_args)
- output = f'\tquery: {msg_dict["query"]}, page: {msg_dict["page"]}'
+ output = f"\tquery: {msg_dict['query']}, page: {msg_dict['page']}"
  if STRIP_UI:
  print(output)
  else:
@@ -248,7 +248,7 @@ class CLIInterface(AgentInterface):
  @staticmethod
  def print_messages(message_sequence: List[Message], dump=False):
  # rewrite to dict format
- message_sequence = [msg.to_openai_dict() for msg in message_sequence]
+ message_sequence = Message.to_openai_dicts_from_list(message_sequence)

  idx = len(message_sequence)
  for msg in message_sequence:
@@ -291,7 +291,7 @@ class CLIInterface(AgentInterface):
  @staticmethod
  def print_messages_simple(message_sequence: List[Message]):
  # rewrite to dict format
- message_sequence = [msg.to_openai_dict() for msg in message_sequence]
+ message_sequence = Message.to_openai_dicts_from_list(message_sequence)

  for msg in message_sequence:
  role = msg["role"]
@@ -309,7 +309,7 @@ class CLIInterface(AgentInterface):
  @staticmethod
  def print_messages_raw(message_sequence: List[Message]):
  # rewrite to dict format
- message_sequence = [msg.to_openai_dict() for msg in message_sequence]
+ message_sequence = Message.to_openai_dicts_from_list(message_sequence)

  for msg in message_sequence:
  print(msg)
letta/interfaces/anthropic_streaming_interface.py CHANGED
@@ -289,6 +289,13 @@ class AnthropicStreamingInterface:
  if not self.anthropic_mode == EventMode.TEXT:
  raise RuntimeError(f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}")

+ # Weird bug happens with native thinking where a single response can contain:
+ # [reasoning, text, tool_call]
+ # In these cases, we should pipe text out to null / ignore it
+ # TODO this will have to be redone to support non-tool calling message sending
+ if not self.put_inner_thoughts_in_kwarg:
+ return
+
  # Combine buffer with current text to handle tags split across chunks
  combined_text = self.partial_tag_buffer + delta.text

letta/interfaces/openai_streaming_interface.py CHANGED
@@ -10,7 +10,14 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
  from letta.llm_api.openai_client import is_openai_reasoning_model
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
  from letta.log import get_logger
- from letta.schemas.letta_message import AssistantMessage, LettaMessage, ReasoningMessage, ToolCallDelta, ToolCallMessage
+ from letta.schemas.letta_message import (
+ AssistantMessage,
+ HiddenReasoningMessage,
+ LettaMessage,
+ ReasoningMessage,
+ ToolCallDelta,
+ ToolCallMessage,
+ )
  from letta.schemas.letta_message_content import OmittedReasoningContent, TextContent
  from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
  from letta.schemas.message import Message
@@ -35,13 +42,15 @@ class OpenAIStreamingInterface:
  is_openai_proxy: bool = False,
  messages: Optional[list] = None,
  tools: Optional[list] = None,
+ put_inner_thoughts_in_kwarg: bool = True,
  ):
  self.use_assistant_message = use_assistant_message
  self.assistant_message_tool_name = DEFAULT_MESSAGE_TOOL
  self.assistant_message_tool_kwarg = DEFAULT_MESSAGE_TOOL_KWARG
+ self.put_inner_thoughts_in_kwarg = put_inner_thoughts_in_kwarg

  self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
- self.function_args_reader = JSONInnerThoughtsExtractor(wait_for_first_key=True) # TODO: pass in kwarg
+ self.function_args_reader = JSONInnerThoughtsExtractor(wait_for_first_key=put_inner_thoughts_in_kwarg)
  self.function_name_buffer = None
  self.function_args_buffer = None
  self.function_id_buffer = None
@@ -75,6 +84,7 @@ class OpenAIStreamingInterface:
  self.tool_call_name: str | None = None
  self.tool_call_id: str | None = None
  self.reasoning_messages = []
+ self.emitted_hidden_reasoning = False # Track if we've emitted hidden reasoning message

  def get_reasoning_content(self) -> list[TextContent | OmittedReasoningContent]:
  content = "".join(self.reasoning_messages).strip()
@@ -113,6 +123,7 @@ class OpenAIStreamingInterface:
  if self.messages:
  # Convert messages to dict format for token counting
  message_dicts = [msg.to_openai_dict() if hasattr(msg, "to_openai_dict") else msg for msg in self.messages]
+ message_dicts = [m for m in message_dicts if m is not None]
  self.fallback_input_tokens = num_tokens_from_messages(message_dicts) # fallback to gpt-4 cl100k-base

  if self.tools:
@@ -184,6 +195,22 @@ class OpenAIStreamingInterface:
  if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
  tool_call = message_delta.tool_calls[0]

+ # For OpenAI reasoning models, emit a hidden reasoning message before the first tool call
+ if not self.emitted_hidden_reasoning and is_openai_reasoning_model(self.model) and not self.put_inner_thoughts_in_kwarg:
+ self.emitted_hidden_reasoning = True
+ if prev_message_type and prev_message_type != "hidden_reasoning_message":
+ message_index += 1
+ hidden_message = HiddenReasoningMessage(
+ id=self.letta_message_id,
+ date=datetime.now(timezone.utc),
+ state="omitted",
+ hidden_reasoning=None,
+ otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+ )
+ yield hidden_message
+ prev_message_type = hidden_message.message_type
+ message_index += 1 # Increment for the next message
+
  if tool_call.function.name:
  # If we're waiting for the first key, then we should hold back the name
  # ie add it to a buffer instead of returning it as a chunk
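For readers tracing this streaming path: the condition added above fires at most once per stream, and only when the model is an OpenAI reasoning model whose thoughts are not being routed into tool-call kwargs. A minimal standalone sketch of just that gating decision, with plain booleans standing in for the interface state (this is illustrative, not the letta implementation):

# Illustrative sketch; the argument names are hypothetical stand-ins for
# self.emitted_hidden_reasoning, is_openai_reasoning_model(self.model),
# and self.put_inner_thoughts_in_kwarg.
def should_emit_hidden_reasoning(already_emitted: bool, model_is_reasoning: bool, inner_thoughts_in_kwarg: bool) -> bool:
    # Emit the "omitted" placeholder once, before the first tool-call chunk,
    # only when reasoning stays hidden server-side (not surfaced via kwargs).
    return not already_emitted and model_is_reasoning and not inner_thoughts_in_kwarg

assert should_emit_hidden_reasoning(False, True, False)       # first tool call on a reasoning model
assert not should_emit_hidden_reasoning(True, True, False)    # already emitted once this stream
assert not should_emit_hidden_reasoning(False, True, True)    # thoughts already surfaced via kwargs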
@@ -232,16 +259,13 @@ class OpenAIStreamingInterface:
  # If we have main_json, we should output a ToolCallMessage
  elif updates_main_json:
-
  # If there's something in the function_name buffer, we should release it first
  # NOTE: we could output it as part of a chunk that has both name and args,
  # however the frontend may expect name first, then args, so to be
  # safe we'll output name first in a separate chunk
  if self.function_name_buffer:
-
  # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
  if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
-
  # Store the ID of the tool call so allow skipping the corresponding response
  if self.function_id_buffer:
  self.prev_assistant_message_id = self.function_id_buffer
@@ -373,7 +397,6 @@ class OpenAIStreamingInterface:
  # clear buffers
  self.function_id_buffer = None
  else:
-
  # There may be a buffer from a previous chunk, for example
  # if the previous chunk had arguments but we needed to flush name
  if self.function_args_buffer:
letta/llm_api/anthropic_client.py CHANGED
@@ -5,12 +5,12 @@ from typing import Dict, List, Optional, Union
  import anthropic
  from anthropic import AsyncStream
- from anthropic.types.beta import BetaMessage as AnthropicMessage
- from anthropic.types.beta import BetaRawMessageStreamEvent
+ from anthropic.types.beta import BetaMessage as AnthropicMessage, BetaRawMessageStreamEvent
  from anthropic.types.beta.message_create_params import MessageCreateParamsNonStreaming
  from anthropic.types.beta.messages import BetaMessageBatch
  from anthropic.types.beta.messages.batch_create_params import Request

+ from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
  from letta.errors import (
  ContextWindowExceededError,
  ErrorCode,
@@ -34,9 +34,14 @@ from letta.otel.tracing import trace_method
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
  from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
- from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
- from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
- from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
+ from letta.schemas.openai.chat_completion_response import (
+ ChatCompletionResponse,
+ Choice,
+ FunctionCall,
+ Message as ChoiceMessage,
+ ToolCall,
+ UsageStatistics,
+ )
  from letta.settings import model_settings

  DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
@@ -45,7 +50,6 @@ logger = get_logger(__name__)
  class AnthropicClient(LLMClientBase):
-
  @trace_method
  @deprecated("Synchronous version of this is no longer valid. Will result in model_dump of coroutine")
  def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
@@ -56,7 +60,12 @@ class AnthropicClient(LLMClientBase):
  @trace_method
  async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
  client = await self._get_anthropic_client_async(llm_config, async_client=True)
- response = await client.beta.messages.create(**request_data)
+
+ if llm_config.enable_reasoner:
+ response = await client.beta.messages.create(**request_data, betas=["interleaved-thinking-2025-05-14"])
+ else:
+ response = await client.beta.messages.create(**request_data)
+
  return response.model_dump()

  @trace_method
@@ -69,6 +78,11 @@ class AnthropicClient(LLMClientBase):
  # See: https://docs.anthropic.com/en/docs/build-with-claude/tool-use/fine-grained-streaming
  betas = ["fine-grained-tool-streaming-2025-05-14"]

+ # If extended thinking, turn on interleaved header
+ # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#interleaved-thinking
+ if llm_config.enable_reasoner:
+ betas.append("interleaved-thinking-2025-05-14")
+
  return await client.beta.messages.create(**request_data, betas=betas)

  @trace_method
@@ -173,11 +187,14 @@ class AnthropicClient(LLMClientBase):
  raise NotImplementedError("Only tool calling supported on Anthropic API requests")

  if not llm_config.max_tokens:
- raise ValueError("Max tokens must be set for anthropic")
+ # TODO strip this default once we add provider-specific defaults
+ max_output_tokens = 4096 # the minimum max tokens (for Haiku 3)
+ else:
+ max_output_tokens = llm_config.max_tokens

  data = {
  "model": llm_config.model,
- "max_tokens": llm_config.max_tokens,
+ "max_tokens": max_output_tokens,
  "temperature": llm_config.temperature,
  }
@@ -249,13 +266,11 @@ class AnthropicClient(LLMClientBase):
  raise RuntimeError(f"First message is not a system message, instead has role {messages[0].role}")
  system_content = messages[0].content if isinstance(messages[0].content, str) else messages[0].content[0].text
  data["system"] = self._add_cache_control_to_system_message(system_content)
- data["messages"] = [
- m.to_anthropic_dict(
- inner_thoughts_xml_tag=inner_thoughts_xml_tag,
- put_inner_thoughts_in_kwargs=bool(llm_config.put_inner_thoughts_in_kwargs),
- )
- for m in messages[1:]
- ]
+ data["messages"] = PydanticMessage.to_anthropic_dicts_from_list(
+ messages=messages[1:],
+ inner_thoughts_xml_tag=inner_thoughts_xml_tag,
+ put_inner_thoughts_in_kwargs=bool(llm_config.put_inner_thoughts_in_kwargs),
+ )

  # Ensure first message is user
  if data["messages"][0]["role"] != "user":
@@ -264,6 +279,10 @@ class AnthropicClient(LLMClientBase):
  # Handle alternating messages
  data["messages"] = merge_tool_results_into_user_messages(data["messages"])

+ # Strip heartbeat pings if extended thinking
+ if llm_config.enable_reasoner:
+ data["messages"] = merge_heartbeats_into_tool_responses(data["messages"])
+
  # Prefix fill
  # https://docs.anthropic.com/en/api/messages#body-messages
  # NOTE: cannot prefill with tools for opus:
@@ -599,16 +618,167 @@ def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
  """
  formatted_tools = []
  for tool in tools:
+ # Get the input schema
+ input_schema = tool.function.parameters or {"type": "object", "properties": {}, "required": []}
+
+ # Clean up the properties in the schema
+ # The presence of union types / default fields seems Anthropic to produce invalid JSON for tool calls
+ if isinstance(input_schema, dict) and "properties" in input_schema:
+ cleaned_properties = {}
+ for prop_name, prop_schema in input_schema.get("properties", {}).items():
+ if isinstance(prop_schema, dict):
+ cleaned_properties[prop_name] = _clean_property_schema(prop_schema)
+ else:
+ cleaned_properties[prop_name] = prop_schema
+
+ # Create cleaned input schema
+ cleaned_input_schema = {
+ "type": input_schema.get("type", "object"),
+ "properties": cleaned_properties,
+ }
+
+ # Only add required field if it exists and is non-empty
+ if "required" in input_schema and input_schema["required"]:
+ cleaned_input_schema["required"] = input_schema["required"]
+ else:
+ cleaned_input_schema = input_schema
+
  formatted_tool = {
  "name": tool.function.name,
  "description": tool.function.description if tool.function.description else "",
- "input_schema": tool.function.parameters or {"type": "object", "properties": {}, "required": []},
+ "input_schema": cleaned_input_schema,
  }
  formatted_tools.append(formatted_tool)

  return formatted_tools


+ def _clean_property_schema(prop_schema: dict) -> dict:
+ """Clean up a property schema by removing defaults and simplifying union types."""
+ cleaned = {}
+
+ # Handle type field - simplify union types like ["null", "string"] to just "string"
+ if "type" in prop_schema:
+ prop_type = prop_schema["type"]
+ if isinstance(prop_type, list):
+ # Remove "null" from union types to simplify
+ # e.g., ["null", "string"] becomes "string"
+ non_null_types = [t for t in prop_type if t != "null"]
+ if len(non_null_types) == 1:
+ cleaned["type"] = non_null_types[0]
+ elif len(non_null_types) > 1:
+ # Keep as array if multiple non-null types
+ cleaned["type"] = non_null_types
+ else:
+ # If only "null" was in the list, default to string
+ cleaned["type"] = "string"
+ else:
+ cleaned["type"] = prop_type
+
+ # Copy over other fields except 'default'
+ for key, value in prop_schema.items():
+ if key not in ["type", "default"]: # Skip 'default' field
+ if key == "properties" and isinstance(value, dict):
+ # Recursively clean nested properties
+ cleaned["properties"] = {k: _clean_property_schema(v) if isinstance(v, dict) else v for k, v in value.items()}
+ else:
+ cleaned[key] = value
+
+ return cleaned
+
+
+ def is_heartbeat(message: dict, is_ping: bool = False) -> bool:
+ """Check if the message is an automated heartbeat ping"""
+
+ if "role" not in message or message["role"] != "user" or "content" not in message:
+ return False
+
+ try:
+ message_json = json.loads(message["content"])
+ except:
+ return False
+
+ if "reason" not in message_json:
+ return False
+
+ if message_json["type"] != "heartbeat":
+ return False
+
+ if not is_ping:
+ # Just checking if 'type': 'heartbeat'
+ return True
+ else:
+ # Also checking if it's specifically a 'ping' style message
+ # NOTE: this will not catch tool rule heartbeats
+ if REQ_HEARTBEAT_MESSAGE in message_json["reason"] or FUNC_FAILED_HEARTBEAT_MESSAGE in message_json["reason"]:
+ return True
+ else:
+ return False
+
+
+ def merge_heartbeats_into_tool_responses(messages: List[dict]):
+ """For extended thinking mode, we don't want anything other than tool responses in-between assistant actions
+
+ Otherwise, the thinking will silently get dropped.
+
+ NOTE: assumes merge_tool_results_into_user_messages has already been called
+ """
+
+ merged_messages = []
+
+ # Loop through messages
+ # For messages with role 'user' and len(content) > 1,
+ # Check if content[0].type == 'tool_result'
+ # If so, iterate over content[1:] and while content.type == 'text' and is_heartbeat(content.text),
+ # merge into content[0].content
+
+ for message in messages:
+ if "role" not in message or "content" not in message:
+ # Skip invalid messages
+ merged_messages.append(message)
+ continue
+
+ if message["role"] == "user" and len(message["content"]) > 1:
+ content_parts = message["content"]
+
+ # If the first content part is a tool result, merge the heartbeat content into index 0 of the content
+ # Two end cases:
+ # 1. It was [tool_result, heartbeat], in which case merged result is [tool_result+heartbeat] (len 1)
+ # 2. It was [tool_result, user_text], in which case it should be unchanged (len 2)
+ if "type" in content_parts[0] and "content" in content_parts[0] and content_parts[0]["type"] == "tool_result":
+ new_content_parts = [content_parts[0]]
+
+ # If the first content part is a tool result, merge the heartbeat content into index 0 of the content
+ for i, content_part in enumerate(content_parts[1:]):
+ # If it's a heartbeat, add it to the merge
+ if (
+ content_part["type"] == "text"
+ and "text" in content_part
+ and is_heartbeat({"role": "user", "content": content_part["text"]})
+ ):
+ # NOTE: joining with a ','
+ new_content_parts[0]["content"] += ", " + content_part["text"]
+
+ # If it's not, break, and concat to finish
+ else:
+ # Append the rest directly, no merging of content strings
+ new_content_parts.extend(content_parts[i + 1 :])
+ break
+
+ # Set the content_parts
+ message["content"] = new_content_parts
+ merged_messages.append(message)
+
+ else:
+ # Skip invalid messages parts
+ merged_messages.append(message)
+ continue
+ else:
+ merged_messages.append(message)
+
+ return merged_messages
+
+
  def merge_tool_results_into_user_messages(messages: List[dict]):
  """Anthropic API doesn't allow role 'tool'->'user' sequences
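Two of the additions above are easiest to read with concrete data. First, the schema cleaning in convert_tools_to_anthropic_format / _clean_property_schema: a property that a JSON-schema generator emits as a nullable union with a default gets flattened before being sent to Anthropic. The values below are invented purely for illustration:

# Hypothetical tool parameter schema before cleaning
before = {
    "type": "object",
    "properties": {
        "page": {"type": ["null", "integer"], "default": 0, "description": "results page to fetch"},
    },
    "required": [],
}

# After the cleaning pass: the "null" member of the union is dropped, the
# "default" key is removed, other keys are kept, and the empty "required"
# list is omitted from the cleaned schema.
after = {
    "type": "object",
    "properties": {
        "page": {"type": "integer", "description": "results page to fetch"},
    },
}

Second, the heartbeat merging applied when extended thinking is enabled: a user message that merge_tool_results_into_user_messages left as [tool_result, heartbeat-text] has the heartbeat folded into the tool result (joined with ", "), so only tool results sit between assistant turns. Again a hypothetical sketch; the heartbeat reason string in real traffic comes from letta's constants:

heartbeat = '{"type": "heartbeat", "reason": "automated timer ping (hypothetical)"}'

# Hypothetical user message before merging
before_msg = {
    "role": "user",
    "content": [
        {"type": "tool_result", "tool_use_id": "toolu_01", "content": "Sent message successfully."},
        {"type": "text", "text": heartbeat},
    ],
}

# After merge_heartbeats_into_tool_responses: the heartbeat text is appended
# to the tool_result content, leaving a single tool_result part.
after_msg = {
    "role": "user",
    "content": [
        {"type": "tool_result", "tool_use_id": "toolu_01",
         "content": "Sent message successfully., " + heartbeat},
    ],
}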
@@ -647,7 +817,7 @@ def merge_tool_results_into_user_messages(messages: List[dict]):
  if isinstance(next_message["content"], list)
  else [{"type": "text", "text": next_message["content"]}]
  )
- merged_content = current_content + next_content
+ merged_content: list = current_content + next_content
  current_message["content"] = merged_content
  else:
  # Append the current message to result as it's complete
letta/llm_api/azure_client.py CHANGED
@@ -13,7 +13,6 @@ from letta.settings import model_settings
  class AzureClient(OpenAIClient):
-
  def get_byok_overrides(self, llm_config: LLMConfig) -> Tuple[Optional[str], Optional[str], Optional[str]]:
  if llm_config.provider_category == ProviderCategory.byok:
  from letta.services.provider_manager import ProviderManager
letta/llm_api/bedrock_client.py CHANGED
@@ -16,7 +16,6 @@ logger = get_logger(__name__)
  class BedrockClient(AnthropicClient):
-
  async def get_byok_overrides_async(self, llm_config: LLMConfig) -> tuple[str, str, str]:
  override_access_key_id, override_secret_access_key, override_default_region = None, None, None
  if llm_config.provider_category == ProviderCategory.byok:
@@ -28,7 +27,7 @@ class BedrockClient(AnthropicClient):
  llm_config.provider_name,
  actor=self.actor,
  )
- return override_access_key_id, override_secret_access_key, override_default_regions
+ return override_access_key_id, override_secret_access_key, override_default_region

  @trace_method
  async def _get_anthropic_client_async(