letta-nightly 0.11.6.dev20250903104037__py3-none-any.whl → 0.11.7.dev20250904045700__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +10 -14
- letta/agents/base_agent.py +18 -0
- letta/agents/helpers.py +32 -7
- letta/agents/letta_agent.py +953 -762
- letta/agents/voice_agent.py +1 -1
- letta/client/streaming.py +0 -1
- letta/constants.py +11 -8
- letta/errors.py +9 -0
- letta/functions/function_sets/base.py +77 -69
- letta/functions/function_sets/builtin.py +41 -22
- letta/functions/function_sets/multi_agent.py +1 -2
- letta/functions/schema_generator.py +0 -1
- letta/helpers/converters.py +8 -3
- letta/helpers/datetime_helpers.py +5 -4
- letta/helpers/message_helper.py +1 -2
- letta/helpers/pinecone_utils.py +0 -1
- letta/helpers/tool_rule_solver.py +10 -0
- letta/helpers/tpuf_client.py +848 -0
- letta/interface.py +8 -8
- letta/interfaces/anthropic_streaming_interface.py +7 -0
- letta/interfaces/openai_streaming_interface.py +29 -6
- letta/llm_api/anthropic_client.py +188 -18
- letta/llm_api/azure_client.py +0 -1
- letta/llm_api/bedrock_client.py +1 -2
- letta/llm_api/deepseek_client.py +319 -5
- letta/llm_api/google_vertex_client.py +75 -17
- letta/llm_api/groq_client.py +0 -1
- letta/llm_api/helpers.py +2 -2
- letta/llm_api/llm_api_tools.py +1 -50
- letta/llm_api/llm_client.py +6 -8
- letta/llm_api/mistral.py +1 -1
- letta/llm_api/openai.py +16 -13
- letta/llm_api/openai_client.py +31 -16
- letta/llm_api/together_client.py +0 -1
- letta/llm_api/xai_client.py +0 -1
- letta/local_llm/chat_completion_proxy.py +7 -6
- letta/local_llm/settings/settings.py +1 -1
- letta/orm/__init__.py +1 -0
- letta/orm/agent.py +8 -6
- letta/orm/archive.py +9 -1
- letta/orm/block.py +3 -4
- letta/orm/block_history.py +3 -1
- letta/orm/group.py +2 -3
- letta/orm/identity.py +1 -2
- letta/orm/job.py +1 -2
- letta/orm/llm_batch_items.py +1 -2
- letta/orm/message.py +8 -4
- letta/orm/mixins.py +18 -0
- letta/orm/organization.py +2 -0
- letta/orm/passage.py +8 -1
- letta/orm/passage_tag.py +55 -0
- letta/orm/sandbox_config.py +1 -3
- letta/orm/step.py +1 -2
- letta/orm/tool.py +1 -0
- letta/otel/resource.py +2 -2
- letta/plugins/plugins.py +1 -1
- letta/prompts/prompt_generator.py +10 -2
- letta/schemas/agent.py +11 -0
- letta/schemas/archive.py +4 -0
- letta/schemas/block.py +13 -0
- letta/schemas/embedding_config.py +0 -1
- letta/schemas/enums.py +24 -7
- letta/schemas/group.py +12 -0
- letta/schemas/letta_message.py +55 -1
- letta/schemas/letta_message_content.py +28 -0
- letta/schemas/letta_request.py +21 -4
- letta/schemas/letta_stop_reason.py +9 -1
- letta/schemas/llm_config.py +24 -8
- letta/schemas/mcp.py +0 -3
- letta/schemas/memory.py +14 -0
- letta/schemas/message.py +245 -141
- letta/schemas/openai/chat_completion_request.py +2 -1
- letta/schemas/passage.py +1 -0
- letta/schemas/providers/bedrock.py +1 -1
- letta/schemas/providers/openai.py +2 -2
- letta/schemas/tool.py +11 -5
- letta/schemas/tool_execution_result.py +0 -1
- letta/schemas/tool_rule.py +71 -0
- letta/serialize_schemas/marshmallow_agent.py +1 -2
- letta/server/rest_api/app.py +3 -3
- letta/server/rest_api/auth/index.py +0 -1
- letta/server/rest_api/interface.py +3 -11
- letta/server/rest_api/redis_stream_manager.py +3 -4
- letta/server/rest_api/routers/v1/agents.py +143 -84
- letta/server/rest_api/routers/v1/blocks.py +1 -1
- letta/server/rest_api/routers/v1/folders.py +1 -1
- letta/server/rest_api/routers/v1/groups.py +23 -22
- letta/server/rest_api/routers/v1/internal_templates.py +68 -0
- letta/server/rest_api/routers/v1/sandbox_configs.py +11 -5
- letta/server/rest_api/routers/v1/sources.py +1 -1
- letta/server/rest_api/routers/v1/tools.py +167 -15
- letta/server/rest_api/streaming_response.py +4 -3
- letta/server/rest_api/utils.py +75 -18
- letta/server/server.py +24 -35
- letta/services/agent_manager.py +359 -45
- letta/services/agent_serialization_manager.py +23 -3
- letta/services/archive_manager.py +72 -3
- letta/services/block_manager.py +1 -2
- letta/services/context_window_calculator/token_counter.py +11 -6
- letta/services/file_manager.py +1 -3
- letta/services/files_agents_manager.py +2 -4
- letta/services/group_manager.py +73 -12
- letta/services/helpers/agent_manager_helper.py +5 -5
- letta/services/identity_manager.py +8 -3
- letta/services/job_manager.py +2 -14
- letta/services/llm_batch_manager.py +1 -3
- letta/services/mcp/base_client.py +1 -2
- letta/services/mcp_manager.py +5 -6
- letta/services/message_manager.py +536 -15
- letta/services/organization_manager.py +1 -2
- letta/services/passage_manager.py +287 -12
- letta/services/provider_manager.py +1 -3
- letta/services/sandbox_config_manager.py +12 -7
- letta/services/source_manager.py +1 -2
- letta/services/step_manager.py +0 -1
- letta/services/summarizer/summarizer.py +4 -2
- letta/services/telemetry_manager.py +1 -3
- letta/services/tool_executor/builtin_tool_executor.py +136 -316
- letta/services/tool_executor/core_tool_executor.py +231 -74
- letta/services/tool_executor/files_tool_executor.py +2 -2
- letta/services/tool_executor/mcp_tool_executor.py +0 -1
- letta/services/tool_executor/multi_agent_tool_executor.py +2 -2
- letta/services/tool_executor/sandbox_tool_executor.py +0 -1
- letta/services/tool_executor/tool_execution_sandbox.py +2 -3
- letta/services/tool_manager.py +181 -64
- letta/services/tool_sandbox/modal_deployment_manager.py +2 -2
- letta/services/user_manager.py +1 -2
- letta/settings.py +5 -3
- letta/streaming_interface.py +3 -3
- letta/system.py +1 -1
- letta/utils.py +0 -1
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/METADATA +11 -7
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/RECORD +137 -135
- letta/llm_api/deepseek.py +0 -303
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/WHEEL +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/entry_points.txt +0 -0
- {letta_nightly-0.11.6.dev20250903104037.dist-info → letta_nightly-0.11.7.dev20250904045700.dist-info}/licenses/LICENSE +0 -0
letta/interface.py
CHANGED
@@ -198,23 +198,23 @@ class CLIInterface(AgentInterface):
         try:
             msg_dict = eval(function_args)
             if function_name == "archival_memory_search":
-                output = f
+                output = f"\tquery: {msg_dict['query']}, page: {msg_dict['page']}"
                 if STRIP_UI:
                     print(output)
                 else:
                     print(f"{Fore.RED}{output}{Style.RESET_ALL}")
             elif function_name == "archival_memory_insert":
-                output = f
+                output = f"\t→ {msg_dict['content']}"
                 if STRIP_UI:
                     print(output)
                 else:
                     print(f"{Style.BRIGHT}{Fore.RED}{output}{Style.RESET_ALL}")
             else:
                 if STRIP_UI:
-                    print(f
+                    print(f"\t {msg_dict['old_content']}\n\t→ {msg_dict['new_content']}")
                 else:
                     print(
-                        f
+                        f"{Style.BRIGHT}\t{Fore.RED} {msg_dict['old_content']}\n\t{Fore.GREEN}→ {msg_dict['new_content']}{Style.RESET_ALL}"
                     )
         except Exception as e:
             printd(str(e))
@@ -223,7 +223,7 @@ class CLIInterface(AgentInterface):
         print_function_message("🧠", f"searching memory with {function_name}")
         try:
             msg_dict = eval(function_args)
-            output = f
+            output = f"\tquery: {msg_dict['query']}, page: {msg_dict['page']}"
             if STRIP_UI:
                 print(output)
             else:
@@ -248,7 +248,7 @@ class CLIInterface(AgentInterface):
     @staticmethod
     def print_messages(message_sequence: List[Message], dump=False):
         # rewrite to dict format
-        message_sequence =
+        message_sequence = Message.to_openai_dicts_from_list(message_sequence)

         idx = len(message_sequence)
         for msg in message_sequence:
@@ -291,7 +291,7 @@ class CLIInterface(AgentInterface):
     @staticmethod
     def print_messages_simple(message_sequence: List[Message]):
         # rewrite to dict format
-        message_sequence =
+        message_sequence = Message.to_openai_dicts_from_list(message_sequence)

         for msg in message_sequence:
             role = msg["role"]
@@ -309,7 +309,7 @@ class CLIInterface(AgentInterface):
     @staticmethod
     def print_messages_raw(message_sequence: List[Message]):
         # rewrite to dict format
-        message_sequence =
+        message_sequence = Message.to_openai_dicts_from_list(message_sequence)

         for msg in message_sequence:
             print(msg)
letta/interfaces/anthropic_streaming_interface.py
CHANGED
@@ -289,6 +289,13 @@ class AnthropicStreamingInterface:
         if not self.anthropic_mode == EventMode.TEXT:
             raise RuntimeError(f"Streaming integrity failed - received BetaTextDelta object while not in TEXT EventMode: {delta}")

+        # Weird bug happens with native thinking where a single response can contain:
+        # [reasoning, text, tool_call]
+        # In these cases, we should pipe text out to null / ignore it
+        # TODO this will have to be redone to support non-tool calling message sending
+        if not self.put_inner_thoughts_in_kwarg:
+            return
+
         # Combine buffer with current text to handle tags split across chunks
         combined_text = self.partial_tag_buffer + delta.text

letta/interfaces/openai_streaming_interface.py
CHANGED
@@ -10,7 +10,14 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
 from letta.llm_api.openai_client import is_openai_reasoning_model
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
-from letta.schemas.letta_message import
+from letta.schemas.letta_message import (
+    AssistantMessage,
+    HiddenReasoningMessage,
+    LettaMessage,
+    ReasoningMessage,
+    ToolCallDelta,
+    ToolCallMessage,
+)
 from letta.schemas.letta_message_content import OmittedReasoningContent, TextContent
 from letta.schemas.letta_stop_reason import LettaStopReason, StopReasonType
 from letta.schemas.message import Message
@@ -35,13 +42,15 @@ class OpenAIStreamingInterface:
         is_openai_proxy: bool = False,
         messages: Optional[list] = None,
         tools: Optional[list] = None,
+        put_inner_thoughts_in_kwarg: bool = True,
     ):
         self.use_assistant_message = use_assistant_message
         self.assistant_message_tool_name = DEFAULT_MESSAGE_TOOL
         self.assistant_message_tool_kwarg = DEFAULT_MESSAGE_TOOL_KWARG
+        self.put_inner_thoughts_in_kwarg = put_inner_thoughts_in_kwarg

         self.optimistic_json_parser: OptimisticJSONParser = OptimisticJSONParser()
-        self.function_args_reader = JSONInnerThoughtsExtractor(wait_for_first_key=
+        self.function_args_reader = JSONInnerThoughtsExtractor(wait_for_first_key=put_inner_thoughts_in_kwarg)
         self.function_name_buffer = None
         self.function_args_buffer = None
         self.function_id_buffer = None
@@ -75,6 +84,7 @@ class OpenAIStreamingInterface:
         self.tool_call_name: str | None = None
         self.tool_call_id: str | None = None
         self.reasoning_messages = []
+        self.emitted_hidden_reasoning = False  # Track if we've emitted hidden reasoning message

     def get_reasoning_content(self) -> list[TextContent | OmittedReasoningContent]:
         content = "".join(self.reasoning_messages).strip()
@@ -113,6 +123,7 @@ class OpenAIStreamingInterface:
         if self.messages:
             # Convert messages to dict format for token counting
             message_dicts = [msg.to_openai_dict() if hasattr(msg, "to_openai_dict") else msg for msg in self.messages]
+            message_dicts = [m for m in message_dicts if m is not None]
            self.fallback_input_tokens = num_tokens_from_messages(message_dicts)  # fallback to gpt-4 cl100k-base

         if self.tools:
@@ -184,6 +195,22 @@ class OpenAIStreamingInterface:
            if message_delta.tool_calls is not None and len(message_delta.tool_calls) > 0:
                tool_call = message_delta.tool_calls[0]

+                # For OpenAI reasoning models, emit a hidden reasoning message before the first tool call
+                if not self.emitted_hidden_reasoning and is_openai_reasoning_model(self.model) and not self.put_inner_thoughts_in_kwarg:
+                    self.emitted_hidden_reasoning = True
+                    if prev_message_type and prev_message_type != "hidden_reasoning_message":
+                        message_index += 1
+                    hidden_message = HiddenReasoningMessage(
+                        id=self.letta_message_id,
+                        date=datetime.now(timezone.utc),
+                        state="omitted",
+                        hidden_reasoning=None,
+                        otid=Message.generate_otid_from_id(self.letta_message_id, message_index),
+                    )
+                    yield hidden_message
+                    prev_message_type = hidden_message.message_type
+                    message_index += 1  # Increment for the next message
+
                if tool_call.function.name:
                    # If we're waiting for the first key, then we should hold back the name
                    # ie add it to a buffer instead of returning it as a chunk
@@ -232,16 +259,13 @@ class OpenAIStreamingInterface:

                    # If we have main_json, we should output a ToolCallMessage
                    elif updates_main_json:
-
                        # If there's something in the function_name buffer, we should release it first
                        # NOTE: we could output it as part of a chunk that has both name and args,
                        # however the frontend may expect name first, then args, so to be
                        # safe we'll output name first in a separate chunk
                        if self.function_name_buffer:
-
                            # use_assisitant_message means that we should also not release main_json raw, and instead should only release the contents of "message": "..."
                            if self.use_assistant_message and self.function_name_buffer == self.assistant_message_tool_name:
-
                                # Store the ID of the tool call so allow skipping the corresponding response
                                if self.function_id_buffer:
                                    self.prev_assistant_message_id = self.function_id_buffer
@@ -373,7 +397,6 @@ class OpenAIStreamingInterface:
                                # clear buffers
                                self.function_id_buffer = None
                            else:
-
                                # There may be a buffer from a previous chunk, for example
                                # if the previous chunk had arguments but we needed to flush name
                                if self.function_args_buffer:
letta/llm_api/anthropic_client.py
CHANGED
@@ -5,12 +5,12 @@ from typing import Dict, List, Optional, Union

 import anthropic
 from anthropic import AsyncStream
-from anthropic.types.beta import BetaMessage as AnthropicMessage
-from anthropic.types.beta import BetaRawMessageStreamEvent
+from anthropic.types.beta import BetaMessage as AnthropicMessage, BetaRawMessageStreamEvent
 from anthropic.types.beta.message_create_params import MessageCreateParamsNonStreaming
 from anthropic.types.beta.messages import BetaMessageBatch
 from anthropic.types.beta.messages.batch_create_params import Request

+from letta.constants import FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.errors import (
     ContextWindowExceededError,
     ErrorCode,
@@ -34,9 +34,14 @@ from letta.otel.tracing import trace_method
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
-from letta.schemas.openai.chat_completion_response import
-
-
+from letta.schemas.openai.chat_completion_response import (
+    ChatCompletionResponse,
+    Choice,
+    FunctionCall,
+    Message as ChoiceMessage,
+    ToolCall,
+    UsageStatistics,
+)
 from letta.settings import model_settings

 DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
@@ -45,7 +50,6 @@ logger = get_logger(__name__)


 class AnthropicClient(LLMClientBase):
-
     @trace_method
     @deprecated("Synchronous version of this is no longer valid. Will result in model_dump of coroutine")
     def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
@@ -56,7 +60,12 @@ class AnthropicClient(LLMClientBase):
     @trace_method
     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
         client = await self._get_anthropic_client_async(llm_config, async_client=True)
-
+
+        if llm_config.enable_reasoner:
+            response = await client.beta.messages.create(**request_data, betas=["interleaved-thinking-2025-05-14"])
+        else:
+            response = await client.beta.messages.create(**request_data)
+
         return response.model_dump()

     @trace_method
@@ -69,6 +78,11 @@ class AnthropicClient(LLMClientBase):
         # See: https://docs.anthropic.com/en/docs/build-with-claude/tool-use/fine-grained-streaming
         betas = ["fine-grained-tool-streaming-2025-05-14"]

+        # If extended thinking, turn on interleaved header
+        # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#interleaved-thinking
+        if llm_config.enable_reasoner:
+            betas.append("interleaved-thinking-2025-05-14")
+
         return await client.beta.messages.create(**request_data, betas=betas)

     @trace_method
@@ -173,11 +187,14 @@ class AnthropicClient(LLMClientBase):
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")

         if not llm_config.max_tokens:
-
+            # TODO strip this default once we add provider-specific defaults
+            max_output_tokens = 4096  # the minimum max tokens (for Haiku 3)
+        else:
+            max_output_tokens = llm_config.max_tokens

         data = {
             "model": llm_config.model,
-            "max_tokens":
+            "max_tokens": max_output_tokens,
             "temperature": llm_config.temperature,
         }

@@ -249,13 +266,11 @@ class AnthropicClient(LLMClientBase):
             raise RuntimeError(f"First message is not a system message, instead has role {messages[0].role}")
         system_content = messages[0].content if isinstance(messages[0].content, str) else messages[0].content[0].text
         data["system"] = self._add_cache_control_to_system_message(system_content)
-        data["messages"] =
-
-
-
-
-            for m in messages[1:]
-        ]
+        data["messages"] = PydanticMessage.to_anthropic_dicts_from_list(
+            messages=messages[1:],
+            inner_thoughts_xml_tag=inner_thoughts_xml_tag,
+            put_inner_thoughts_in_kwargs=bool(llm_config.put_inner_thoughts_in_kwargs),
+        )

         # Ensure first message is user
         if data["messages"][0]["role"] != "user":
@@ -264,6 +279,10 @@ class AnthropicClient(LLMClientBase):
         # Handle alternating messages
         data["messages"] = merge_tool_results_into_user_messages(data["messages"])

+        # Strip heartbeat pings if extended thinking
+        if llm_config.enable_reasoner:
+            data["messages"] = merge_heartbeats_into_tool_responses(data["messages"])
+
         # Prefix fill
         # https://docs.anthropic.com/en/api/messages#body-messages
         # NOTE: cannot prefill with tools for opus:
@@ -599,16 +618,167 @@ def convert_tools_to_anthropic_format(tools: List[OpenAITool]) -> List[dict]:
     """
     formatted_tools = []
     for tool in tools:
+        # Get the input schema
+        input_schema = tool.function.parameters or {"type": "object", "properties": {}, "required": []}
+
+        # Clean up the properties in the schema
+        # The presence of union types / default fields seems Anthropic to produce invalid JSON for tool calls
+        if isinstance(input_schema, dict) and "properties" in input_schema:
+            cleaned_properties = {}
+            for prop_name, prop_schema in input_schema.get("properties", {}).items():
+                if isinstance(prop_schema, dict):
+                    cleaned_properties[prop_name] = _clean_property_schema(prop_schema)
+                else:
+                    cleaned_properties[prop_name] = prop_schema
+
+            # Create cleaned input schema
+            cleaned_input_schema = {
+                "type": input_schema.get("type", "object"),
+                "properties": cleaned_properties,
+            }
+
+            # Only add required field if it exists and is non-empty
+            if "required" in input_schema and input_schema["required"]:
+                cleaned_input_schema["required"] = input_schema["required"]
+        else:
+            cleaned_input_schema = input_schema
+
         formatted_tool = {
             "name": tool.function.name,
             "description": tool.function.description if tool.function.description else "",
-            "input_schema":
+            "input_schema": cleaned_input_schema,
         }
         formatted_tools.append(formatted_tool)

     return formatted_tools


+def _clean_property_schema(prop_schema: dict) -> dict:
+    """Clean up a property schema by removing defaults and simplifying union types."""
+    cleaned = {}
+
+    # Handle type field - simplify union types like ["null", "string"] to just "string"
+    if "type" in prop_schema:
+        prop_type = prop_schema["type"]
+        if isinstance(prop_type, list):
+            # Remove "null" from union types to simplify
+            # e.g., ["null", "string"] becomes "string"
+            non_null_types = [t for t in prop_type if t != "null"]
+            if len(non_null_types) == 1:
+                cleaned["type"] = non_null_types[0]
+            elif len(non_null_types) > 1:
+                # Keep as array if multiple non-null types
+                cleaned["type"] = non_null_types
+            else:
+                # If only "null" was in the list, default to string
+                cleaned["type"] = "string"
+        else:
+            cleaned["type"] = prop_type
+
+    # Copy over other fields except 'default'
+    for key, value in prop_schema.items():
+        if key not in ["type", "default"]:  # Skip 'default' field
+            if key == "properties" and isinstance(value, dict):
+                # Recursively clean nested properties
+                cleaned["properties"] = {k: _clean_property_schema(v) if isinstance(v, dict) else v for k, v in value.items()}
+            else:
+                cleaned[key] = value
+
+    return cleaned
+
+
+def is_heartbeat(message: dict, is_ping: bool = False) -> bool:
+    """Check if the message is an automated heartbeat ping"""
+
+    if "role" not in message or message["role"] != "user" or "content" not in message:
+        return False
+
+    try:
+        message_json = json.loads(message["content"])
+    except:
+        return False
+
+    if "reason" not in message_json:
+        return False
+
+    if message_json["type"] != "heartbeat":
+        return False
+
+    if not is_ping:
+        # Just checking if 'type': 'heartbeat'
+        return True
+    else:
+        # Also checking if it's specifically a 'ping' style message
+        # NOTE: this will not catch tool rule heartbeats
+        if REQ_HEARTBEAT_MESSAGE in message_json["reason"] or FUNC_FAILED_HEARTBEAT_MESSAGE in message_json["reason"]:
+            return True
+        else:
+            return False
+
+
+def merge_heartbeats_into_tool_responses(messages: List[dict]):
+    """For extended thinking mode, we don't want anything other than tool responses in-between assistant actions
+
+    Otherwise, the thinking will silently get dropped.
+
+    NOTE: assumes merge_tool_results_into_user_messages has already been called
+    """
+
+    merged_messages = []
+
+    # Loop through messages
+    # For messages with role 'user' and len(content) > 1,
+    # Check if content[0].type == 'tool_result'
+    # If so, iterate over content[1:] and while content.type == 'text' and is_heartbeat(content.text),
+    # merge into content[0].content
+
+    for message in messages:
+        if "role" not in message or "content" not in message:
+            # Skip invalid messages
+            merged_messages.append(message)
+            continue
+
+        if message["role"] == "user" and len(message["content"]) > 1:
+            content_parts = message["content"]
+
+            # If the first content part is a tool result, merge the heartbeat content into index 0 of the content
+            # Two end cases:
+            # 1. It was [tool_result, heartbeat], in which case merged result is [tool_result+heartbeat] (len 1)
+            # 2. It was [tool_result, user_text], in which case it should be unchanged (len 2)
+            if "type" in content_parts[0] and "content" in content_parts[0] and content_parts[0]["type"] == "tool_result":
+                new_content_parts = [content_parts[0]]
+
+                # If the first content part is a tool result, merge the heartbeat content into index 0 of the content
+                for i, content_part in enumerate(content_parts[1:]):
+                    # If it's a heartbeat, add it to the merge
+                    if (
+                        content_part["type"] == "text"
+                        and "text" in content_part
+                        and is_heartbeat({"role": "user", "content": content_part["text"]})
+                    ):
+                        # NOTE: joining with a ','
+                        new_content_parts[0]["content"] += ", " + content_part["text"]
+
+                    # If it's not, break, and concat to finish
+                    else:
+                        # Append the rest directly, no merging of content strings
+                        new_content_parts.extend(content_parts[i + 1 :])
+                        break
+
+                # Set the content_parts
+                message["content"] = new_content_parts
+                merged_messages.append(message)
+
+            else:
+                # Skip invalid messages parts
+                merged_messages.append(message)
+                continue
+        else:
+            merged_messages.append(message)
+
+    return merged_messages
+
+
 def merge_tool_results_into_user_messages(messages: List[dict]):
     """Anthropic API doesn't allow role 'tool'->'user' sequences

@@ -647,7 +817,7 @@ def merge_tool_results_into_user_messages(messages: List[dict]):
                 if isinstance(next_message["content"], list)
                 else [{"type": "text", "text": next_message["content"]}]
             )
-            merged_content = current_content + next_content
+            merged_content: list = current_content + next_content
             current_message["content"] = merged_content
         else:
             # Append the current message to result as it's complete
letta/llm_api/azure_client.py
CHANGED
@@ -13,7 +13,6 @@ from letta.settings import model_settings


 class AzureClient(OpenAIClient):
-
     def get_byok_overrides(self, llm_config: LLMConfig) -> Tuple[Optional[str], Optional[str], Optional[str]]:
         if llm_config.provider_category == ProviderCategory.byok:
             from letta.services.provider_manager import ProviderManager
letta/llm_api/bedrock_client.py
CHANGED
@@ -16,7 +16,6 @@ logger = get_logger(__name__)


 class BedrockClient(AnthropicClient):
-
     async def get_byok_overrides_async(self, llm_config: LLMConfig) -> tuple[str, str, str]:
         override_access_key_id, override_secret_access_key, override_default_region = None, None, None
         if llm_config.provider_category == ProviderCategory.byok:
@@ -28,7 +27,7 @@ class BedrockClient(AnthropicClient):
                 llm_config.provider_name,
                 actor=self.actor,
             )
-            return override_access_key_id, override_secret_access_key,
+            return override_access_key_id, override_secret_access_key, override_default_region

     @trace_method
     async def _get_anthropic_client_async(