letta-nightly 0.6.44.dev20250326104203__py3-none-any.whl → 0.6.45.dev20250327104152__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic. Click here for more details.
- letta/__init__.py +1 -1
- letta/agent.py +1 -2
- letta/llm_api/anthropic.py +3 -1
- letta/llm_api/anthropic_client.py +487 -0
- letta/llm_api/llm_api_tools.py +6 -55
- letta/llm_api/llm_client.py +6 -0
- letta/llm_api/llm_client_base.py +2 -0
- letta/schemas/agent.py +8 -0
- letta/schemas/providers.py +3 -3
- letta/serialize_schemas/marshmallow_agent.py +3 -0
- letta/serialize_schemas/marshmallow_block.py +1 -1
- letta/serialize_schemas/marshmallow_tool.py +1 -1
- letta/serialize_schemas/pydantic_agent_schema.py +0 -7
- letta/server/rest_api/routers/v1/agents.py +2 -2
- letta/server/rest_api/routers/v1/blocks.py +1 -1
- letta/server/server.py +21 -5
- {letta_nightly-0.6.44.dev20250326104203.dist-info → letta_nightly-0.6.45.dev20250327104152.dist-info}/METADATA +3 -2
- {letta_nightly-0.6.44.dev20250326104203.dist-info → letta_nightly-0.6.45.dev20250327104152.dist-info}/RECORD +21 -22
- letta/llm_api/google_ai.py +0 -438
- letta/llm_api/google_vertex.py +0 -346
- {letta_nightly-0.6.44.dev20250326104203.dist-info → letta_nightly-0.6.45.dev20250327104152.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.44.dev20250326104203.dist-info → letta_nightly-0.6.45.dev20250327104152.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.44.dev20250326104203.dist-info → letta_nightly-0.6.45.dev20250327104152.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
|
@@ -760,6 +760,7 @@ class Agent(BaseAgent):
|
|
|
760
760
|
break
|
|
761
761
|
|
|
762
762
|
if self.agent_state.message_buffer_autoclear:
|
|
763
|
+
self.logger.info("Autoclearing message buffer")
|
|
763
764
|
self.agent_state = self.agent_manager.trim_all_in_context_messages_except_system(self.agent_state.id, actor=self.user)
|
|
764
765
|
|
|
765
766
|
return LettaUsageStatistics(**total_usage.model_dump(), step_count=step_count, steps_messages=steps_messages)
|
|
@@ -934,8 +935,6 @@ class Agent(BaseAgent):
|
|
|
934
935
|
)
|
|
935
936
|
raise e
|
|
936
937
|
|
|
937
|
-
summarize_attempt_count += 1
|
|
938
|
-
|
|
939
938
|
if summarize_attempt_count <= summarizer_settings.max_summarizer_retries:
|
|
940
939
|
logger.warning(
|
|
941
940
|
f"context window exceeded with limit {self.agent_state.llm_config.context_window}, attempting to summarize ({summarize_attempt_count}/{summarizer_settings.max_summarizer_retries}"
|
letta/llm_api/anthropic.py
CHANGED
|
@@ -352,7 +352,7 @@ def convert_anthropic_response_to_chatcompletion(
|
|
|
352
352
|
redacted_reasoning_content = None
|
|
353
353
|
tool_calls = None
|
|
354
354
|
|
|
355
|
-
if len(response.content) >
|
|
355
|
+
if len(response.content) > 0:
|
|
356
356
|
for content_part in response.content:
|
|
357
357
|
if content_part.type == "text":
|
|
358
358
|
content = strip_xml_tags(string=content_part.text, tag=inner_thoughts_xml_tag)
|
|
@@ -743,6 +743,8 @@ def anthropic_chat_completions_request(
|
|
|
743
743
|
anthropic_client = anthropic.Anthropic(api_key=anthropic_override_key)
|
|
744
744
|
elif model_settings.anthropic_api_key:
|
|
745
745
|
anthropic_client = anthropic.Anthropic()
|
|
746
|
+
else:
|
|
747
|
+
raise ValueError("No available Anthropic API key")
|
|
746
748
|
data = _prepare_anthropic_request(
|
|
747
749
|
data=data,
|
|
748
750
|
inner_thoughts_xml_tag=inner_thoughts_xml_tag,
|
|
@@ -0,0 +1,487 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import re
|
|
3
|
+
from typing import List, Optional, Union
|
|
4
|
+
|
|
5
|
+
import anthropic
|
|
6
|
+
from anthropic.types import Message as AnthropicMessage
|
|
7
|
+
|
|
8
|
+
from letta.helpers.datetime_helpers import get_utc_time
|
|
9
|
+
from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
|
|
10
|
+
from letta.llm_api.llm_api_tools import cast_message_to_subtype
|
|
11
|
+
from letta.llm_api.llm_client_base import LLMClientBase
|
|
12
|
+
from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
|
|
13
|
+
from letta.log import get_logger
|
|
14
|
+
from letta.schemas.message import Message as PydanticMessage
|
|
15
|
+
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
|
|
16
|
+
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
|
|
17
|
+
from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
|
|
18
|
+
from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
|
|
19
|
+
from letta.services.provider_manager import ProviderManager
|
|
20
|
+
|
|
21
|
+
DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
|
|
22
|
+
|
|
23
|
+
logger = get_logger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class AnthropicClient(LLMClientBase):
    """LLM client that sends requests through the Anthropic SDK's beta messages endpoint."""

    def request(self, request_data: dict) -> dict:
        """Issue a blocking request and return the SDK response dumped to a dict.

        Any exception is routed through ``_handle_anthropic_error``, which logs
        known Anthropic error types and then re-raises.
        """
        try:
            sync_client = self._get_anthropic_client(async_client=False)
            message = sync_client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
            return message.model_dump()
        except Exception as e:
            self._handle_anthropic_error(e)

    async def request_async(self, request_data: dict) -> dict:
        """Awaitable counterpart of ``request`` using the async SDK client."""
        try:
            async_sdk_client = self._get_anthropic_client(async_client=True)
            message = await async_sdk_client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
            return message.model_dump()
        except Exception as e:
            self._handle_anthropic_error(e)

    def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
        """Build a sync or async SDK client.

        The override key from ``ProviderManager`` is passed explicitly when it is
        truthy; otherwise the SDK client is constructed with no arguments.
        """
        override_key = ProviderManager().get_anthropic_override_key()
        client_cls = anthropic.AsyncAnthropic if async_client else anthropic.Anthropic
        if override_key:
            return client_cls(api_key=override_key)
        return client_cls()

    def _handle_anthropic_error(self, e: Exception):
        """Log a warning for recognised Anthropic error types, then re-raise ``e``."""
        # The rate-limit check must stay ahead of the generic status-error check
        # so that it gets its dedicated message.
        if isinstance(e, anthropic.APIConnectionError):
            logger.warning(f"[Anthropic] API connection error: {e.__cause__}")
        elif isinstance(e, anthropic.RateLimitError):
            logger.warning("[Anthropic] Rate limited (429). Consider backoff.")
        elif isinstance(e, anthropic.APIStatusError):
            logger.warning(f"[Anthropic] API status error: {e.status_code}, {e.response}")
        raise e

    def build_request_data(
        self,
        messages: List[PydanticMessage],
        tools: List[dict],
        tool_call: Optional[str],
        force_tool_call: Optional[str] = None,
    ) -> dict:
        """Assemble the Anthropic request payload for the given messages and tools.

        Args:
            messages: Conversation messages; each is converted via ``to_openai_dict``.
            tools: Function schemas, or ``None`` for the summarization path (no tools).
            tool_call: Not used by this implementation.
            force_tool_call: When set, only the tool with this name is offered and
                the model is required to call it.

        Returns:
            The dict produced by ``_prepare_anthropic_request``.

        Raises:
            NotImplementedError: If ``self.use_tool_naming`` is falsy.
        """
        if not self.use_tool_naming:
            raise NotImplementedError("Only tool calling supported on Anthropic API requests")

        if tools is None:
            # Special case for summarization path
            available_tools, tool_choice = None, None
        elif force_tool_call is not None:
            assert tools is not None
            tool_choice = {"type": "tool", "name": force_tool_call}
            available_tools = [{"type": "function", "function": spec} for spec in tools if spec["name"] == force_tool_call]

            # need to have this setting to be able to put inner thoughts in kwargs
            self.llm_config.put_inner_thoughts_in_kwargs = True
        else:
            # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
            choice_type = "any" if self.llm_config.put_inner_thoughts_in_kwargs else "auto"
            tool_choice = {"type": choice_type, "disable_parallel_tool_use": True}
            available_tools = [{"type": "function", "function": spec} for spec in tools]

        openai_messages = [cast_message_to_subtype(m.to_openai_dict()) for m in messages]
        chat_completion_request = ChatCompletionRequest(
            model=self.llm_config.model,
            messages=openai_messages,
            tools=available_tools,
            tool_choice=tool_choice,
            max_tokens=self.llm_config.max_tokens,  # Note: max_tokens is required for Anthropic API
            temperature=self.llm_config.temperature,
        )

        return _prepare_anthropic_request(
            data=chat_completion_request,
            put_inner_thoughts_in_kwargs=self.llm_config.put_inner_thoughts_in_kwargs,
            extended_thinking=self.llm_config.enable_reasoner,
            max_reasoning_tokens=self.llm_config.max_reasoning_tokens,
        )

    def convert_response_to_chat_completion(
        self,
        response_data: dict,
        input_messages: List[PydanticMessage],
    ) -> ChatCompletionResponse:
        """Translate a dumped Anthropic message into a ``ChatCompletionResponse``.

        Example of the expected ``response_data`` shape (Claude 3):
            {
                'id': 'msg_01W1xg9hdRzbeN2CfZM7zD2w',
                'type': 'message',
                'role': 'assistant',
                'content': [
                    {'type': 'text', 'text': "<thinking>...</thinking>"},
                    {
                        'type': 'tool_use',
                        'id': 'toolu_01Ka4AuCmfvxiidnBZuNfP1u',
                        'name': 'core_memory_append',
                        'input': {'name': 'human', 'content': '...', 'request_heartbeat': True},
                    },
                ],
                'model': 'claude-3-haiku-20240307',
                'stop_reason': 'tool_use',
                'stop_sequence': None,
                'usage': {'input_tokens': 3305, 'output_tokens': 141},
            }

        Each content part overwrites the value collected for its type, so when
        several parts share a type the last one wins.

        Raises:
            RuntimeError: If the response carries no content parts.
        """
        anthropic_message = AnthropicMessage(**response_data)
        prompt_tokens = anthropic_message.usage.input_tokens
        completion_tokens = anthropic_message.usage.output_tokens
        finish_reason = remap_finish_reason(anthropic_message.stop_reason)

        if not len(anthropic_message.content) > 0:
            raise RuntimeError("Unexpected empty content in response")

        content = None
        reasoning_content = None
        reasoning_content_signature = None
        redacted_reasoning_content = None
        tool_calls = None

        for part in anthropic_message.content:
            if part.type == "text":
                content = strip_xml_tags(string=part.text, tag="thinking")
            if part.type == "tool_use":
                function_call = FunctionCall(
                    name=part.name,
                    arguments=json.dumps(part.input, indent=2),
                )
                tool_calls = [ToolCall(id=part.id, type="function", function=function_call)]
            if part.type == "thinking":
                reasoning_content = part.thinking
                reasoning_content_signature = part.signature
            if part.type == "redacted_thinking":
                redacted_reasoning_content = part.data

        assert anthropic_message.role == "assistant"
        assistant_message = ChoiceMessage(
            role=anthropic_message.role,
            content=content,
            reasoning_content=reasoning_content,
            reasoning_content_signature=reasoning_content_signature,
            redacted_reasoning_content=redacted_reasoning_content,
            tool_calls=tool_calls,
        )
        choice = Choice(index=0, finish_reason=finish_reason, message=assistant_message)

        usage = UsageStatistics(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
        chat_completion_response = ChatCompletionResponse(
            id=anthropic_message.id,
            choices=[choice],
            created=get_utc_time(),
            model=anthropic_message.model,
            usage=usage,
        )

        if not self.llm_config.put_inner_thoughts_in_kwargs:
            return chat_completion_response

        # Inner thoughts were carried inside the tool-call arguments; unpack them.
        return unpack_all_inner_thoughts_from_kwargs(
            response=chat_completion_response, inner_thoughts_key=INNER_THOUGHTS_KWARG
        )
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _prepare_anthropic_request(
    data: ChatCompletionRequest,
    inner_thoughts_xml_tag: Optional[str] = "thinking",
    # if true, prefix fill the generation with the thinking tag
    prefix_fill: bool = True,
    # if true, put COT inside the tool calls instead of inside the content
    put_inner_thoughts_in_kwargs: bool = False,
    bedrock: bool = False,
    # extended thinking related fields
    # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
    extended_thinking: bool = False,
    max_reasoning_tokens: Optional[int] = None,
) -> dict:
    """Prepare the request data for Anthropic API format.

    Args:
        data: OpenAI-style chat completion request; its first message must have
            role ``system``.
        inner_thoughts_xml_tag: Tag name used for inner thoughts in message
            content and for the assistant prefix fill.
        prefix_fill: Append an assistant message opening the inner-thoughts tag.
            Forced off when ``extended_thinking`` is set, and skipped for
            ``bedrock``, for models whose name contains "opus", and when
            ``put_inner_thoughts_in_kwargs`` is set.
        put_inner_thoughts_in_kwargs: Add the inner-thoughts argument to every
            tool schema instead of using message content.
        bedrock: Disables the prefix fill.
        extended_thinking: Enable the ``thinking`` block and pin temperature to 1.0.
        max_reasoning_tokens: Thinking budget; required and must be smaller than
            ``data.max_tokens`` when ``extended_thinking`` is set.

    Returns:
        A dict payload with ``system`` hoisted to the top level, messages in
        Anthropic format, and OpenAI-only fields removed.

    Raises:
        AssertionError: On an invalid extended-thinking configuration, a missing
            leading system message, or a missing ``max_tokens``.
        ValueError: If the dumped request contains a ``functions`` key.
    """
    if extended_thinking:
        assert (
            max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
        ), "max tokens must be greater than thinking budget"
        assert not put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
        # assert not prefix_fill, "extended thinking not compatible with prefix_fill"
        # Silently disable prefix_fill for now
        prefix_fill = False

    # if needed, put inner thoughts as a kwarg for all tools
    if data.tools and put_inner_thoughts_in_kwargs:
        functions = add_inner_thoughts_to_functions(
            functions=[t.function.model_dump() for t in data.tools],
            inner_thoughts_key=INNER_THOUGHTS_KWARG,
            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
        )
        data.tools = [Tool(function=f) for f in functions]

    # convert the tools to Anthropic's payload format
    anthropic_tools = None if data.tools is None else convert_tools_to_anthropic_format(data.tools)

    # pydantic -> dict
    data = data.model_dump(exclude_none=True)

    if extended_thinking:
        data["thinking"] = {
            "type": "enabled",
            "budget_tokens": max_reasoning_tokens,
        }
        # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
        data["temperature"] = 1.0

    if "functions" in data:
        raise ValueError("'functions' unexpected in Anthropic API payload")

    # Handle tools
    if "tools" in data and data["tools"] is None:
        data.pop("tools")
        data.pop("tool_choice", None)
    elif anthropic_tools is not None:
        # TODO eventually enable parallel tool use
        data["tools"] = anthropic_tools

    # Move 'system' to the top level
    assert data["messages"][0]["role"] == "system", f"Expected 'system' role in messages[0]:\n{data['messages'][0]}"
    data["system"] = data["messages"][0]["content"]
    data["messages"] = data["messages"][1:]

    # Process messages
    for message in data["messages"]:
        if "content" not in message:
            message["content"] = None

    # Convert to Anthropic format
    msg_objs = [
        PydanticMessage.dict_to_message(
            user_id=None,
            agent_id=None,
            openai_message_dict=m,
        )
        for m in data["messages"]
    ]
    data["messages"] = [
        m.to_anthropic_dict(
            inner_thoughts_xml_tag=inner_thoughts_xml_tag,
            put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
        )
        for m in msg_objs
    ]

    # Ensure first message is user. The emptiness check covers a conversation
    # that held only the system message, which would otherwise raise IndexError.
    if not data["messages"] or data["messages"][0]["role"] != "user":
        data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]

    # Handle alternating messages
    data["messages"] = merge_tool_results_into_user_messages(data["messages"])

    # Handle prefix fill (not compatible with inner-thoughts-in-kwargs)
    # https://docs.anthropic.com/en/api/messages#body-messages
    # NOTE: cannot prefill with tools for opus:
    # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
    if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
        if not bedrock:  # not support for bedrock
            data["messages"].append(
                # Start the thinking process for the assistant
                {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
            )

    # Validate max_tokens
    assert "max_tokens" in data, data

    # Remove OpenAI-specific fields
    for field in ["frequency_penalty", "logprobs", "n", "top_p", "presence_penalty", "user", "stream"]:
        data.pop(field, None)

    return data
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
    """Flatten OpenAI-style tool definitions into Anthropic's tool format.

    See: https://docs.anthropic.com/claude/docs/tool-use

    OpenAI nests the schema under a "function" key:
        {"type": "function",
         "function": {"name": ..., "description": ..., "parameters": {...}}}

    Anthropic expects one level less of nesting and renames "parameters":
        {"name": ..., "description": ..., "input_schema": {...}}

    A tool whose ``parameters`` is falsy gets an empty object schema.
    """
    return [
        {
            "name": entry.function.name,
            "description": entry.function.description,
            # A fresh default schema is built per tool so entries never share one dict.
            "input_schema": entry.function.parameters or {"type": "object", "properties": {}, "required": []},
        }
        for entry in tools
    ]
|
|
379
|
+
|
|
380
|
+
|
|
381
|
+
def merge_tool_results_into_user_messages(messages: List[dict]):
    """Collapse runs of consecutive 'user' messages into a single message.

    The Anthropic API rejects back-to-back user messages, e.g.:
        messages: roles must alternate between "user" and "assistant", but found multiple "user" roles in a row

    From: https://docs.anthropic.com/claude/docs/tool-use
        Anthropic's models and API are built around alternating user and assistant messages,
        where each message is an array of rich content blocks: text, image, tool_use, and tool_result.

    Args:
        messages: Anthropic-format message dicts with "role" and "content" keys.

    Returns:
        A new list with no two adjacent 'user' messages. Merged messages are new
        dicts whose "content" is a list of content blocks; string content is
        wrapped as a text block. The input dicts are never modified.
    """

    def _as_blocks(content):
        # Content that is not already a list of blocks becomes a single text block.
        return content if isinstance(content, list) else [{"type": "text", "text": content}]

    merged_messages = []
    for message in messages:
        previous = merged_messages[-1] if merged_messages else None
        if previous is not None and previous["role"] == "user" and message["role"] == "user":
            # Build a replacement dict rather than assigning into `previous`,
            # which may be one of the caller's own message objects.
            merged_messages[-1] = {
                **previous,
                "content": _as_blocks(previous["content"]) + _as_blocks(message["content"]),
            }
        else:
            merged_messages.append(message)

    return merged_messages
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def remap_finish_reason(stop_reason: str) -> str:
    """Translate an Anthropic 'stop_reason' into an OpenAI 'finish_reason'.

    OpenAI values: 'stop', 'length', 'function_call', 'content_filter', null
    see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api

    Anthropic values handled here
    (https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages#stop-reason):
        "end_turn"      -> "stop"           (the turn ended naturally)
        "stop_sequence" -> "stop"           (a custom stop sequence was generated)
        "max_tokens"    -> "length"
        "tool_use"      -> "function_call"

    Raises:
        ValueError: For any other stop reason.
    """
    translations = (
        ("end_turn", "stop"),
        ("stop_sequence", "stop"),
        ("max_tokens", "length"),
        ("tool_use", "function_call"),
    )
    for anthropic_reason, openai_reason in translations:
        if stop_reason == anthropic_reason:
            return openai_reason
    raise ValueError(f"Unexpected stop_reason: {stop_reason}")
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def strip_xml_tags(string: str, tag: Optional[str]) -> str:
    """Remove the opening and closing occurrences of an XML-style tag from a string.

    Only the tag markers are removed; the text between them is kept. An opening
    marker is matched from ``<tag`` up to the next ``>`` on the same line, so
    markers carrying attributes are removed as well.

    Args:
        string: Text that may contain ``<tag ...>`` / ``</tag>`` markers.
        tag: Tag name to strip, taken literally. ``None`` returns ``string`` unchanged.

    Returns:
        ``string`` with every matching marker removed.
    """
    if tag is None:
        return string
    # Escape the tag so regex metacharacters in it (".", "+", "(", ...) are
    # matched literally instead of being interpreted as pattern syntax.
    escaped_tag = re.escape(tag)
    # Construct the regular expression pattern to find the start and end tags
    tag_pattern = f"<{escaped_tag}.*?>|</{escaped_tag}>"
    # Use the regular expression to replace the tags with an empty string
    return re.sub(tag_pattern, "", string)
|
|
466
|
+
|
|
467
|
+
|
|
468
|
+
def strip_xml_tags_streaming(string: str, tag: Optional[str]) -> str:
    """Strip whole or partial tag markers from a streamed text chunk.

    Each fragment below is removed with a plain substring replace, in order.
    The bare angle brackets are in the list, so every "<" and ">" in the chunk
    is removed as well.

    Args:
        string: The chunk of text to clean.
        tag: Tag name whose fragments are removed. ``None`` returns ``string`` unchanged.

    Returns:
        The chunk with all listed fragments removed.
    """
    if tag is None:
        return string

    # Order is significant: later fragments operate on the output of earlier ones.
    fragments = (
        "<",  # leftover start bracket
        f"<{tag}",  # opening tag start
        f"</{tag}",  # closing tag start
        f"/{tag}>",  # closing tag end
        f"{tag}>",  # opening tag end
        f"/{tag}",  # partial closing tag without >
        ">",  # leftover end bracket
    )

    cleaned = string
    for fragment in fragments:
        cleaned = cleaned.replace(fragment, "")
    return cleaned
|
letta/llm_api/llm_api_tools.py
CHANGED
|
@@ -15,7 +15,6 @@ from letta.llm_api.anthropic import (
|
|
|
15
15
|
from letta.llm_api.aws_bedrock import has_valid_aws_credentials
|
|
16
16
|
from letta.llm_api.azure_openai import azure_openai_chat_completions_request
|
|
17
17
|
from letta.llm_api.deepseek import build_deepseek_chat_completions_request, convert_deepseek_response_to_chatcompletion
|
|
18
|
-
from letta.llm_api.google_ai import convert_tools_to_google_ai_format, google_ai_chat_completions_request
|
|
19
18
|
from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
|
|
20
19
|
from letta.llm_api.openai import (
|
|
21
20
|
build_openai_chat_completions_request,
|
|
@@ -27,7 +26,7 @@ from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG
|
|
|
27
26
|
from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
|
|
28
27
|
from letta.schemas.llm_config import LLMConfig
|
|
29
28
|
from letta.schemas.message import Message
|
|
30
|
-
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest,
|
|
29
|
+
from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
|
|
31
30
|
from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
|
|
32
31
|
from letta.settings import ModelSettings
|
|
33
32
|
from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
|
|
@@ -314,65 +313,17 @@ def create(
|
|
|
314
313
|
|
|
315
314
|
return response
|
|
316
315
|
|
|
317
|
-
elif llm_config.model_endpoint_type == "google_ai":
|
|
318
|
-
if stream:
|
|
319
|
-
raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
|
|
320
|
-
if not use_tool_naming:
|
|
321
|
-
raise NotImplementedError("Only tool calling supported on Google AI API requests")
|
|
322
|
-
|
|
323
|
-
if functions is not None:
|
|
324
|
-
tools = [{"type": "function", "function": f} for f in functions]
|
|
325
|
-
tools = [Tool(**t) for t in tools]
|
|
326
|
-
tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
|
|
327
|
-
else:
|
|
328
|
-
tools = None
|
|
329
|
-
|
|
330
|
-
return google_ai_chat_completions_request(
|
|
331
|
-
base_url=llm_config.model_endpoint,
|
|
332
|
-
model=llm_config.model,
|
|
333
|
-
api_key=model_settings.gemini_api_key,
|
|
334
|
-
# see structure of payload here: https://ai.google.dev/docs/function_calling
|
|
335
|
-
data=dict(
|
|
336
|
-
contents=[m.to_google_ai_dict() for m in messages],
|
|
337
|
-
tools=tools,
|
|
338
|
-
generation_config={"temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens},
|
|
339
|
-
),
|
|
340
|
-
inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
|
|
341
|
-
)
|
|
342
|
-
|
|
343
|
-
elif llm_config.model_endpoint_type == "google_vertex":
|
|
344
|
-
from letta.llm_api.google_vertex import google_vertex_chat_completions_request
|
|
345
|
-
|
|
346
|
-
if stream:
|
|
347
|
-
raise NotImplementedError(f"Streaming not yet implemented for {llm_config.model_endpoint_type}")
|
|
348
|
-
if not use_tool_naming:
|
|
349
|
-
raise NotImplementedError("Only tool calling supported on Google Vertex AI API requests")
|
|
350
|
-
|
|
351
|
-
if functions is not None:
|
|
352
|
-
tools = [{"type": "function", "function": f} for f in functions]
|
|
353
|
-
tools = [Tool(**t) for t in tools]
|
|
354
|
-
tools = convert_tools_to_google_ai_format(tools, inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs)
|
|
355
|
-
else:
|
|
356
|
-
tools = None
|
|
357
|
-
|
|
358
|
-
config = {"tools": tools, "temperature": llm_config.temperature, "max_output_tokens": llm_config.max_tokens}
|
|
359
|
-
|
|
360
|
-
return google_vertex_chat_completions_request(
|
|
361
|
-
model=llm_config.model,
|
|
362
|
-
project_id=model_settings.google_cloud_project,
|
|
363
|
-
region=model_settings.google_cloud_location,
|
|
364
|
-
contents=[m.to_google_ai_dict() for m in messages],
|
|
365
|
-
config=config,
|
|
366
|
-
inner_thoughts_in_kwargs=llm_config.put_inner_thoughts_in_kwargs,
|
|
367
|
-
)
|
|
368
|
-
|
|
369
316
|
elif llm_config.model_endpoint_type == "anthropic":
|
|
370
317
|
if not use_tool_naming:
|
|
371
318
|
raise NotImplementedError("Only tool calling supported on Anthropic API requests")
|
|
372
319
|
|
|
373
320
|
# Force tool calling
|
|
374
321
|
tool_call = None
|
|
375
|
-
if
|
|
322
|
+
if functions is None:
|
|
323
|
+
# Special case for summarization path
|
|
324
|
+
tools = None
|
|
325
|
+
tool_choice = None
|
|
326
|
+
elif force_tool_call is not None:
|
|
376
327
|
# tool_call = {"type": "function", "function": {"name": force_tool_call}}
|
|
377
328
|
tool_choice = {"type": "tool", "name": force_tool_call}
|
|
378
329
|
tools = [{"type": "function", "function": f} for f in functions if f["name"] == force_tool_call]
|
letta/llm_api/llm_client.py
CHANGED
|
@@ -44,5 +44,11 @@ class LLMClient:
|
|
|
44
44
|
return GoogleVertexClient(
|
|
45
45
|
agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
|
|
46
46
|
)
|
|
47
|
+
case "anthropic":
|
|
48
|
+
from letta.llm_api.anthropic_client import AnthropicClient
|
|
49
|
+
|
|
50
|
+
return AnthropicClient(
|
|
51
|
+
agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
|
|
52
|
+
)
|
|
47
53
|
case _:
|
|
48
54
|
return None
|
letta/llm_api/llm_client_base.py
CHANGED
|
@@ -29,6 +29,7 @@ class LLMClientBase:
|
|
|
29
29
|
self.llm_config = llm_config
|
|
30
30
|
self.put_inner_thoughts_first = put_inner_thoughts_first
|
|
31
31
|
self.actor_id = actor_id
|
|
32
|
+
self.use_tool_naming = use_tool_naming
|
|
32
33
|
|
|
33
34
|
def send_llm_request(
|
|
34
35
|
self,
|
|
@@ -82,6 +83,7 @@ class LLMClientBase:
|
|
|
82
83
|
messages: List[Message],
|
|
83
84
|
tools: List[dict],
|
|
84
85
|
tool_call: Optional[str],
|
|
86
|
+
force_tool_call: Optional[str] = None,
|
|
85
87
|
) -> dict:
|
|
86
88
|
"""
|
|
87
89
|
Constructs a request object in the expected data format for this client.
|
letta/schemas/agent.py
CHANGED
|
@@ -244,6 +244,14 @@ class UpdateAgent(BaseModel):
|
|
|
244
244
|
None,
|
|
245
245
|
description="If set to True, the agent will not remember previous messages (though the agent will still retain state via core memory blocks and archival/recall memory). Not recommended unless you have an advanced use case.",
|
|
246
246
|
)
|
|
247
|
+
model: Optional[str] = Field(
|
|
248
|
+
None,
|
|
249
|
+
description="The LLM configuration handle used by the agent, specified in the format "
|
|
250
|
+
"provider/model-name, as an alternative to specifying llm_config.",
|
|
251
|
+
)
|
|
252
|
+
embedding: Optional[str] = Field(
|
|
253
|
+
None, description="The embedding configuration handle used by the agent, specified in the format provider/model-name."
|
|
254
|
+
)
|
|
247
255
|
|
|
248
256
|
class Config:
|
|
249
257
|
extra = "ignore" # Ignores extra fields
|