letta-nightly 0.6.45.dev20250329104117__py3-none-any.whl → 0.6.46.dev20250330104049__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of letta-nightly might be problematic.
- letta/__init__.py +1 -1
- letta/agent.py +25 -8
- letta/agents/base_agent.py +6 -5
- letta/agents/letta_agent.py +323 -0
- letta/agents/voice_agent.py +4 -3
- letta/client/client.py +2 -0
- letta/dynamic_multi_agent.py +5 -5
- letta/errors.py +20 -0
- letta/helpers/tool_execution_helper.py +1 -1
- letta/helpers/tool_rule_solver.py +1 -1
- letta/llm_api/anthropic.py +2 -0
- letta/llm_api/anthropic_client.py +153 -167
- letta/llm_api/google_ai_client.py +112 -29
- letta/llm_api/llm_api_tools.py +5 -0
- letta/llm_api/llm_client.py +6 -7
- letta/llm_api/llm_client_base.py +38 -17
- letta/llm_api/openai.py +2 -0
- letta/orm/group.py +2 -5
- letta/round_robin_multi_agent.py +18 -7
- letta/schemas/group.py +6 -0
- letta/schemas/message.py +23 -14
- letta/schemas/openai/chat_completion_request.py +6 -1
- letta/schemas/providers.py +3 -3
- letta/serialize_schemas/marshmallow_agent.py +34 -10
- letta/serialize_schemas/pydantic_agent_schema.py +23 -3
- letta/server/rest_api/app.py +9 -0
- letta/server/rest_api/interface.py +25 -2
- letta/server/rest_api/optimistic_json_parser.py +1 -1
- letta/server/rest_api/routers/v1/agents.py +57 -23
- letta/server/rest_api/routers/v1/groups.py +72 -49
- letta/server/rest_api/routers/v1/sources.py +1 -0
- letta/server/rest_api/utils.py +0 -1
- letta/server/server.py +73 -80
- letta/server/startup.sh +1 -1
- letta/services/agent_manager.py +7 -0
- letta/services/group_manager.py +87 -29
- letta/services/message_manager.py +5 -0
- letta/services/tool_executor/async_tool_execution_sandbox.py +397 -0
- letta/services/tool_executor/tool_execution_manager.py +27 -0
- letta/services/{tool_execution_sandbox.py → tool_executor/tool_execution_sandbox.py} +40 -12
- letta/services/tool_executor/tool_executor.py +23 -6
- letta/settings.py +17 -1
- letta/supervisor_multi_agent.py +3 -1
- {letta_nightly-0.6.45.dev20250329104117.dist-info → letta_nightly-0.6.46.dev20250330104049.dist-info}/METADATA +1 -1
- {letta_nightly-0.6.45.dev20250329104117.dist-info → letta_nightly-0.6.46.dev20250330104049.dist-info}/RECORD +48 -46
- {letta_nightly-0.6.45.dev20250329104117.dist-info → letta_nightly-0.6.46.dev20250330104049.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.45.dev20250329104117.dist-info → letta_nightly-0.6.46.dev20250330104049.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.45.dev20250329104117.dist-info → letta_nightly-0.6.46.dev20250330104049.dist-info}/entry_points.txt +0 -0
letta/llm_api/anthropic_client.py
CHANGED
@@ -7,12 +7,11 @@ from anthropic.types import Message as AnthropicMessage
 
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
-from letta.llm_api.llm_api_tools import cast_message_to_subtype
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
 from letta.schemas.message import Message as PydanticMessage
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
+from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
 from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
@@ -26,20 +25,14 @@ logger = get_logger(__name__)
 class AnthropicClient(LLMClientBase):
 
     def request(self, request_data: dict) -> dict:
-
-
-
-            return response.model_dump()
-        except Exception as e:
-            self._handle_anthropic_error(e)
+        client = self._get_anthropic_client(async_client=False)
+        response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
+        return response.model_dump()
 
     async def request_async(self, request_data: dict) -> dict:
-
-
-
-            return response.model_dump()
-        except Exception as e:
-            self._handle_anthropic_error(e)
+        client = self._get_anthropic_client(async_client=True)
+        response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
+        return response.model_dump()
 
     def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
         override_key = ProviderManager().get_anthropic_override_key()
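For orientation, here is a minimal sketch of the call that the new request() makes. The model name, message text, and other values below are invented for illustration; in the package they come from llm_config and build_request_data(), and the `betas` value is taken directly from the diff above.

    import anthropic

    # Assumed example payload; the real dict is assembled by AnthropicClient.build_request_data().
    request_data = {
        "model": "claude-3-5-sonnet-20241022",
        "max_tokens": 4096,
        "temperature": 0.7,
        "system": "You are a helpful agent.",
        "messages": [{"role": "user", "content": "Hello"}],
    }

    client = anthropic.Anthropic()  # mirrors _get_anthropic_client(async_client=False)
    response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
    print(response.model_dump())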
@@ -47,15 +40,6 @@ class AnthropicClient(LLMClientBase):
             return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
         return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()
 
-    def _handle_anthropic_error(self, e: Exception):
-        if isinstance(e, anthropic.APIConnectionError):
-            logger.warning(f"[Anthropic] API connection error: {e.__cause__}")
-        elif isinstance(e, anthropic.RateLimitError):
-            logger.warning("[Anthropic] Rate limited (429). Consider backoff.")
-        elif isinstance(e, anthropic.APIStatusError):
-            logger.warning(f"[Anthropic] API status error: {e.status_code}, {e.response}")
-        raise e
-
     def build_request_data(
         self,
         messages: List[PydanticMessage],
@@ -63,43 +47,157 @@ class AnthropicClient(LLMClientBase):
         tool_call: Optional[str],
         force_tool_call: Optional[str] = None,
     ) -> dict:
+        prefix_fill = True
         if not self.use_tool_naming:
             raise NotImplementedError("Only tool calling supported on Anthropic API requests")
 
-        if
-
-
-
-
-
-
-
-
-
-
-
-
-
-        else:
-            tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            available_tools = [{"type": "function", "function": f} for f in tools]
-
-        chat_completion_request = ChatCompletionRequest(
-            model=self.llm_config.model,
-            messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
-            tools=available_tools,
-            tool_choice=tool_choice,
-            max_tokens=self.llm_config.max_tokens,  # Note: max_tokens is required for Anthropic API
-            temperature=self.llm_config.temperature,
-        )
+        if not self.llm_config.max_tokens:
+            raise ValueError("Max tokens must be set for anthropic")
+
+        data = {
+            "model": self.llm_config.model,
+            "max_tokens": self.llm_config.max_tokens,
+            "temperature": self.llm_config.temperature,
+        }
+
+        # Extended Thinking
+        if self.llm_config.enable_reasoner:
+            assert (
+                self.llm_config.max_reasoning_tokens is not None and self.llm_config.max_reasoning_tokens < self.llm_config.max_tokens
+            ), "max tokens must be greater than thinking budget"
+            assert not self.llm_config.put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
 
-
-
-
-
-
+            data["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": self.llm_config.max_reasoning_tokens,
+            }
+            # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
+            data["temperature"] = 1.0
+
+            # Silently disable prefix_fill for now
+            prefix_fill = False
+
+        # Tools
+        tools_for_request = (
+            [Tool(function=f) for f in tools if f["name"] == force_tool_call]
+            if force_tool_call is not None
+            else [Tool(function=f) for f in tools]
         )
+        if force_tool_call is not None:
+            self.llm_config.put_inner_thoughts_in_kwargs = True  # why do we do this ?
+
+        # Add inner thoughts kwarg
+        if len(tools_for_request) > 0 and self.llm_config.put_inner_thoughts_in_kwargs:
+            tools_with_inner_thoughts = add_inner_thoughts_to_functions(
+                functions=[t.function.model_dump() for t in tools_for_request],
+                inner_thoughts_key=INNER_THOUGHTS_KWARG,
+                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+            )
+            tools_for_request = [Tool(function=f) for f in tools_with_inner_thoughts]
+
+        if len(tools_for_request) > 0:
+            # TODO eventually enable parallel tool use
+            data["tools"] = convert_tools_to_anthropic_format(tools_for_request)
+
+        # Messages
+        inner_thoughts_xml_tag = "thinking"
+        data["messages"] = [
+            m.to_anthropic_dict(
+                inner_thoughts_xml_tag=inner_thoughts_xml_tag,
+                put_inner_thoughts_in_kwargs=self.llm_config.put_inner_thoughts_in_kwargs,
+            )
+            for m in messages
+        ]
+
+        # Move 'system' to the top level
+        if data["messages"][0]["role"] != "system":
+            raise RuntimeError(f"First message is not a system message, instead has role {data["messages"][0]["role"]}")
+
+        data["system"] = data["messages"][0]["content"]
+        data["messages"] = data["messages"][1:]
+
+        # Ensure first message is user
+        if data["messages"][0]["role"] != "user":
+            data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]
+
+        # Handle alternating messages
+        data["messages"] = merge_tool_results_into_user_messages(data["messages"])
+
+        # Prefix fill
+        # https://docs.anthropic.com/en/api/messages#body-messages
+        # NOTE: cannot prefill with tools for opus:
+        # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
+        if prefix_fill and not self.llm_config.put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
+            data["messages"].append(
+                # Start the thinking process for the assistant
+                {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
+            )
+
+        return data
+
+    def handle_llm_error(self, e: Exception) -> Exception:
+        if isinstance(e, anthropic.APIConnectionError):
+            logger.warning(f"[Anthropic] API connection error: {e.__cause__}")
+            return LLMConnectionError(
+                message=f"Failed to connect to Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={"cause": str(e.__cause__) if e.__cause__ else None},
+            )
+
+        if isinstance(e, anthropic.RateLimitError):
+            logger.warning("[Anthropic] Rate limited (429). Consider backoff.")
+            return LLMRateLimitError(
+                message=f"Rate limited by Anthropic: {str(e)}",
+                code=ErrorCode.RATE_LIMIT_EXCEEDED,
+            )
+
+        if isinstance(e, anthropic.BadRequestError):
+            logger.warning(f"[Anthropic] Bad request: {str(e)}")
+            return LLMBadRequestError(
+                message=f"Bad request to Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.AuthenticationError):
+            logger.warning(f"[Anthropic] Authentication error: {str(e)}")
+            return LLMAuthenticationError(
+                message=f"Authentication failed with Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.PermissionDeniedError):
+            logger.warning(f"[Anthropic] Permission denied: {str(e)}")
+            return LLMPermissionDeniedError(
+                message=f"Permission denied by Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.NotFoundError):
+            logger.warning(f"[Anthropic] Resource not found: {str(e)}")
+            return LLMNotFoundError(
+                message=f"Resource not found in Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.UnprocessableEntityError):
+            logger.warning(f"[Anthropic] Unprocessable entity: {str(e)}")
+            return LLMUnprocessableEntityError(
+                message=f"Invalid request content for Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.APIStatusError):
+            logger.warning(f"[Anthropic] API status error: {str(e)}")
+            return LLMServerError(
+                message=f"Anthropic API error: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={
+                    "status_code": e.status_code if hasattr(e, "status_code") else None,
+                    "response": str(e.response) if hasattr(e, "response") else None,
+                },
+            )
+
+        return super().handle_llm_error(e)
 
     def convert_response_to_chat_completion(
         self,
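A hedged caller-side sketch of how the new handle_llm_error() is meant to be used; request() no longer swallows SDK exceptions, so the caller converts them into typed Letta errors. Here `anthropic_client` (an AnthropicClient instance) and `request_data` are assumed to exist; the pairing is implied by the diff, not copied from it.

    try:
        response_dict = anthropic_client.request(request_data)
    except Exception as e:
        # e.g. anthropic.RateLimitError -> LLMRateLimitError, APIConnectionError -> LLMConnectionError
        raise anthropic_client.handle_llm_error(e) from e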
@@ -208,118 +306,6 @@ class AnthropicClient(LLMClientBase):
         return chat_completion_response
 
 
-def _prepare_anthropic_request(
-    data: ChatCompletionRequest,
-    inner_thoughts_xml_tag: Optional[str] = "thinking",
-    # if true, prefix fill the generation with the thinking tag
-    prefix_fill: bool = True,
-    # if true, put COT inside the tool calls instead of inside the content
-    put_inner_thoughts_in_kwargs: bool = False,
-    bedrock: bool = False,
-    # extended thinking related fields
-    # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
-    extended_thinking: bool = False,
-    max_reasoning_tokens: Optional[int] = None,
-) -> dict:
-    """Prepare the request data for Anthropic API format."""
-    if extended_thinking:
-        assert (
-            max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
-        ), "max tokens must be greater than thinking budget"
-        assert not put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
-        # assert not prefix_fill, "extended thinking not compatible with prefix_fill"
-        # Silently disable prefix_fill for now
-        prefix_fill = False
-
-    # if needed, put inner thoughts as a kwarg for all tools
-    if data.tools and put_inner_thoughts_in_kwargs:
-        functions = add_inner_thoughts_to_functions(
-            functions=[t.function.model_dump() for t in data.tools],
-            inner_thoughts_key=INNER_THOUGHTS_KWARG,
-            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
-        )
-        data.tools = [Tool(function=f) for f in functions]
-
-    # convert the tools to Anthropic's payload format
-    anthropic_tools = None if data.tools is None else convert_tools_to_anthropic_format(data.tools)
-
-    # pydantic -> dict
-    data = data.model_dump(exclude_none=True)
-
-    if extended_thinking:
-        data["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": max_reasoning_tokens,
-        }
-        # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
-        data["temperature"] = 1.0
-
-    if "functions" in data:
-        raise ValueError(f"'functions' unexpected in Anthropic API payload")
-
-    # Handle tools
-    if "tools" in data and data["tools"] is None:
-        data.pop("tools")
-        data.pop("tool_choice", None)
-    elif anthropic_tools is not None:
-        # TODO eventually enable parallel tool use
-        data["tools"] = anthropic_tools
-
-    # Move 'system' to the top level
-    assert data["messages"][0]["role"] == "system", f"Expected 'system' role in messages[0]:\n{data['messages'][0]}"
-    data["system"] = data["messages"][0]["content"]
-    data["messages"] = data["messages"][1:]
-
-    # Process messages
-    for message in data["messages"]:
-        if "content" not in message:
-            message["content"] = None
-
-    # Convert to Anthropic format
-    msg_objs = [
-        PydanticMessage.dict_to_message(
-            user_id=None,
-            agent_id=None,
-            openai_message_dict=m,
-        )
-        for m in data["messages"]
-    ]
-    data["messages"] = [
-        m.to_anthropic_dict(
-            inner_thoughts_xml_tag=inner_thoughts_xml_tag,
-            put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
-        )
-        for m in msg_objs
-    ]
-
-    # Ensure first message is user
-    if data["messages"][0]["role"] != "user":
-        data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]
-
-    # Handle alternating messages
-    data["messages"] = merge_tool_results_into_user_messages(data["messages"])
-
-    # Handle prefix fill (not compatible with inner-thouguhts-in-kwargs)
-    # https://docs.anthropic.com/en/api/messages#body-messages
-    # NOTE: cannot prefill with tools for opus:
-    # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
-    if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
-        if not bedrock:  # not support for bedrock
-            data["messages"].append(
-                # Start the thinking process for the assistant
-                {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
-            )
-
-    # Validate max_tokens
-    assert "max_tokens" in data, data
-
-    # Remove OpenAI-specific fields
-    for field in ["frequency_penalty", "logprobs", "n", "top_p", "presence_penalty", "user", "stream"]:
-        data.pop(field, None)
-
-    return data
-
-
 def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use
 
letta/llm_api/google_ai_client.py
CHANGED
@@ -1,6 +1,8 @@
 import uuid
 from typing import List, Optional, Tuple
 
+import requests
+
 from letta.constants import NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.helpers.json_helpers import json_dumps
@@ -21,7 +23,13 @@ class GoogleAIClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
-        url, headers =
+        url, headers = get_gemini_endpoint_and_headers(
+            base_url=str(self.llm_config.model_endpoint),
+            model=self.llm_config.model,
+            api_key=str(model_settings.gemini_api_key),
+            key_in_header=True,
+            generate_content=True,
+        )
         return make_post_request(url, headers, request_data)
 
     def build_request_data(
@@ -208,34 +216,6 @@ class GoogleAIClient(LLMClientBase):
         except KeyError as e:
             raise e
 
-    def get_gemini_endpoint_and_headers(
-        self,
-        key_in_header: bool = True,
-        generate_content: bool = False,
-    ) -> Tuple[str, dict]:
-        """
-        Dynamically generate the model endpoint and headers.
-        """
-
-        url = f"{self.llm_config.model_endpoint}/v1beta/models"
-
-        # Add the model
-        url += f"/{self.llm_config.model}"
-
-        # Add extension for generating content if we're hitting the LM
-        if generate_content:
-            url += ":generateContent"
-
-        # Decide if api key should be in header or not
-        # Two ways to pass the key: https://ai.google.dev/tutorials/setup
-        if key_in_header:
-            headers = {"Content-Type": "application/json", "x-goog-api-key": model_settings.gemini_api_key}
-        else:
-            url += f"?key={model_settings.gemini_api_key}"
-            headers = {"Content-Type": "application/json"}
-
-        return url, headers
-
     def convert_tools_to_google_ai_format(self, tools: List[Tool]) -> List[dict]:
         """
         OpenAI style:
@@ -330,3 +310,106 @@ class GoogleAIClient(LLMClientBase):
             messages_with_padding.append(dummy_yield_message)
 
         return messages_with_padding
+
+
+def get_gemini_endpoint_and_headers(
+    base_url: str, model: Optional[str], api_key: str, key_in_header: bool = True, generate_content: bool = False
+) -> Tuple[str, dict]:
+    """
+    Dynamically generate the model endpoint and headers.
+    """
+    url = f"{base_url}/v1beta/models"
+
+    # Add the model
+    if model is not None:
+        url += f"/{model}"
+
+    # Add extension for generating content if we're hitting the LM
+    if generate_content:
+        url += ":generateContent"
+
+    # Decide if api key should be in header or not
+    # Two ways to pass the key: https://ai.google.dev/tutorials/setup
+    if key_in_header:
+        headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
+    else:
+        url += f"?key={api_key}"
+        headers = {"Content-Type": "application/json"}
+
+    return url, headers
+
+
+def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
+    from letta.utils import printd
+
+    url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)
+
+    try:
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+        response = response.json()  # convert to dict from string
+
+        # Grab the models out
+        model_list = response["models"]
+        return model_list
+
+    except requests.exceptions.HTTPError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        printd(f"Got HTTPError, exception={http_err}")
+        # Print the HTTP status code
+        print(f"HTTP Error: {http_err.response.status_code}")
+        # Print the response content (error message from server)
+        print(f"Message: {http_err.response.text}")
+        raise http_err
+
+    except requests.exceptions.RequestException as req_err:
+        # Handle other requests-related errors (e.g., connection error)
+        printd(f"Got RequestException, exception={req_err}")
+        raise req_err
+
+    except Exception as e:
+        # Handle other potential errors
+        printd(f"Got unknown Exception, exception={e}")
+        raise e
+
+
+def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]:
+    from letta.utils import printd
+
+    url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)
+
+    try:
+        response = requests.get(url, headers=headers)
+        printd(f"response = {response}")
+        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+        response = response.json()  # convert to dict from string
+        printd(f"response.json = {response}")
+
+        # Grab the models out
+        return response
+
+    except requests.exceptions.HTTPError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        printd(f"Got HTTPError, exception={http_err}")
+        # Print the HTTP status code
+        print(f"HTTP Error: {http_err.response.status_code}")
+        # Print the response content (error message from server)
+        print(f"Message: {http_err.response.text}")
+        raise http_err
+
+    except requests.exceptions.RequestException as req_err:
+        # Handle other requests-related errors (e.g., connection error)
+        printd(f"Got RequestException, exception={req_err}")
+        raise req_err
+
+    except Exception as e:
+        # Handle other potential errors
+        printd(f"Got unknown Exception, exception={e}")
+        raise e
+
+
+def google_ai_get_model_context_window(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
+    model_details = google_ai_get_model_details(base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header)
+    # TODO should this be:
+    # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"]
+    return int(model_details["inputTokenLimit"])
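Since get_gemini_endpoint_and_headers() is now a module-level helper, its behaviour is easy to illustrate. The base URL, model name, and key below are example values only, not defaults shipped with the package:

    from letta.llm_api.google_ai_client import get_gemini_endpoint_and_headers

    url, headers = get_gemini_endpoint_and_headers(
        base_url="https://generativelanguage.googleapis.com",
        model="gemini-1.5-pro",
        api_key="MY_GEMINI_API_KEY",
        key_in_header=True,
        generate_content=True,
    )
    # url     -> "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent"
    # headers -> {"Content-Type": "application/json", "x-goog-api-key": "MY_GEMINI_API_KEY"}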
letta/llm_api/llm_api_tools.py
CHANGED
@@ -140,6 +140,7 @@ def create(
     stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
     put_inner_thoughts_first: bool = True,
+    name: Optional[str] = None,
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
     from letta.utils import printd
@@ -206,6 +207,7 @@ def create(
                 api_key=api_key,
                 chat_completion_request=data,
                 stream_interface=stream_interface,
+                name=name,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
@@ -255,6 +257,7 @@ def create(
                 api_key=api_key,
                 chat_completion_request=data,
                 stream_interface=stream_interface,
+                name=name,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
@@ -359,6 +362,7 @@ def create(
                 stream_interface=stream_interface,
                 extended_thinking=llm_config.enable_reasoner,
                 max_reasoning_tokens=llm_config.max_reasoning_tokens,
+                name=name,
             )
 
         else:
@@ -531,6 +535,7 @@ def create(
                 api_key=model_settings.deepseek_api_key,
                 chat_completion_request=data,
                 stream_interface=stream_interface,
+                name=name,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
letta/llm_api/llm_client.py
CHANGED
@@ -9,21 +9,17 @@ class LLMClient:
 
     @staticmethod
     def create(
-        agent_id: str,
         llm_config: LLMConfig,
         put_inner_thoughts_first: bool = True,
-        actor_id: Optional[str] = None,
     ) -> Optional[LLMClientBase]:
         """
         Create an LLM client based on the model endpoint type.
 
         Args:
-            agent_id: Unique identifier for the agent
             llm_config: Configuration for the LLM model
             put_inner_thoughts_first: Whether to put inner thoughts first in the response
             use_structured_output: Whether to use structured output
             use_tool_naming: Whether to use tool naming
-            actor_id: Optional actor identifier
 
         Returns:
             An instance of LLMClientBase subclass
@@ -36,19 +32,22 @@ class LLMClient:
                 from letta.llm_api.google_ai_client import GoogleAIClient
 
                 return GoogleAIClient(
-
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
                 )
             case "google_vertex":
                 from letta.llm_api.google_vertex_client import GoogleVertexClient
 
                 return GoogleVertexClient(
-
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
                )
             case "anthropic":
                 from letta.llm_api.anthropic_client import AnthropicClient
 
                 return AnthropicClient(
-
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
                 )
             case _:
                 return None
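A short sketch of the updated factory call; `agent_state` is an assumed variable holding an LLMConfig, and agent_id/actor_id are no longer accepted:

    from letta.llm_api.llm_client import LLMClient

    llm_client = LLMClient.create(
        llm_config=agent_state.llm_config,
        put_inner_thoughts_first=True,
    )
    if llm_client is None:
        # Endpoint types without a dedicated client presumably still go through the
        # chat-completions path in llm_api_tools.create().
        ...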