letta-nightly 0.6.45.dev20250328104141__py3-none-any.whl → 0.6.46.dev20250330050944__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (48)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +25 -8
  3. letta/agents/base_agent.py +6 -5
  4. letta/agents/letta_agent.py +323 -0
  5. letta/agents/voice_agent.py +4 -3
  6. letta/client/client.py +2 -0
  7. letta/dynamic_multi_agent.py +5 -5
  8. letta/errors.py +20 -0
  9. letta/helpers/tool_execution_helper.py +1 -1
  10. letta/helpers/tool_rule_solver.py +1 -1
  11. letta/llm_api/anthropic.py +2 -0
  12. letta/llm_api/anthropic_client.py +153 -167
  13. letta/llm_api/google_ai_client.py +112 -29
  14. letta/llm_api/llm_api_tools.py +5 -0
  15. letta/llm_api/llm_client.py +6 -7
  16. letta/llm_api/llm_client_base.py +38 -17
  17. letta/llm_api/openai.py +2 -0
  18. letta/orm/group.py +2 -5
  19. letta/round_robin_multi_agent.py +18 -7
  20. letta/schemas/group.py +6 -0
  21. letta/schemas/message.py +23 -14
  22. letta/schemas/openai/chat_completion_request.py +6 -1
  23. letta/schemas/providers.py +3 -3
  24. letta/serialize_schemas/marshmallow_agent.py +34 -10
  25. letta/serialize_schemas/pydantic_agent_schema.py +23 -3
  26. letta/server/rest_api/app.py +9 -0
  27. letta/server/rest_api/interface.py +25 -2
  28. letta/server/rest_api/optimistic_json_parser.py +1 -1
  29. letta/server/rest_api/routers/v1/agents.py +57 -23
  30. letta/server/rest_api/routers/v1/groups.py +72 -49
  31. letta/server/rest_api/routers/v1/sources.py +1 -0
  32. letta/server/rest_api/utils.py +0 -1
  33. letta/server/server.py +73 -80
  34. letta/server/startup.sh +1 -1
  35. letta/services/agent_manager.py +7 -0
  36. letta/services/group_manager.py +87 -29
  37. letta/services/message_manager.py +5 -0
  38. letta/services/tool_executor/async_tool_execution_sandbox.py +397 -0
  39. letta/services/tool_executor/tool_execution_manager.py +27 -0
  40. letta/services/{tool_execution_sandbox.py → tool_executor/tool_execution_sandbox.py} +40 -12
  41. letta/services/tool_executor/tool_executor.py +23 -6
  42. letta/settings.py +17 -1
  43. letta/supervisor_multi_agent.py +3 -1
  44. {letta_nightly-0.6.45.dev20250328104141.dist-info → letta_nightly-0.6.46.dev20250330050944.dist-info}/METADATA +1 -1
  45. {letta_nightly-0.6.45.dev20250328104141.dist-info → letta_nightly-0.6.46.dev20250330050944.dist-info}/RECORD +48 -46
  46. {letta_nightly-0.6.45.dev20250328104141.dist-info → letta_nightly-0.6.46.dev20250330050944.dist-info}/LICENSE +0 -0
  47. {letta_nightly-0.6.45.dev20250328104141.dist-info → letta_nightly-0.6.46.dev20250330050944.dist-info}/WHEEL +0 -0
  48. {letta_nightly-0.6.45.dev20250328104141.dist-info → letta_nightly-0.6.46.dev20250330050944.dist-info}/entry_points.txt +0 -0

letta/llm_api/anthropic_client.py
@@ -7,12 +7,11 @@ from anthropic.types import Message as AnthropicMessage
 
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_inner_thoughts_from_kwargs
-from letta.llm_api.llm_api_tools import cast_message_to_subtype
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
 from letta.schemas.message import Message as PydanticMessage
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
+from letta.schemas.openai.chat_completion_request import Tool
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall
 from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
 from letta.schemas.openai.chat_completion_response import ToolCall, UsageStatistics
@@ -26,20 +25,14 @@ logger = get_logger(__name__)
 class AnthropicClient(LLMClientBase):
 
     def request(self, request_data: dict) -> dict:
-        try:
-            client = self._get_anthropic_client(async_client=False)
-            response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
-            return response.model_dump()
-        except Exception as e:
-            self._handle_anthropic_error(e)
+        client = self._get_anthropic_client(async_client=False)
+        response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
+        return response.model_dump()
 
     async def request_async(self, request_data: dict) -> dict:
-        try:
-            client = self._get_anthropic_client(async_client=True)
-            response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
-            return response.model_dump()
-        except Exception as e:
-            self._handle_anthropic_error(e)
+        client = self._get_anthropic_client(async_client=True)
+        response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
+        return response.model_dump()
 
     def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
         override_key = ProviderManager().get_anthropic_override_key()
@@ -47,15 +40,6 @@ class AnthropicClient(LLMClientBase):
             return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
         return anthropic.Anthropic(api_key=override_key) if override_key else anthropic.Anthropic()
 
-    def _handle_anthropic_error(self, e: Exception):
-        if isinstance(e, anthropic.APIConnectionError):
-            logger.warning(f"[Anthropic] API connection error: {e.__cause__}")
-        elif isinstance(e, anthropic.RateLimitError):
-            logger.warning("[Anthropic] Rate limited (429). Consider backoff.")
-        elif isinstance(e, anthropic.APIStatusError):
-            logger.warning(f"[Anthropic] API status error: {e.status_code}, {e.response}")
-        raise e
-
     def build_request_data(
         self,
         messages: List[PydanticMessage],
@@ -63,43 +47,157 @@ class AnthropicClient(LLMClientBase):
         tool_call: Optional[str],
         force_tool_call: Optional[str] = None,
     ) -> dict:
+        prefix_fill = True
         if not self.use_tool_naming:
            raise NotImplementedError("Only tool calling supported on Anthropic API requests")
 
-        if tools is None:
-            # Special case for summarization path
-            available_tools = None
-            tool_choice = None
-        elif force_tool_call is not None:
-            assert tools is not None
-            tool_choice = {"type": "tool", "name": force_tool_call}
-            available_tools = [{"type": "function", "function": f} for f in tools if f["name"] == force_tool_call]
-
-            # need to have this setting to be able to put inner thoughts in kwargs
-            self.llm_config.put_inner_thoughts_in_kwargs = True
-        else:
-            if self.llm_config.put_inner_thoughts_in_kwargs:
-                # tool_choice_type other than "auto" only plays nice if thinking goes inside the tool calls
-                tool_choice = {"type": "any", "disable_parallel_tool_use": True}
-            else:
-                tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            available_tools = [{"type": "function", "function": f} for f in tools]
-
-        chat_completion_request = ChatCompletionRequest(
-            model=self.llm_config.model,
-            messages=[cast_message_to_subtype(m.to_openai_dict()) for m in messages],
-            tools=available_tools,
-            tool_choice=tool_choice,
-            max_tokens=self.llm_config.max_tokens,  # Note: max_tokens is required for Anthropic API
-            temperature=self.llm_config.temperature,
-        )
+        if not self.llm_config.max_tokens:
+            raise ValueError("Max tokens must be set for anthropic")
+
+        data = {
+            "model": self.llm_config.model,
+            "max_tokens": self.llm_config.max_tokens,
+            "temperature": self.llm_config.temperature,
+        }
+
+        # Extended Thinking
+        if self.llm_config.enable_reasoner:
+            assert (
+                self.llm_config.max_reasoning_tokens is not None and self.llm_config.max_reasoning_tokens < self.llm_config.max_tokens
+            ), "max tokens must be greater than thinking budget"
+            assert not self.llm_config.put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
 
-        return _prepare_anthropic_request(
-            data=chat_completion_request,
-            put_inner_thoughts_in_kwargs=self.llm_config.put_inner_thoughts_in_kwargs,
-            extended_thinking=self.llm_config.enable_reasoner,
-            max_reasoning_tokens=self.llm_config.max_reasoning_tokens,
+            data["thinking"] = {
+                "type": "enabled",
+                "budget_tokens": self.llm_config.max_reasoning_tokens,
+            }
+            # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
+            data["temperature"] = 1.0
+
+            # Silently disable prefix_fill for now
+            prefix_fill = False
+
+        # Tools
+        tools_for_request = (
+            [Tool(function=f) for f in tools if f["name"] == force_tool_call]
+            if force_tool_call is not None
+            else [Tool(function=f) for f in tools]
         )
+        if force_tool_call is not None:
+            self.llm_config.put_inner_thoughts_in_kwargs = True  # why do we do this ?
+
+        # Add inner thoughts kwarg
+        if len(tools_for_request) > 0 and self.llm_config.put_inner_thoughts_in_kwargs:
+            tools_with_inner_thoughts = add_inner_thoughts_to_functions(
+                functions=[t.function.model_dump() for t in tools_for_request],
+                inner_thoughts_key=INNER_THOUGHTS_KWARG,
+                inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
+            )
+            tools_for_request = [Tool(function=f) for f in tools_with_inner_thoughts]
+
+        if len(tools_for_request) > 0:
+            # TODO eventually enable parallel tool use
+            data["tools"] = convert_tools_to_anthropic_format(tools_for_request)
+
+        # Messages
+        inner_thoughts_xml_tag = "thinking"
+        data["messages"] = [
+            m.to_anthropic_dict(
+                inner_thoughts_xml_tag=inner_thoughts_xml_tag,
+                put_inner_thoughts_in_kwargs=self.llm_config.put_inner_thoughts_in_kwargs,
+            )
+            for m in messages
+        ]
+
+        # Move 'system' to the top level
+        if data["messages"][0]["role"] != "system":
+            raise RuntimeError(f"First message is not a system message, instead has role {data['messages'][0]['role']}")
+
+        data["system"] = data["messages"][0]["content"]
+        data["messages"] = data["messages"][1:]
+
+        # Ensure first message is user
+        if data["messages"][0]["role"] != "user":
+            data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]
+
+        # Handle alternating messages
+        data["messages"] = merge_tool_results_into_user_messages(data["messages"])
+
+        # Prefix fill
+        # https://docs.anthropic.com/en/api/messages#body-messages
+        # NOTE: cannot prefill with tools for opus:
+        # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
+        if prefix_fill and not self.llm_config.put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
+            data["messages"].append(
+                # Start the thinking process for the assistant
+                {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
+            )
+
+        return data
+
+    def handle_llm_error(self, e: Exception) -> Exception:
+        if isinstance(e, anthropic.APIConnectionError):
+            logger.warning(f"[Anthropic] API connection error: {e.__cause__}")
+            return LLMConnectionError(
+                message=f"Failed to connect to Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={"cause": str(e.__cause__) if e.__cause__ else None},
+            )
+
+        if isinstance(e, anthropic.RateLimitError):
+            logger.warning("[Anthropic] Rate limited (429). Consider backoff.")
+            return LLMRateLimitError(
+                message=f"Rate limited by Anthropic: {str(e)}",
+                code=ErrorCode.RATE_LIMIT_EXCEEDED,
+            )
+
+        if isinstance(e, anthropic.BadRequestError):
+            logger.warning(f"[Anthropic] Bad request: {str(e)}")
+            return LLMBadRequestError(
+                message=f"Bad request to Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.AuthenticationError):
+            logger.warning(f"[Anthropic] Authentication error: {str(e)}")
+            return LLMAuthenticationError(
+                message=f"Authentication failed with Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.PermissionDeniedError):
+            logger.warning(f"[Anthropic] Permission denied: {str(e)}")
+            return LLMPermissionDeniedError(
+                message=f"Permission denied by Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.NotFoundError):
+            logger.warning(f"[Anthropic] Resource not found: {str(e)}")
+            return LLMNotFoundError(
+                message=f"Resource not found in Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.UnprocessableEntityError):
+            logger.warning(f"[Anthropic] Unprocessable entity: {str(e)}")
+            return LLMUnprocessableEntityError(
+                message=f"Invalid request content for Anthropic: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+            )
+
+        if isinstance(e, anthropic.APIStatusError):
+            logger.warning(f"[Anthropic] API status error: {str(e)}")
+            return LLMServerError(
+                message=f"Anthropic API error: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={
+                    "status_code": e.status_code if hasattr(e, "status_code") else None,
+                    "response": str(e.response) if hasattr(e, "response") else None,
+                },
+            )
+
+        return super().handle_llm_error(e)
 
     def convert_response_to_chat_completion(
         self,
@@ -208,118 +306,6 @@ class AnthropicClient(LLMClientBase):
         return chat_completion_response
 
 
-def _prepare_anthropic_request(
-    data: ChatCompletionRequest,
-    inner_thoughts_xml_tag: Optional[str] = "thinking",
-    # if true, prefix fill the generation with the thinking tag
-    prefix_fill: bool = True,
-    # if true, put COT inside the tool calls instead of inside the content
-    put_inner_thoughts_in_kwargs: bool = False,
-    bedrock: bool = False,
-    # extended thinking related fields
-    # https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking
-    extended_thinking: bool = False,
-    max_reasoning_tokens: Optional[int] = None,
-) -> dict:
-    """Prepare the request data for Anthropic API format."""
-    if extended_thinking:
-        assert (
-            max_reasoning_tokens is not None and max_reasoning_tokens < data.max_tokens
-        ), "max tokens must be greater than thinking budget"
-        assert not put_inner_thoughts_in_kwargs, "extended thinking not compatible with put_inner_thoughts_in_kwargs"
-        # assert not prefix_fill, "extended thinking not compatible with prefix_fill"
-        # Silently disable prefix_fill for now
-        prefix_fill = False
-
-    # if needed, put inner thoughts as a kwarg for all tools
-    if data.tools and put_inner_thoughts_in_kwargs:
-        functions = add_inner_thoughts_to_functions(
-            functions=[t.function.model_dump() for t in data.tools],
-            inner_thoughts_key=INNER_THOUGHTS_KWARG,
-            inner_thoughts_description=INNER_THOUGHTS_KWARG_DESCRIPTION,
-        )
-        data.tools = [Tool(function=f) for f in functions]
-
-    # convert the tools to Anthropic's payload format
-    anthropic_tools = None if data.tools is None else convert_tools_to_anthropic_format(data.tools)
-
-    # pydantic -> dict
-    data = data.model_dump(exclude_none=True)
-
-    if extended_thinking:
-        data["thinking"] = {
-            "type": "enabled",
-            "budget_tokens": max_reasoning_tokens,
-        }
-        # `temperature` may only be set to 1 when thinking is enabled. Please consult our documentation at https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#important-considerations-when-using-extended-thinking'
-        data["temperature"] = 1.0
-
-    if "functions" in data:
-        raise ValueError(f"'functions' unexpected in Anthropic API payload")
-
-    # Handle tools
-    if "tools" in data and data["tools"] is None:
-        data.pop("tools")
-        data.pop("tool_choice", None)
-    elif anthropic_tools is not None:
-        # TODO eventually enable parallel tool use
-        data["tools"] = anthropic_tools
-
-    # Move 'system' to the top level
-    assert data["messages"][0]["role"] == "system", f"Expected 'system' role in messages[0]:\n{data['messages'][0]}"
-    data["system"] = data["messages"][0]["content"]
-    data["messages"] = data["messages"][1:]
-
-    # Process messages
-    for message in data["messages"]:
-        if "content" not in message:
-            message["content"] = None
-
-    # Convert to Anthropic format
-    msg_objs = [
-        PydanticMessage.dict_to_message(
-            user_id=None,
-            agent_id=None,
-            openai_message_dict=m,
-        )
-        for m in data["messages"]
-    ]
-    data["messages"] = [
-        m.to_anthropic_dict(
-            inner_thoughts_xml_tag=inner_thoughts_xml_tag,
-            put_inner_thoughts_in_kwargs=put_inner_thoughts_in_kwargs,
-        )
-        for m in msg_objs
-    ]
-
-    # Ensure first message is user
-    if data["messages"][0]["role"] != "user":
-        data["messages"] = [{"role": "user", "content": DUMMY_FIRST_USER_MESSAGE}] + data["messages"]
-
-    # Handle alternating messages
-    data["messages"] = merge_tool_results_into_user_messages(data["messages"])
-
-    # Handle prefix fill (not compatible with inner-thouguhts-in-kwargs)
-    # https://docs.anthropic.com/en/api/messages#body-messages
-    # NOTE: cannot prefill with tools for opus:
-    # Your API request included an `assistant` message in the final position, which would pre-fill the `assistant` response. When using tools with "claude-3-opus-20240229"
-    if prefix_fill and not put_inner_thoughts_in_kwargs and "opus" not in data["model"]:
-        if not bedrock:  # not support for bedrock
-            data["messages"].append(
-                # Start the thinking process for the assistant
-                {"role": "assistant", "content": f"<{inner_thoughts_xml_tag}>"},
-            )
-
-    # Validate max_tokens
-    assert "max_tokens" in data, data
-
-    # Remove OpenAI-specific fields
-    for field in ["frequency_penalty", "logprobs", "n", "top_p", "presence_penalty", "user", "stream"]:
-        data.pop(field, None)
-
-    return data
-
-
 def convert_tools_to_anthropic_format(tools: List[Tool]) -> List[dict]:
     """See: https://docs.anthropic.com/claude/docs/tool-use
 
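Note on the change above: the request-building logic that previously lived in the module-level _prepare_anthropic_request helper is now inlined in AnthropicClient.build_request_data, and SDK exceptions are no longer logged and re-raised inside request()/request_async(); instead the new handle_llm_error override translates them into Letta's typed errors. As a rough illustration only (the concrete values below are placeholders, not taken from this diff), the payload the method now assembles for an extended-thinking request has roughly this shape:

# Illustration only -- approximate shape of the dict returned by build_request_data()
# when extended thinking is enabled; values are placeholders.
request_data = {
    "model": "claude-3-7-sonnet-20250219",  # self.llm_config.model
    "max_tokens": 4096,  # required; build_request_data raises ValueError if unset
    "temperature": 1.0,  # forced to 1.0 whenever extended thinking is enabled
    "thinking": {"type": "enabled", "budget_tokens": 1024},
    "system": "<system prompt text>",  # hoisted out of messages[0]
    "messages": [{"role": "user", "content": "<first user message>"}],
    "tools": [],  # convert_tools_to_anthropic_format(...) output, when tools are present
}
# request()/request_async() pass this straight through to
# client.beta.messages.create(**request_data, betas=["tools-2024-04-04"]).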
letta/llm_api/google_ai_client.py
@@ -1,6 +1,8 @@
 import uuid
 from typing import List, Optional, Tuple
 
+import requests
+
 from letta.constants import NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.helpers.json_helpers import json_dumps
@@ -21,7 +23,13 @@ class GoogleAIClient(LLMClientBase):
         """
         Performs underlying request to llm and returns raw response.
         """
-        url, headers = self.get_gemini_endpoint_and_headers(generate_content=True)
+        url, headers = get_gemini_endpoint_and_headers(
+            base_url=str(self.llm_config.model_endpoint),
+            model=self.llm_config.model,
+            api_key=str(model_settings.gemini_api_key),
+            key_in_header=True,
+            generate_content=True,
+        )
         return make_post_request(url, headers, request_data)
 
     def build_request_data(
@@ -208,34 +216,6 @@ class GoogleAIClient(LLMClientBase):
         except KeyError as e:
             raise e
 
-    def get_gemini_endpoint_and_headers(
-        self,
-        key_in_header: bool = True,
-        generate_content: bool = False,
-    ) -> Tuple[str, dict]:
-        """
-        Dynamically generate the model endpoint and headers.
-        """
-
-        url = f"{self.llm_config.model_endpoint}/v1beta/models"
-
-        # Add the model
-        url += f"/{self.llm_config.model}"
-
-        # Add extension for generating content if we're hitting the LM
-        if generate_content:
-            url += ":generateContent"
-
-        # Decide if api key should be in header or not
-        # Two ways to pass the key: https://ai.google.dev/tutorials/setup
-        if key_in_header:
-            headers = {"Content-Type": "application/json", "x-goog-api-key": model_settings.gemini_api_key}
-        else:
-            url += f"?key={model_settings.gemini_api_key}"
-            headers = {"Content-Type": "application/json"}
-
-        return url, headers
-
     def convert_tools_to_google_ai_format(self, tools: List[Tool]) -> List[dict]:
         """
         OpenAI style:
@@ -330,3 +310,106 @@ class GoogleAIClient(LLMClientBase):
             messages_with_padding.append(dummy_yield_message)
 
         return messages_with_padding
+
+
+def get_gemini_endpoint_and_headers(
+    base_url: str, model: Optional[str], api_key: str, key_in_header: bool = True, generate_content: bool = False
+) -> Tuple[str, dict]:
+    """
+    Dynamically generate the model endpoint and headers.
+    """
+    url = f"{base_url}/v1beta/models"
+
+    # Add the model
+    if model is not None:
+        url += f"/{model}"
+
+    # Add extension for generating content if we're hitting the LM
+    if generate_content:
+        url += ":generateContent"
+
+    # Decide if api key should be in header or not
+    # Two ways to pass the key: https://ai.google.dev/tutorials/setup
+    if key_in_header:
+        headers = {"Content-Type": "application/json", "x-goog-api-key": api_key}
+    else:
+        url += f"?key={api_key}"
+        headers = {"Content-Type": "application/json"}
+
+    return url, headers
+
+
+def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
+    from letta.utils import printd
+
+    url, headers = get_gemini_endpoint_and_headers(base_url, None, api_key, key_in_header)
+
+    try:
+        response = requests.get(url, headers=headers)
+        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+        response = response.json()  # convert to dict from string
+
+        # Grab the models out
+        model_list = response["models"]
+        return model_list
+
+    except requests.exceptions.HTTPError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        printd(f"Got HTTPError, exception={http_err}")
+        # Print the HTTP status code
+        print(f"HTTP Error: {http_err.response.status_code}")
+        # Print the response content (error message from server)
+        print(f"Message: {http_err.response.text}")
+        raise http_err
+
+    except requests.exceptions.RequestException as req_err:
+        # Handle other requests-related errors (e.g., connection error)
+        printd(f"Got RequestException, exception={req_err}")
+        raise req_err
+
+    except Exception as e:
+        # Handle other potential errors
+        printd(f"Got unknown Exception, exception={e}")
+        raise e
+
+
+def google_ai_get_model_details(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> List[dict]:
+    from letta.utils import printd
+
+    url, headers = get_gemini_endpoint_and_headers(base_url, model, api_key, key_in_header)
+
+    try:
+        response = requests.get(url, headers=headers)
+        printd(f"response = {response}")
+        response.raise_for_status()  # Raises HTTPError for 4XX/5XX status
+        response = response.json()  # convert to dict from string
+        printd(f"response.json = {response}")
+
+        # Grab the models out
+        return response
+
+    except requests.exceptions.HTTPError as http_err:
+        # Handle HTTP errors (e.g., response 4XX, 5XX)
+        printd(f"Got HTTPError, exception={http_err}")
+        # Print the HTTP status code
+        print(f"HTTP Error: {http_err.response.status_code}")
+        # Print the response content (error message from server)
+        print(f"Message: {http_err.response.text}")
+        raise http_err
+
+    except requests.exceptions.RequestException as req_err:
+        # Handle other requests-related errors (e.g., connection error)
+        printd(f"Got RequestException, exception={req_err}")
+        raise req_err
+
+    except Exception as e:
+        # Handle other potential errors
+        printd(f"Got unknown Exception, exception={e}")
+        raise e
+
+
+def google_ai_get_model_context_window(base_url: str, api_key: str, model: str, key_in_header: bool = True) -> int:
+    model_details = google_ai_get_model_details(base_url=base_url, api_key=api_key, model=model, key_in_header=key_in_header)
+    # TODO should this be:
+    # return model_details["inputTokenLimit"] + model_details["outputTokenLimit"]
+    return int(model_details["inputTokenLimit"])
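
Note on the change above: the Gemini endpoint helper is now a module-level function that takes the base URL, model, and API key explicitly (previously it read them off self and model_settings), which is what lets the new google_ai_get_model_list / google_ai_get_model_details helpers reuse it. A small sketch of the URLs it produces, using a placeholder API key and model name:

# Sketch of the relocated helper's output; key and model name are placeholders.
from letta.llm_api.google_ai_client import get_gemini_endpoint_and_headers

url, headers = get_gemini_endpoint_and_headers(
    base_url="https://generativelanguage.googleapis.com",
    model="gemini-1.5-pro",
    api_key="GEMINI_API_KEY",
    key_in_header=True,
    generate_content=True,
)
# url     == "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-pro:generateContent"
# headers == {"Content-Type": "application/json", "x-goog-api-key": "GEMINI_API_KEY"}

# With model=None the model path segment is skipped, and with key_in_header=False
# the key is appended as a query parameter instead:
url, headers = get_gemini_endpoint_and_headers(
    base_url="https://generativelanguage.googleapis.com",
    model=None,
    api_key="GEMINI_API_KEY",
    key_in_header=False,
)
# url     == "https://generativelanguage.googleapis.com/v1beta/models?key=GEMINI_API_KEY"
# headers == {"Content-Type": "application/json"}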
letta/llm_api/llm_api_tools.py
@@ -140,6 +140,7 @@ def create(
     stream_interface: Optional[Union[AgentRefreshStreamingInterface, AgentChunkStreamingInterface]] = None,
     model_settings: Optional[dict] = None,  # TODO: eventually pass from server
     put_inner_thoughts_first: bool = True,
+    name: Optional[str] = None,
 ) -> ChatCompletionResponse:
     """Return response to chat completion with backoff"""
     from letta.utils import printd
@@ -206,6 +207,7 @@ def create(
                 api_key=api_key,
                 chat_completion_request=data,
                 stream_interface=stream_interface,
+                name=name,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
@@ -255,6 +257,7 @@ def create(
                 api_key=api_key,
                 chat_completion_request=data,
                 stream_interface=stream_interface,
+                name=name,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
@@ -359,6 +362,7 @@ def create(
                 stream_interface=stream_interface,
                 extended_thinking=llm_config.enable_reasoner,
                 max_reasoning_tokens=llm_config.max_reasoning_tokens,
+                name=name,
             )
 
         else:
@@ -531,6 +535,7 @@ def create(
                 api_key=model_settings.deepseek_api_key,
                 chat_completion_request=data,
                 stream_interface=stream_interface,
+                name=name,
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
letta/llm_api/llm_client.py
@@ -9,21 +9,17 @@ class LLMClient:
 
     @staticmethod
     def create(
-        agent_id: str,
         llm_config: LLMConfig,
         put_inner_thoughts_first: bool = True,
-        actor_id: Optional[str] = None,
     ) -> Optional[LLMClientBase]:
         """
         Create an LLM client based on the model endpoint type.
 
         Args:
-            agent_id: Unique identifier for the agent
             llm_config: Configuration for the LLM model
             put_inner_thoughts_first: Whether to put inner thoughts first in the response
             use_structured_output: Whether to use structured output
             use_tool_naming: Whether to use tool naming
-            actor_id: Optional actor identifier
 
         Returns:
             An instance of LLMClientBase subclass
@@ -36,19 +32,22 @@ class LLMClient:
                 from letta.llm_api.google_ai_client import GoogleAIClient
 
                 return GoogleAIClient(
-                    agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
                 )
             case "google_vertex":
                 from letta.llm_api.google_vertex_client import GoogleVertexClient
 
                 return GoogleVertexClient(
-                    agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
                 )
             case "anthropic":
                 from letta.llm_api.anthropic_client import AnthropicClient
 
                 return AnthropicClient(
-                    agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
                 )
            case _:
                 return None
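
Note on the change above: with agent_id and actor_id dropped from the factory, constructing a provider client now only needs the LLM configuration. A minimal sketch of the new call (the LLMConfig import path and field values are assumptions for illustration, not taken from this diff):

# Minimal sketch of the slimmed-down factory call.
from letta.llm_api.llm_client import LLMClient
from letta.schemas.llm_config import LLMConfig  # assumed import path

llm_config = LLMConfig(
    model="claude-3-5-sonnet-20241022",
    model_endpoint_type="anthropic",
    model_endpoint="https://api.anthropic.com/v1",
    context_window=200000,
)

# agent_id / actor_id are no longer part of the signature.
client = LLMClient.create(llm_config=llm_config, put_inner_thoughts_first=True)
if client is None:
    # Endpoint types without a dedicated client implementation fall through to None (the `case _` branch).
    raise ValueError(f"No native client for endpoint type {llm_config.model_endpoint_type}")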