letta-nightly 0.7.10.dev20250507104304__py3-none-any.whl → 0.7.12.dev20250508044425__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +8 -4
  3. letta/agents/letta_agent.py +3 -5
  4. letta/agents/letta_agent_batch.py +2 -4
  5. letta/client/client.py +2 -2
  6. letta/functions/async_composio_toolset.py +106 -0
  7. letta/functions/composio_helpers.py +20 -24
  8. letta/llm_api/anthropic.py +31 -6
  9. letta/llm_api/anthropic_client.py +10 -8
  10. letta/llm_api/google_ai_client.py +32 -10
  11. letta/llm_api/google_constants.py +2 -0
  12. letta/llm_api/google_vertex_client.py +107 -27
  13. letta/llm_api/llm_api_tools.py +9 -3
  14. letta/llm_api/llm_client.py +9 -11
  15. letta/llm_api/llm_client_base.py +6 -5
  16. letta/llm_api/openai.py +16 -0
  17. letta/llm_api/openai_client.py +6 -6
  18. letta/local_llm/constants.py +1 -0
  19. letta/memory.py +8 -5
  20. letta/orm/provider.py +1 -0
  21. letta/schemas/enums.py +6 -0
  22. letta/schemas/llm_config.py +2 -0
  23. letta/schemas/message.py +3 -3
  24. letta/schemas/providers.py +58 -2
  25. letta/server/rest_api/routers/v1/agents.py +10 -5
  26. letta/server/rest_api/routers/v1/llms.py +16 -6
  27. letta/server/rest_api/routers/v1/providers.py +24 -4
  28. letta/server/rest_api/routers/v1/sources.py +1 -0
  29. letta/server/server.py +58 -24
  30. letta/services/provider_manager.py +26 -8
  31. letta/settings.py +2 -0
  32. {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/METADATA +2 -2
  33. {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/RECORD +36 -35
  34. {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/LICENSE +0 -0
  35. {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/WHEEL +0 -0
  36. {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
- __version__ = "0.7.10"
+ __version__ = "0.7.12"
 
  # import clients
  from letta.client.client import LocalClient, RESTClient, create_client
letta/agent.py CHANGED
@@ -331,10 +331,9 @@ class Agent(BaseAgent):
  log_telemetry(self.logger, "_get_ai_reply create start")
  # New LLM client flow
  llm_client = LLMClient.create(
- provider_name=self.agent_state.llm_config.provider_name,
  provider_type=self.agent_state.llm_config.model_endpoint_type,
  put_inner_thoughts_first=put_inner_thoughts_first,
- actor_id=self.user.id,
+ actor=self.user,
  )
 
  if llm_client and not stream:
@@ -943,7 +942,10 @@ class Agent(BaseAgent):
  model_endpoint=self.agent_state.llm_config.model_endpoint,
  context_window_limit=self.agent_state.llm_config.context_window,
  usage=response.usage,
- provider_id=self.provider_manager.get_provider_id_from_name(self.agent_state.llm_config.provider_name),
+ provider_id=self.provider_manager.get_provider_id_from_name(
+ self.agent_state.llm_config.provider_name,
+ actor=self.user,
+ ),
  job_id=job_id,
  )
  for message in all_new_messages:
@@ -1087,7 +1089,9 @@ class Agent(BaseAgent):
  LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"]
  )
 
- summary = summarize_messages(agent_state=self.agent_state, message_sequence_to_summarize=message_sequence_to_summarize)
+ summary = summarize_messages(
+ agent_state=self.agent_state, message_sequence_to_summarize=message_sequence_to_summarize, actor=self.user
+ )
  logger.info(f"Got summary: {summary}")
 
  # Metadata that's useful for the agent to see
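Note on the pattern above: this release consistently replaces the bare actor_id argument (and drops provider_name) in LLMClient.create and related helpers in favor of a full actor object, so provider lookups can be scoped to the requesting user's organization. A minimal sketch of the new call shape, with agent_state and user assumed to come from the caller's context:

    from letta.llm_api.llm_client import LLMClient

    # agent_state and user are assumed to exist in the caller's scope
    llm_client = LLMClient.create(
        provider_type=agent_state.llm_config.model_endpoint_type,
        put_inner_thoughts_first=True,
        actor=user,  # previously: actor_id=user.id
    )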
letta/agents/letta_agent.py CHANGED
@@ -75,10 +75,9 @@ class LettaAgent(BaseAgent):
  )
  tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
  llm_client = LLMClient.create(
- provider_name=agent_state.llm_config.provider_name,
  provider_type=agent_state.llm_config.model_endpoint_type,
  put_inner_thoughts_first=True,
- actor_id=self.actor.id,
+ actor=self.actor,
  )
  for _ in range(max_steps):
  response = await self._get_ai_reply(
@@ -120,10 +119,9 @@ class LettaAgent(BaseAgent):
  )
  tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
  llm_client = LLMClient.create(
- provider_name=agent_state.llm_config.provider_name,
  provider_type=agent_state.llm_config.model_endpoint_type,
  put_inner_thoughts_first=True,
- actor_id=self.actor.id,
+ actor=self.actor,
  )
 
  for _ in range(max_steps):
@@ -350,7 +348,7 @@ class LettaAgent(BaseAgent):
  results = await self._send_message_to_agents_matching_tags(**tool_args)
  log_event(name="finish_send_message_to_agents_matching_tags", attributes=tool_args)
  return json.dumps(results), True
- elif target_tool.type == ToolType.EXTERNAL_COMPOSIO:
+ elif target_tool.tool_type == ToolType.EXTERNAL_COMPOSIO:
  log_event(name=f"start_composio_{tool_name}_execution", attributes=tool_args)
  log_event(name=f"finish_compsio_{tool_name}_execution", attributes=tool_args)
  return tool_execution_result.func_return, True
letta/agents/letta_agent_batch.py CHANGED
@@ -172,10 +172,9 @@ class LettaAgentBatch:
 
  log_event(name="init_llm_client")
  llm_client = LLMClient.create(
- provider_name=agent_states[0].llm_config.provider_name,
  provider_type=agent_states[0].llm_config.model_endpoint_type,
  put_inner_thoughts_first=True,
- actor_id=self.actor.id,
+ actor=self.actor,
  )
  agent_llm_config_mapping = {s.id: s.llm_config for s in agent_states}
 
@@ -284,10 +283,9 @@ class LettaAgentBatch:
 
  # translate provider‑specific response → OpenAI‑style tool call (unchanged)
  llm_client = LLMClient.create(
- provider_name=item.llm_config.provider_name,
  provider_type=item.llm_config.model_endpoint_type,
  put_inner_thoughts_first=True,
- actor_id=self.actor.id,
+ actor=self.actor,
  )
  tool_call = (
  llm_client.convert_response_to_chat_completion(
letta/client/client.py CHANGED
@@ -3455,7 +3455,7 @@ class LocalClient(AbstractClient):
  Returns:
  configs (List[LLMConfig]): List of LLM configurations
  """
- return self.server.list_llm_models()
+ return self.server.list_llm_models(actor=self.user)
 
  def list_embedding_configs(self) -> List[EmbeddingConfig]:
  """
@@ -3464,7 +3464,7 @@ class LocalClient(AbstractClient):
  Returns:
  configs (List[EmbeddingConfig]): List of embedding configurations
  """
- return self.server.list_embedding_models()
+ return self.server.list_embedding_models(actor=self.user)
 
  def create_org(self, name: Optional[str] = None) -> Organization:
  return self.server.organization_manager.create_organization(pydantic_org=Organization(name=name))
letta/functions/async_composio_toolset.py ADDED
@@ -0,0 +1,106 @@
+ import json
+ from typing import Any
+
+ import aiohttp
+ from composio import ComposioToolSet as BaseComposioToolSet
+ from composio.exceptions import (
+ ApiKeyNotProvidedError,
+ ComposioSDKError,
+ ConnectedAccountNotFoundError,
+ EnumMetadataNotFound,
+ EnumStringNotFound,
+ )
+
+
+ class AsyncComposioToolSet(BaseComposioToolSet, runtime="letta"):
+ """
+ Async version of ComposioToolSet client for interacting with Composio API
+ Used to asynchronously hit the execute action endpoint
+
+ https://docs.composio.dev/api-reference/api-reference/v3/tools/post-api-v-3-tools-execute-action
+ """
+
+ def __init__(self, api_key: str, entity_id: str, lock: bool = True):
+ """
+ Initialize the AsyncComposioToolSet client
+
+ Args:
+ api_key (str): Your Composio API key
+ entity_id (str): Your Composio entity ID
+ lock (bool): Whether to use locking (default: True)
+ """
+ super().__init__(api_key=api_key, entity_id=entity_id, lock=lock)
+
+ self.headers = {
+ "Content-Type": "application/json",
+ "X-API-Key": self._api_key,
+ }
+
+ async def execute_action(
+ self,
+ action: str,
+ params: dict[str, Any] = {},
+ ) -> dict[str, Any]:
+ """
+ Execute an action asynchronously using the Composio API
+
+ Args:
+ action (str): The name of the action to execute
+ params (dict[str, Any], optional): Parameters for the action
+
+ Returns:
+ dict[str, Any]: The API response
+
+ Raises:
+ ApiKeyNotProvidedError: if the API key is not provided
+ ComposioSDKError: if a general Composio SDK error occurs
+ ConnectedAccountNotFoundError: if the connected account is not found
+ EnumMetadataNotFound: if enum metadata is not found
+ EnumStringNotFound: if enum string is not found
+ aiohttp.ClientError: if a network-related error occurs
+ ValueError: if an error with the parameters or response occurs
+ """
+ API_VERSION = "v3"
+ endpoint = f"{self._base_url}/{API_VERSION}/tools/execute/{action}"
+
+ json_payload = {
+ "entity_id": self.entity_id,
+ "arguments": params or {},
+ }
+
+ try:
+ async with aiohttp.ClientSession() as session:
+ async with session.post(endpoint, headers=self.headers, json=json_payload) as response:
+ print(response, response.status, response.reason, response.content)
+ if response.status == 200:
+ return await response.json()
+ else:
+ error_text = await response.text()
+ try:
+ error_json = json.loads(error_text)
+ error_message = error_json.get("message", error_text)
+ error_code = error_json.get("code")
+
+ # Handle specific error codes from Composio API
+ if error_code == 10401 or "API_KEY_NOT_FOUND" in error_message:
+ raise ApiKeyNotProvidedError()
+ if "connected account not found" in error_message.lower():
+ raise ConnectedAccountNotFoundError(f"Connected account not found: {error_message}")
+ if "enum metadata not found" in error_message.lower():
+ raise EnumMetadataNotFound(f"Enum metadata not found: {error_message}")
+ if "enum string not found" in error_message.lower():
+ raise EnumStringNotFound(f"Enum string not found: {error_message}")
+ except json.JSONDecodeError:
+ error_message = error_text
+
+ # If no specific error was identified, raise a general error
+ raise ValueError(f"API request failed with status {response.status}: {error_message}")
+ except aiohttp.ClientError as e:
+ # Wrap network errors in ComposioSDKError
+ raise ComposioSDKError(f"Network error when calling Composio API: {str(e)}")
+ except ValueError:
+ # Re-raise ValueError (which could be our custom error message or a JSON parsing error)
+ raise
+ except Exception as e:
+ # Catch any other exceptions and wrap them in ComposioSDKError
+ raise ComposioSDKError(f"Unexpected error when calling Composio API: {str(e)}")
letta/functions/composio_helpers.py CHANGED
@@ -1,8 +1,6 @@
- import asyncio
  import os
  from typing import Any, Optional
 
- from composio import ComposioToolSet
  from composio.constants import DEFAULT_ENTITY_ID
  from composio.exceptions import (
  ApiKeyNotProvidedError,
@@ -13,6 +11,8 @@ from composio.exceptions import (
  )
 
  from letta.constants import COMPOSIO_ENTITY_ENV_VAR_KEY
+ from letta.functions.async_composio_toolset import AsyncComposioToolSet
+ from letta.utils import run_async_task
 
 
  # TODO: This is kind of hacky, as this is used to search up the action later on composio's side
@@ -61,38 +61,34 @@ def {func_name}(**kwargs):
  async def execute_composio_action_async(
  action_name: str, args: dict, api_key: Optional[str] = None, entity_id: Optional[str] = None
  ) -> tuple[str, str]:
- try:
- loop = asyncio.get_running_loop()
- return await loop.run_in_executor(None, execute_composio_action, action_name, args, api_key, entity_id)
- except Exception as e:
- raise RuntimeError(f"Error in execute_composio_action_async: {e}") from e
-
-
- def execute_composio_action(action_name: str, args: dict, api_key: Optional[str] = None, entity_id: Optional[str] = None) -> Any:
  entity_id = entity_id or os.getenv(COMPOSIO_ENTITY_ENV_VAR_KEY, DEFAULT_ENTITY_ID)
+ composio_toolset = AsyncComposioToolSet(api_key=api_key, entity_id=entity_id, lock=False)
  try:
- composio_toolset = ComposioToolSet(api_key=api_key, entity_id=entity_id, lock=False)
- response = composio_toolset.execute_action(action=action_name, params=args)
- except ApiKeyNotProvidedError:
- raise RuntimeError(
- f"Composio API key is missing for action '{action_name}'. "
- "Please set the sandbox environment variables either through the ADE or the API."
- )
- except ConnectedAccountNotFoundError:
- raise RuntimeError(f"No connected account was found for action '{action_name}'. " "Please link an account and try again.")
- except EnumStringNotFound as e:
- raise RuntimeError(f"Invalid value provided for action '{action_name}': " + str(e) + ". Please check the action parameters.")
+ response = await composio_toolset.execute_action(action=action_name, params=args)
+ except ApiKeyNotProvidedError as e:
+ raise RuntimeError(f"API key not provided or invalid for Composio action '{action_name}': {str(e)}")
+ except ConnectedAccountNotFoundError as e:
+ raise RuntimeError(f"Connected account not found for Composio action '{action_name}': {str(e)}")
  except EnumMetadataNotFound as e:
- raise RuntimeError(f"Invalid value provided for action '{action_name}': " + str(e) + ". Please check the action parameters.")
+ raise RuntimeError(f"Enum metadata not found for Composio action '{action_name}': {str(e)}")
+ except EnumStringNotFound as e:
+ raise RuntimeError(f"Enum string not found for Composio action '{action_name}': {str(e)}")
  except ComposioSDKError as e:
- raise RuntimeError(f"An unexpected error occurred in Composio SDK while executing action '{action_name}': " + str(e))
+ raise RuntimeError(f"Composio SDK error while executing action '{action_name}': {str(e)}")
+ except Exception as e:
+ print(type(e))
+ raise RuntimeError(f"An unexpected error occurred in Composio SDK while executing action '{action_name}': {str(e)}")
 
  if "error" in response and response["error"]:
- raise RuntimeError(f"Error while executing action '{action_name}': " + str(response["error"]))
+ raise RuntimeError(f"Error while executing action '{action_name}': {str(response['error'])}")
 
  return response.get("data")
 
 
+ def execute_composio_action(action_name: str, args: dict, api_key: Optional[str] = None, entity_id: Optional[str] = None) -> Any:
+ return run_async_task(execute_composio_action_async(action_name, args, api_key, entity_id))
+
+
  def _assert_code_gen_compilable(code_str):
  try:
  compile(code_str, "<string>", "exec")
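The synchronous execute_composio_action is now a thin wrapper that drives the async implementation through run_async_task from letta.utils. That helper's implementation is not part of this diff; a common shape for such a sync-over-async bridge, offered here only as an assumption about its behavior, is:

    import asyncio
    import concurrent.futures
    from typing import Any, Coroutine

    def run_async_task(coro: Coroutine) -> Any:
        """Run a coroutine to completion from synchronous code."""
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop is running in this thread, so we can start one.
            return asyncio.run(coro)
        # A running loop cannot be re-entered; run the coroutine on its own
        # loop in a worker thread instead.
        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()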
letta/llm_api/anthropic.py CHANGED
@@ -19,14 +19,14 @@ from anthropic.types.beta import (
  BetaToolUseBlock,
  )
 
- from letta.errors import BedrockError, BedrockPermissionError
+ from letta.errors import BedrockError, BedrockPermissionError, ErrorCode, LLMAuthenticationError, LLMError
  from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime
  from letta.llm_api.aws_bedrock import get_bedrock_client
  from letta.llm_api.helpers import add_inner_thoughts_to_functions
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
  from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
  from letta.log import get_logger
- from letta.schemas.enums import ProviderType
+ from letta.schemas.enums import ProviderCategory
  from letta.schemas.message import Message as _Message
  from letta.schemas.message import MessageRole as _MessageRole
  from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
@@ -42,6 +42,7 @@ from letta.schemas.openai.chat_completion_response import Message
  from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
  from letta.schemas.openai.chat_completion_response import MessageDelta, ToolCall, ToolCallDelta, UsageStatistics
  from letta.services.provider_manager import ProviderManager
+ from letta.services.user_manager import UserManager
  from letta.settings import model_settings
  from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
  from letta.tracing import log_event
@@ -118,6 +119,20 @@ DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
  VALID_EVENT_TYPES = {"content_block_stop", "message_stop"}
 
 
+ def anthropic_check_valid_api_key(api_key: Union[str, None]) -> None:
+ if api_key:
+ anthropic_client = anthropic.Anthropic(api_key=api_key)
+ try:
+ # just use a cheap model to count some tokens - as of 5/7/2025 this is faster than fetching the list of models
+ anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
+ except anthropic.AuthenticationError as e:
+ raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
+ except Exception as e:
+ raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
+ else:
+ raise ValueError("No API key provided")
+
+
  def antropic_get_model_context_window(url: str, api_key: Union[str, None], model: str) -> int:
  for model_dict in anthropic_get_model_list(url=url, api_key=api_key):
  if model_dict["name"] == model:
@@ -744,12 +759,15 @@ def anthropic_chat_completions_request(
  extended_thinking: bool = False,
  max_reasoning_tokens: Optional[int] = None,
  provider_name: Optional[str] = None,
+ provider_category: Optional[ProviderCategory] = None,
  betas: List[str] = ["tools-2024-04-04"],
+ user_id: Optional[str] = None,
  ) -> ChatCompletionResponse:
  """https://docs.anthropic.com/claude/docs/tool-use"""
  anthropic_client = None
- if provider_name and provider_name != ProviderType.anthropic.value:
- api_key = ProviderManager().get_override_key(provider_name)
+ if provider_category == ProviderCategory.byok:
+ actor = UserManager().get_user_or_default(user_id=user_id)
+ api_key = ProviderManager().get_override_key(provider_name, actor=actor)
  anthropic_client = anthropic.Anthropic(api_key=api_key)
  elif model_settings.anthropic_api_key:
  anthropic_client = anthropic.Anthropic()
@@ -803,7 +821,9 @@ def anthropic_chat_completions_request_stream(
  extended_thinking: bool = False,
  max_reasoning_tokens: Optional[int] = None,
  provider_name: Optional[str] = None,
+ provider_category: Optional[ProviderCategory] = None,
  betas: List[str] = ["tools-2024-04-04"],
+ user_id: Optional[str] = None,
  ) -> Generator[ChatCompletionChunkResponse, None, None]:
  """Stream chat completions from Anthropic API.
 
@@ -817,8 +837,9 @@ def anthropic_chat_completions_request_stream(
  extended_thinking=extended_thinking,
  max_reasoning_tokens=max_reasoning_tokens,
  )
- if provider_name and provider_name != ProviderType.anthropic.value:
- api_key = ProviderManager().get_override_key(provider_name)
+ if provider_category == ProviderCategory.byok:
+ actor = UserManager().get_user_or_default(user_id=user_id)
+ api_key = ProviderManager().get_override_key(provider_name, actor=actor)
  anthropic_client = anthropic.Anthropic(api_key=api_key)
  elif model_settings.anthropic_api_key:
  anthropic_client = anthropic.Anthropic()
@@ -867,10 +888,12 @@ def anthropic_chat_completions_process_stream(
  extended_thinking: bool = False,
  max_reasoning_tokens: Optional[int] = None,
  provider_name: Optional[str] = None,
+ provider_category: Optional[ProviderCategory] = None,
  create_message_id: bool = True,
  create_message_datetime: bool = True,
  betas: List[str] = ["tools-2024-04-04"],
  name: Optional[str] = None,
+ user_id: Optional[str] = None,
  ) -> ChatCompletionResponse:
  """Process a streaming completion response from Anthropic, similar to OpenAI's streaming.
 
@@ -952,7 +975,9 @@ def anthropic_chat_completions_process_stream(
  extended_thinking=extended_thinking,
  max_reasoning_tokens=max_reasoning_tokens,
  provider_name=provider_name,
+ provider_category=provider_category,
  betas=betas,
+ user_id=user_id,
  )
  ):
  assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk)
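The new anthropic_check_valid_api_key helper gives the server a cheap pre-flight check before accepting a BYOK provider: it issues a one-token count_tokens call and converts an AuthenticationError into letta's LLMAuthenticationError. A minimal usage sketch; the key value is a placeholder:

    from letta.errors import LLMAuthenticationError
    from letta.llm_api.anthropic import anthropic_check_valid_api_key

    try:
        anthropic_check_valid_api_key("sk-ant-placeholder")
    except LLMAuthenticationError as e:
        print(f"rejected Anthropic key: {e}")
    else:
        print("Anthropic key accepted")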
letta/llm_api/anthropic_client.py CHANGED
@@ -27,7 +27,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
  from letta.llm_api.llm_client_base import LLMClientBase
  from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
  from letta.log import get_logger
- from letta.schemas.enums import ProviderType
+ from letta.schemas.enums import ProviderCategory
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
  from letta.schemas.openai.chat_completion_request import Tool
@@ -45,18 +45,18 @@ logger = get_logger(__name__)
  class AnthropicClient(LLMClientBase):
 
  def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
- client = self._get_anthropic_client(async_client=False)
+ client = self._get_anthropic_client(llm_config, async_client=False)
  response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
  return response.model_dump()
 
  async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
- client = self._get_anthropic_client(async_client=True)
+ client = self._get_anthropic_client(llm_config, async_client=True)
  response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
  return response.model_dump()
 
  @trace_method
  async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]:
- client = self._get_anthropic_client(async_client=True)
+ client = self._get_anthropic_client(llm_config, async_client=True)
  request_data["stream"] = True
  return await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
 
@@ -96,7 +96,7 @@ class AnthropicClient(LLMClientBase):
  for agent_id in agent_messages_mapping
  }
 
- client = self._get_anthropic_client(async_client=True)
+ client = self._get_anthropic_client(list(agent_llm_config_mapping.values())[0], async_client=True)
 
  anthropic_requests = [
  Request(custom_id=agent_id, params=MessageCreateParamsNonStreaming(**params)) for agent_id, params in requests.items()
@@ -112,10 +112,12 @@ class AnthropicClient(LLMClientBase):
  raise self.handle_llm_error(e)
 
  @trace_method
- def _get_anthropic_client(self, async_client: bool = False) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
+ def _get_anthropic_client(
+ self, llm_config: LLMConfig, async_client: bool = False
+ ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
  override_key = None
- if self.provider_name and self.provider_name != ProviderType.anthropic.value:
- override_key = ProviderManager().get_override_key(self.provider_name)
+ if llm_config.provider_category == ProviderCategory.byok:
+ override_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
 
  if async_client:
  return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
letta/llm_api/google_ai_client.py CHANGED
@@ -3,17 +3,20 @@ import uuid
  from typing import List, Optional, Tuple
 
  import requests
+ from google import genai
  from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig
 
  from letta.constants import NON_USER_MSG_PREFIX
+ from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
  from letta.helpers.datetime_helpers import get_utc_time_int
  from letta.helpers.json_helpers import json_dumps
+ from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK
  from letta.llm_api.helpers import make_post_request
  from letta.llm_api.llm_client_base import LLMClientBase
  from letta.local_llm.json_parser import clean_json_string_extra_backslash
  from letta.local_llm.utils import count_tokens
  from letta.log import get_logger
- from letta.schemas.enums import ProviderType
+ from letta.schemas.enums import ProviderCategory
  from letta.schemas.llm_config import LLMConfig
  from letta.schemas.message import Message as PydanticMessage
  from letta.schemas.openai.chat_completion_request import Tool
@@ -31,10 +34,10 @@ class GoogleAIClient(LLMClientBase):
  Performs underlying request to llm and returns raw response.
  """
  api_key = None
- if llm_config.provider_name and llm_config.provider_name != ProviderType.google_ai.value:
+ if llm_config.provider_category == ProviderCategory.byok:
  from letta.services.provider_manager import ProviderManager
 
- api_key = ProviderManager().get_override_key(llm_config.provider_name)
+ api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
 
  if not api_key:
  api_key = model_settings.gemini_api_key
@@ -165,10 +168,12 @@
 
  # NOTE: this also involves stripping the inner monologue out of the function
  if llm_config.put_inner_thoughts_in_kwargs:
- from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX
 
- assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
- inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+ assert (
+ INNER_THOUGHTS_KWARG_VERTEX in function_args
+ ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+ inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
  assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
  else:
  inner_thoughts = None
@@ -288,7 +293,7 @@
  # Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
  # * Only a subset of the OpenAPI schema is supported.
  # * Supported parameter types in Python are limited.
- unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum"]
+ unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"]
  keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
  for key_to_remove in keys_to_remove_at_this_level:
  logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
@@ -380,13 +385,13 @@
 
  # Add inner thoughts
  if llm_config.put_inner_thoughts_in_kwargs:
- from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
+ from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX
 
- func["parameters"]["properties"][INNER_THOUGHTS_KWARG] = {
+ func["parameters"]["properties"][INNER_THOUGHTS_KWARG_VERTEX] = {
  "type": "string",
  "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
  }
- func["parameters"]["required"].append(INNER_THOUGHTS_KWARG)
+ func["parameters"]["required"].append(INNER_THOUGHTS_KWARG_VERTEX)
 
  return [{"functionDeclarations": function_list}]
 
@@ -441,6 +446,23 @@ def get_gemini_endpoint_and_headers(
  return url, headers
 
 
+ def google_ai_check_valid_api_key(api_key: str):
+ client = genai.Client(api_key=api_key)
+ # use the count token endpoint for a cheap model - as of 5/7/2025 this is slightly faster than fetching the list of models
+ try:
+ client.models.count_tokens(
+ model=GOOGLE_MODEL_FOR_API_KEY_CHECK,
+ contents="",
+ )
+ except genai.errors.ClientError as e:
+ # google api returns 400 invalid argument for invalid api key
+ if e.code == 400:
+ raise LLMAuthenticationError(message=f"Failed to authenticate with Google AI: {e}", code=ErrorCode.UNAUTHENTICATED)
+ raise e
+ except Exception as e:
+ raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
+
+
  def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
  from letta.utils import printd
 
letta/llm_api/google_constants.py CHANGED
@@ -14,3 +14,5 @@ GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
  GOOGLE_MODEL_TO_OUTPUT_LENGTH = {"gemini-2.0-flash-001": 8192, "gemini-2.5-pro-exp-03-25": 65536}
 
  GOOGLE_EMBEDING_MODEL_TO_DIM = {"text-embedding-005": 768, "text-multilingual-embedding-002": 768}
+
+ GOOGLE_MODEL_FOR_API_KEY_CHECK = "gemini-2.0-flash-lite"
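google_ai_check_valid_api_key mirrors the Anthropic pre-flight check: it calls the count_tokens endpoint with the cheap model named by GOOGLE_MODEL_FOR_API_KEY_CHECK and maps Google's 400 ClientError onto LLMAuthenticationError. A minimal usage sketch; the key value is a placeholder:

    from letta.errors import LLMAuthenticationError
    from letta.llm_api.google_ai_client import google_ai_check_valid_api_key

    try:
        google_ai_check_valid_api_key("AIza-placeholder")
    except LLMAuthenticationError as e:
        print(f"rejected Gemini key: {e}")
    else:
        print("Gemini key accepted")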