letta-nightly 0.7.10.dev20250507104304__py3-none-any.whl → 0.7.12.dev20250508044425__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +8 -4
- letta/agents/letta_agent.py +3 -5
- letta/agents/letta_agent_batch.py +2 -4
- letta/client/client.py +2 -2
- letta/functions/async_composio_toolset.py +106 -0
- letta/functions/composio_helpers.py +20 -24
- letta/llm_api/anthropic.py +31 -6
- letta/llm_api/anthropic_client.py +10 -8
- letta/llm_api/google_ai_client.py +32 -10
- letta/llm_api/google_constants.py +2 -0
- letta/llm_api/google_vertex_client.py +107 -27
- letta/llm_api/llm_api_tools.py +9 -3
- letta/llm_api/llm_client.py +9 -11
- letta/llm_api/llm_client_base.py +6 -5
- letta/llm_api/openai.py +16 -0
- letta/llm_api/openai_client.py +6 -6
- letta/local_llm/constants.py +1 -0
- letta/memory.py +8 -5
- letta/orm/provider.py +1 -0
- letta/schemas/enums.py +6 -0
- letta/schemas/llm_config.py +2 -0
- letta/schemas/message.py +3 -3
- letta/schemas/providers.py +58 -2
- letta/server/rest_api/routers/v1/agents.py +10 -5
- letta/server/rest_api/routers/v1/llms.py +16 -6
- letta/server/rest_api/routers/v1/providers.py +24 -4
- letta/server/rest_api/routers/v1/sources.py +1 -0
- letta/server/server.py +58 -24
- letta/services/provider_manager.py +26 -8
- letta/settings.py +2 -0
- {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/METADATA +2 -2
- {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/RECORD +36 -35
- {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/entry_points.txt +0 -0
letta/__init__.py
CHANGED
letta/agent.py
CHANGED
@@ -331,10 +331,9 @@ class Agent(BaseAgent):
         log_telemetry(self.logger, "_get_ai_reply create start")
         # New LLM client flow
         llm_client = LLMClient.create(
-            provider_name=self.agent_state.llm_config.provider_name,
             provider_type=self.agent_state.llm_config.model_endpoint_type,
             put_inner_thoughts_first=put_inner_thoughts_first,
-
+            actor=self.user,
         )

         if llm_client and not stream:
@@ -943,7 +942,10 @@ class Agent(BaseAgent):
             model_endpoint=self.agent_state.llm_config.model_endpoint,
             context_window_limit=self.agent_state.llm_config.context_window,
             usage=response.usage,
-            provider_id=self.provider_manager.get_provider_id_from_name(
+            provider_id=self.provider_manager.get_provider_id_from_name(
+                self.agent_state.llm_config.provider_name,
+                actor=self.user,
+            ),
             job_id=job_id,
         )
         for message in all_new_messages:
@@ -1087,7 +1089,9 @@ class Agent(BaseAgent):
             LLM_MAX_TOKENS[self.model] if (self.model is not None and self.model in LLM_MAX_TOKENS) else LLM_MAX_TOKENS["DEFAULT"]
         )

-        summary = summarize_messages(
+        summary = summarize_messages(
+            agent_state=self.agent_state, message_sequence_to_summarize=message_sequence_to_summarize, actor=self.user
+        )
         logger.info(f"Got summary: {summary}")

         # Metadata that's useful for the agent to see
letta/agents/letta_agent.py
CHANGED
@@ -75,10 +75,9 @@ class LettaAgent(BaseAgent):
         )
         tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
         llm_client = LLMClient.create(
-            provider_name=agent_state.llm_config.provider_name,
             provider_type=agent_state.llm_config.model_endpoint_type,
             put_inner_thoughts_first=True,
-
+            actor=self.actor,
         )
         for _ in range(max_steps):
             response = await self._get_ai_reply(
@@ -120,10 +119,9 @@ class LettaAgent(BaseAgent):
         )
         tool_rules_solver = ToolRulesSolver(agent_state.tool_rules)
         llm_client = LLMClient.create(
-            provider_name=agent_state.llm_config.provider_name,
             provider_type=agent_state.llm_config.model_endpoint_type,
             put_inner_thoughts_first=True,
-
+            actor=self.actor,
         )

         for _ in range(max_steps):
@@ -350,7 +348,7 @@ class LettaAgent(BaseAgent):
             results = await self._send_message_to_agents_matching_tags(**tool_args)
             log_event(name="finish_send_message_to_agents_matching_tags", attributes=tool_args)
             return json.dumps(results), True
-        elif target_tool.
+        elif target_tool.tool_type == ToolType.EXTERNAL_COMPOSIO:
             log_event(name=f"start_composio_{tool_name}_execution", attributes=tool_args)
             log_event(name=f"finish_compsio_{tool_name}_execution", attributes=tool_args)
             return tool_execution_result.func_return, True
letta/agents/letta_agent_batch.py
CHANGED
@@ -172,10 +172,9 @@ class LettaAgentBatch:

         log_event(name="init_llm_client")
         llm_client = LLMClient.create(
-            provider_name=agent_states[0].llm_config.provider_name,
             provider_type=agent_states[0].llm_config.model_endpoint_type,
             put_inner_thoughts_first=True,
-
+            actor=self.actor,
         )
         agent_llm_config_mapping = {s.id: s.llm_config for s in agent_states}

@@ -284,10 +283,9 @@ class LettaAgentBatch:

         # translate provider‑specific response → OpenAI‑style tool call (unchanged)
         llm_client = LLMClient.create(
-            provider_name=item.llm_config.provider_name,
             provider_type=item.llm_config.model_endpoint_type,
             put_inner_thoughts_first=True,
-
+            actor=self.actor,
         )
         tool_call = (
             llm_client.convert_response_to_chat_completion(
letta/client/client.py
CHANGED
@@ -3455,7 +3455,7 @@ class LocalClient(AbstractClient):
         Returns:
             configs (List[LLMConfig]): List of LLM configurations
         """
-        return self.server.list_llm_models()
+        return self.server.list_llm_models(actor=self.user)

     def list_embedding_configs(self) -> List[EmbeddingConfig]:
         """
@@ -3464,7 +3464,7 @@ class LocalClient(AbstractClient):
         Returns:
             configs (List[EmbeddingConfig]): List of embedding configurations
         """
-        return self.server.list_embedding_models()
+        return self.server.list_embedding_models(actor=self.user)

     def create_org(self, name: Optional[str] = None) -> Organization:
         return self.server.organization_manager.create_organization(pydantic_org=Organization(name=name))
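Note: `LocalClient` now forwards its user so the server can resolve per-user (BYOK) providers. A hedged usage sketch; `create_client` is the usual way a `LocalClient` is constructed and is assumed here:

    from letta import create_client

    client = create_client()
    print(client.list_llm_configs())        # now calls server.list_llm_models(actor=client.user)
    print(client.list_embedding_configs())  # now calls server.list_embedding_models(actor=client.user)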
letta/functions/async_composio_toolset.py
ADDED
@@ -0,0 +1,106 @@
+import json
+from typing import Any
+
+import aiohttp
+from composio import ComposioToolSet as BaseComposioToolSet
+from composio.exceptions import (
+    ApiKeyNotProvidedError,
+    ComposioSDKError,
+    ConnectedAccountNotFoundError,
+    EnumMetadataNotFound,
+    EnumStringNotFound,
+)
+
+
+class AsyncComposioToolSet(BaseComposioToolSet, runtime="letta"):
+    """
+    Async version of ComposioToolSet client for interacting with Composio API
+    Used to asynchronously hit the execute action endpoint
+
+    https://docs.composio.dev/api-reference/api-reference/v3/tools/post-api-v-3-tools-execute-action
+    """
+
+    def __init__(self, api_key: str, entity_id: str, lock: bool = True):
+        """
+        Initialize the AsyncComposioToolSet client
+
+        Args:
+            api_key (str): Your Composio API key
+            entity_id (str): Your Composio entity ID
+            lock (bool): Whether to use locking (default: True)
+        """
+        super().__init__(api_key=api_key, entity_id=entity_id, lock=lock)
+
+        self.headers = {
+            "Content-Type": "application/json",
+            "X-API-Key": self._api_key,
+        }
+
+    async def execute_action(
+        self,
+        action: str,
+        params: dict[str, Any] = {},
+    ) -> dict[str, Any]:
+        """
+        Execute an action asynchronously using the Composio API
+
+        Args:
+            action (str): The name of the action to execute
+            params (dict[str, Any], optional): Parameters for the action
+
+        Returns:
+            dict[str, Any]: The API response
+
+        Raises:
+            ApiKeyNotProvidedError: if the API key is not provided
+            ComposioSDKError: if a general Composio SDK error occurs
+            ConnectedAccountNotFoundError: if the connected account is not found
+            EnumMetadataNotFound: if enum metadata is not found
+            EnumStringNotFound: if enum string is not found
+            aiohttp.ClientError: if a network-related error occurs
+            ValueError: if an error with the parameters or response occurs
+        """
+        API_VERSION = "v3"
+        endpoint = f"{self._base_url}/{API_VERSION}/tools/execute/{action}"
+
+        json_payload = {
+            "entity_id": self.entity_id,
+            "arguments": params or {},
+        }
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.post(endpoint, headers=self.headers, json=json_payload) as response:
+                    print(response, response.status, response.reason, response.content)
+                    if response.status == 200:
+                        return await response.json()
+                    else:
+                        error_text = await response.text()
+                        try:
+                            error_json = json.loads(error_text)
+                            error_message = error_json.get("message", error_text)
+                            error_code = error_json.get("code")
+
+                            # Handle specific error codes from Composio API
+                            if error_code == 10401 or "API_KEY_NOT_FOUND" in error_message:
+                                raise ApiKeyNotProvidedError()
+                            if "connected account not found" in error_message.lower():
+                                raise ConnectedAccountNotFoundError(f"Connected account not found: {error_message}")
+                            if "enum metadata not found" in error_message.lower():
+                                raise EnumMetadataNotFound(f"Enum metadata not found: {error_message}")
+                            if "enum string not found" in error_message.lower():
+                                raise EnumStringNotFound(f"Enum string not found: {error_message}")
+                        except json.JSONDecodeError:
+                            error_message = error_text
+
+                        # If no specific error was identified, raise a general error
+                        raise ValueError(f"API request failed with status {response.status}: {error_message}")
+        except aiohttp.ClientError as e:
+            # Wrap network errors in ComposioSDKError
+            raise ComposioSDKError(f"Network error when calling Composio API: {str(e)}")
+        except ValueError:
+            # Re-raise ValueError (which could be our custom error message or a JSON parsing error)
+            raise
+        except Exception as e:
+            # Catch any other exceptions and wrap them in ComposioSDKError
+            raise ComposioSDKError(f"Unexpected error when calling Composio API: {str(e)}")
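Note: the new toolset is a thin async wrapper over Composio's v3 execute endpoint. A hedged usage sketch; the key, entity ID, and action slug below are placeholders, not values from this diff:

    import asyncio

    from letta.functions.async_composio_toolset import AsyncComposioToolSet

    async def main():
        toolset = AsyncComposioToolSet(api_key="<COMPOSIO_API_KEY>", entity_id="default", lock=False)
        # POSTs to {base_url}/v3/tools/execute/<ACTION_SLUG> and returns the parsed JSON body
        result = await toolset.execute_action(action="<ACTION_SLUG>", params={})
        print(result)

    asyncio.run(main())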
letta/functions/composio_helpers.py
CHANGED
@@ -1,8 +1,6 @@
-import asyncio
 import os
 from typing import Any, Optional

-from composio import ComposioToolSet
 from composio.constants import DEFAULT_ENTITY_ID
 from composio.exceptions import (
     ApiKeyNotProvidedError,
@@ -13,6 +11,8 @@ from composio.exceptions import (
 )

 from letta.constants import COMPOSIO_ENTITY_ENV_VAR_KEY
+from letta.functions.async_composio_toolset import AsyncComposioToolSet
+from letta.utils import run_async_task


 # TODO: This is kind of hacky, as this is used to search up the action later on composio's side
@@ -61,38 +61,34 @@ def {func_name}(**kwargs):
 async def execute_composio_action_async(
     action_name: str, args: dict, api_key: Optional[str] = None, entity_id: Optional[str] = None
 ) -> tuple[str, str]:
-    try:
-        loop = asyncio.get_running_loop()
-        return await loop.run_in_executor(None, execute_composio_action, action_name, args, api_key, entity_id)
-    except Exception as e:
-        raise RuntimeError(f"Error in execute_composio_action_async: {e}") from e
-
-
-def execute_composio_action(action_name: str, args: dict, api_key: Optional[str] = None, entity_id: Optional[str] = None) -> Any:
     entity_id = entity_id or os.getenv(COMPOSIO_ENTITY_ENV_VAR_KEY, DEFAULT_ENTITY_ID)
+    composio_toolset = AsyncComposioToolSet(api_key=api_key, entity_id=entity_id, lock=False)
     try:
-
-
-
-
-
-            "Please set the sandbox environment variables either through the ADE or the API."
-        )
-    except ConnectedAccountNotFoundError:
-        raise RuntimeError(f"No connected account was found for action '{action_name}'. " "Please link an account and try again.")
-    except EnumStringNotFound as e:
-        raise RuntimeError(f"Invalid value provided for action '{action_name}': " + str(e) + ". Please check the action parameters.")
+        response = await composio_toolset.execute_action(action=action_name, params=args)
+    except ApiKeyNotProvidedError as e:
+        raise RuntimeError(f"API key not provided or invalid for Composio action '{action_name}': {str(e)}")
+    except ConnectedAccountNotFoundError as e:
+        raise RuntimeError(f"Connected account not found for Composio action '{action_name}': {str(e)}")
     except EnumMetadataNotFound as e:
-        raise RuntimeError(f"
+        raise RuntimeError(f"Enum metadata not found for Composio action '{action_name}': {str(e)}")
+    except EnumStringNotFound as e:
+        raise RuntimeError(f"Enum string not found for Composio action '{action_name}': {str(e)}")
     except ComposioSDKError as e:
-        raise RuntimeError(f"
+        raise RuntimeError(f"Composio SDK error while executing action '{action_name}': {str(e)}")
+    except Exception as e:
+        print(type(e))
+        raise RuntimeError(f"An unexpected error occurred in Composio SDK while executing action '{action_name}': {str(e)}")

     if "error" in response and response["error"]:
-        raise RuntimeError(f"Error while executing action '{action_name}':
+        raise RuntimeError(f"Error while executing action '{action_name}': {str(response['error'])}")

     return response.get("data")


+def execute_composio_action(action_name: str, args: dict, api_key: Optional[str] = None, entity_id: Optional[str] = None) -> Any:
+    return run_async_task(execute_composio_action_async(action_name, args, api_key, entity_id))
+
+
 def _assert_code_gen_compilable(code_str):
     try:
         compile(code_str, "<string>", "exec")
letta/llm_api/anthropic.py
CHANGED
@@ -19,14 +19,14 @@ from anthropic.types.beta import (
     BetaToolUseBlock,
 )

-from letta.errors import BedrockError, BedrockPermissionError
+from letta.errors import BedrockError, BedrockPermissionError, ErrorCode, LLMAuthenticationError, LLMError
 from letta.helpers.datetime_helpers import get_utc_time_int, timestamp_to_datetime
 from letta.llm_api.aws_bedrock import get_bedrock_client
 from letta.llm_api.helpers import add_inner_thoughts_to_functions
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
 from letta.log import get_logger
-from letta.schemas.enums import
+from letta.schemas.enums import ProviderCategory
 from letta.schemas.message import Message as _Message
 from letta.schemas.message import MessageRole as _MessageRole
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
@@ -42,6 +42,7 @@ from letta.schemas.openai.chat_completion_response import Message
 from letta.schemas.openai.chat_completion_response import Message as ChoiceMessage
 from letta.schemas.openai.chat_completion_response import MessageDelta, ToolCall, ToolCallDelta, UsageStatistics
 from letta.services.provider_manager import ProviderManager
+from letta.services.user_manager import UserManager
 from letta.settings import model_settings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
 from letta.tracing import log_event
@@ -118,6 +119,20 @@ DUMMY_FIRST_USER_MESSAGE = "User initializing bootup sequence."
 VALID_EVENT_TYPES = {"content_block_stop", "message_stop"}


+def anthropic_check_valid_api_key(api_key: Union[str, None]) -> None:
+    if api_key:
+        anthropic_client = anthropic.Anthropic(api_key=api_key)
+        try:
+            # just use a cheap model to count some tokens - as of 5/7/2025 this is faster than fetching the list of models
+            anthropic_client.messages.count_tokens(model=MODEL_LIST[-1]["name"], messages=[{"role": "user", "content": "a"}])
+        except anthropic.AuthenticationError as e:
+            raise LLMAuthenticationError(message=f"Failed to authenticate with Anthropic: {e}", code=ErrorCode.UNAUTHENTICATED)
+        except Exception as e:
+            raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
+    else:
+        raise ValueError("No API key provided")
+
+
 def antropic_get_model_context_window(url: str, api_key: Union[str, None], model: str) -> int:
     for model_dict in anthropic_get_model_list(url=url, api_key=api_key):
         if model_dict["name"] == model:
@@ -744,12 +759,15 @@ def anthropic_chat_completions_request(
     extended_thinking: bool = False,
     max_reasoning_tokens: Optional[int] = None,
     provider_name: Optional[str] = None,
+    provider_category: Optional[ProviderCategory] = None,
     betas: List[str] = ["tools-2024-04-04"],
+    user_id: Optional[str] = None,
 ) -> ChatCompletionResponse:
     """https://docs.anthropic.com/claude/docs/tool-use"""
     anthropic_client = None
-    if
-
+    if provider_category == ProviderCategory.byok:
+        actor = UserManager().get_user_or_default(user_id=user_id)
+        api_key = ProviderManager().get_override_key(provider_name, actor=actor)
         anthropic_client = anthropic.Anthropic(api_key=api_key)
     elif model_settings.anthropic_api_key:
         anthropic_client = anthropic.Anthropic()
@@ -803,7 +821,9 @@ def anthropic_chat_completions_request_stream(
     extended_thinking: bool = False,
     max_reasoning_tokens: Optional[int] = None,
     provider_name: Optional[str] = None,
+    provider_category: Optional[ProviderCategory] = None,
     betas: List[str] = ["tools-2024-04-04"],
+    user_id: Optional[str] = None,
 ) -> Generator[ChatCompletionChunkResponse, None, None]:
     """Stream chat completions from Anthropic API.

@@ -817,8 +837,9 @@ def anthropic_chat_completions_request_stream(
         extended_thinking=extended_thinking,
         max_reasoning_tokens=max_reasoning_tokens,
     )
-    if
-
+    if provider_category == ProviderCategory.byok:
+        actor = UserManager().get_user_or_default(user_id=user_id)
+        api_key = ProviderManager().get_override_key(provider_name, actor=actor)
         anthropic_client = anthropic.Anthropic(api_key=api_key)
     elif model_settings.anthropic_api_key:
         anthropic_client = anthropic.Anthropic()
@@ -867,10 +888,12 @@ def anthropic_chat_completions_process_stream(
     extended_thinking: bool = False,
     max_reasoning_tokens: Optional[int] = None,
     provider_name: Optional[str] = None,
+    provider_category: Optional[ProviderCategory] = None,
     create_message_id: bool = True,
     create_message_datetime: bool = True,
     betas: List[str] = ["tools-2024-04-04"],
     name: Optional[str] = None,
+    user_id: Optional[str] = None,
 ) -> ChatCompletionResponse:
     """Process a streaming completion response from Anthropic, similar to OpenAI's streaming.

@@ -952,7 +975,9 @@ def anthropic_chat_completions_process_stream(
             extended_thinking=extended_thinking,
             max_reasoning_tokens=max_reasoning_tokens,
             provider_name=provider_name,
+            provider_category=provider_category,
             betas=betas,
+            user_id=user_id,
         )
     ):
         assert isinstance(chat_completion_chunk, ChatCompletionChunkResponse), type(chat_completion_chunk)
letta/llm_api/anthropic_client.py
CHANGED
@@ -27,7 +27,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, unpack_all_in
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.log import get_logger
-from letta.schemas.enums import
+from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
@@ -45,18 +45,18 @@ logger = get_logger(__name__)
 class AnthropicClient(LLMClientBase):

     def request(self, request_data: dict, llm_config: LLMConfig) -> dict:
-        client = self._get_anthropic_client(async_client=False)
+        client = self._get_anthropic_client(llm_config, async_client=False)
         response = client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
         return response.model_dump()

     async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
-        client = self._get_anthropic_client(async_client=True)
+        client = self._get_anthropic_client(llm_config, async_client=True)
         response = await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])
         return response.model_dump()

     @trace_method
     async def stream_async(self, request_data: dict, llm_config: LLMConfig) -> AsyncStream[BetaRawMessageStreamEvent]:
-        client = self._get_anthropic_client(async_client=True)
+        client = self._get_anthropic_client(llm_config, async_client=True)
         request_data["stream"] = True
         return await client.beta.messages.create(**request_data, betas=["tools-2024-04-04"])

@@ -96,7 +96,7 @@ class AnthropicClient(LLMClientBase):
             for agent_id in agent_messages_mapping
         }

-        client = self._get_anthropic_client(async_client=True)
+        client = self._get_anthropic_client(list(agent_llm_config_mapping.values())[0], async_client=True)

         anthropic_requests = [
             Request(custom_id=agent_id, params=MessageCreateParamsNonStreaming(**params)) for agent_id, params in requests.items()
@@ -112,10 +112,12 @@ class AnthropicClient(LLMClientBase):
             raise self.handle_llm_error(e)

     @trace_method
-    def _get_anthropic_client(
+    def _get_anthropic_client(
+        self, llm_config: LLMConfig, async_client: bool = False
+    ) -> Union[anthropic.AsyncAnthropic, anthropic.Anthropic]:
         override_key = None
-        if
-        override_key = ProviderManager().get_override_key(self.
+        if llm_config.provider_category == ProviderCategory.byok:
+            override_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)

         if async_client:
             return anthropic.AsyncAnthropic(api_key=override_key) if override_key else anthropic.AsyncAnthropic()
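Note: `_get_anthropic_client` is now parameterized by the request's `LLMConfig`, so key selection happens per call. A hypothetical standalone helper mirroring the branch added here (names follow the diff; this is not the actual method body):

    from letta.schemas.enums import ProviderCategory

    def resolve_override_key(llm_config, provider_manager, actor):
        # byok configs use their stored per-provider key; anything else returns
        # None and falls through to the ambient ANTHROPIC_API_KEY default client.
        if llm_config.provider_category == ProviderCategory.byok:
            return provider_manager.get_override_key(llm_config.provider_name, actor=actor)
        return None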
letta/llm_api/google_ai_client.py
CHANGED
@@ -3,17 +3,20 @@ import uuid
 from typing import List, Optional, Tuple

 import requests
+from google import genai
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig

 from letta.constants import NON_USER_MSG_PREFIX
+from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
 from letta.helpers.datetime_helpers import get_utc_time_int
 from letta.helpers.json_helpers import json_dumps
+from letta.llm_api.google_constants import GOOGLE_MODEL_FOR_API_KEY_CHECK
 from letta.llm_api.helpers import make_post_request
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
 from letta.log import get_logger
-from letta.schemas.enums import
+from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import Tool
@@ -31,10 +34,10 @@ class GoogleAIClient(LLMClientBase):
         Performs underlying request to llm and returns raw response.
         """
         api_key = None
-        if llm_config.
+        if llm_config.provider_category == ProviderCategory.byok:
             from letta.services.provider_manager import ProviderManager

-            api_key = ProviderManager().get_override_key(llm_config.provider_name)
+            api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)

         if not api_key:
             api_key = model_settings.gemini_api_key
@@ -165,10 +168,12 @@ class GoogleAIClient(LLMClientBase):

         # NOTE: this also involves stripping the inner monologue out of the function
         if llm_config.put_inner_thoughts_in_kwargs:
-            from letta.local_llm.constants import
+            from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

-            assert
-
+            assert (
+                INNER_THOUGHTS_KWARG_VERTEX in function_args
+            ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+            inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
             assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
         else:
             inner_thoughts = None
@@ -288,7 +293,7 @@ class GoogleAIClient(LLMClientBase):
             # Per https://ai.google.dev/gemini-api/docs/function-calling?example=meeting#notes_and_limitations
             # * Only a subset of the OpenAPI schema is supported.
             # * Supported parameter types in Python are limited.
-            unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum"]
+            unsupported_keys = ["default", "exclusiveMaximum", "exclusiveMinimum", "additionalProperties"]
             keys_to_remove_at_this_level = [key for key in unsupported_keys if key in schema_part]
             for key_to_remove in keys_to_remove_at_this_level:
                 logger.warning(f"Removing unsupported keyword '{key_to_remove}' from schema part.")
@@ -380,13 +385,13 @@ class GoogleAIClient(LLMClientBase):

             # Add inner thoughts
             if llm_config.put_inner_thoughts_in_kwargs:
-                from letta.local_llm.constants import
+                from letta.local_llm.constants import INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_VERTEX

-                func["parameters"]["properties"][
+                func["parameters"]["properties"][INNER_THOUGHTS_KWARG_VERTEX] = {
                     "type": "string",
                     "description": INNER_THOUGHTS_KWARG_DESCRIPTION,
                 }
-                func["parameters"]["required"].append(
+                func["parameters"]["required"].append(INNER_THOUGHTS_KWARG_VERTEX)

         return [{"functionDeclarations": function_list}]

@@ -441,6 +446,23 @@ def get_gemini_endpoint_and_headers(
     return url, headers


+def google_ai_check_valid_api_key(api_key: str):
+    client = genai.Client(api_key=api_key)
+    # use the count token endpoint for a cheap model - as of 5/7/2025 this is slightly faster than fetching the list of models
+    try:
+        client.models.count_tokens(
+            model=GOOGLE_MODEL_FOR_API_KEY_CHECK,
+            contents="",
+        )
+    except genai.errors.ClientError as e:
+        # google api returns 400 invalid argument for invalid api key
+        if e.code == 400:
+            raise LLMAuthenticationError(message=f"Failed to authenticate with Google AI: {e}", code=ErrorCode.UNAUTHENTICATED)
+        raise e
+    except Exception as e:
+        raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
+
+
 def google_ai_get_model_list(base_url: str, api_key: str, key_in_header: bool = True) -> List[dict]:
     from letta.utils import printd
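Note: `google_ai_check_valid_api_key` mirrors the Anthropic check, issuing one `count_tokens` call against the cheap model named by the new `GOOGLE_MODEL_FOR_API_KEY_CHECK` constant. A usage sketch (placeholder key):

    from letta.errors import LLMAuthenticationError
    from letta.llm_api.google_ai_client import google_ai_check_valid_api_key

    try:
        google_ai_check_valid_api_key("AIza...")
    except LLMAuthenticationError:
        print("key rejected by Google AI")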
letta/llm_api/google_constants.py
CHANGED
@@ -14,3 +14,5 @@ GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
 GOOGLE_MODEL_TO_OUTPUT_LENGTH = {"gemini-2.0-flash-001": 8192, "gemini-2.5-pro-exp-03-25": 65536}

 GOOGLE_EMBEDING_MODEL_TO_DIM = {"text-embedding-005": 768, "text-multilingual-embedding-002": 768}
+
+GOOGLE_MODEL_FOR_API_KEY_CHECK = "gemini-2.0-flash-lite"
|