letta-nightly 0.7.10.dev20250507104304__py3-none-any.whl → 0.7.12.dev20250508044425__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +8 -4
- letta/agents/letta_agent.py +3 -5
- letta/agents/letta_agent_batch.py +2 -4
- letta/client/client.py +2 -2
- letta/functions/async_composio_toolset.py +106 -0
- letta/functions/composio_helpers.py +20 -24
- letta/llm_api/anthropic.py +31 -6
- letta/llm_api/anthropic_client.py +10 -8
- letta/llm_api/google_ai_client.py +32 -10
- letta/llm_api/google_constants.py +2 -0
- letta/llm_api/google_vertex_client.py +107 -27
- letta/llm_api/llm_api_tools.py +9 -3
- letta/llm_api/llm_client.py +9 -11
- letta/llm_api/llm_client_base.py +6 -5
- letta/llm_api/openai.py +16 -0
- letta/llm_api/openai_client.py +6 -6
- letta/local_llm/constants.py +1 -0
- letta/memory.py +8 -5
- letta/orm/provider.py +1 -0
- letta/schemas/enums.py +6 -0
- letta/schemas/llm_config.py +2 -0
- letta/schemas/message.py +3 -3
- letta/schemas/providers.py +58 -2
- letta/server/rest_api/routers/v1/agents.py +10 -5
- letta/server/rest_api/routers/v1/llms.py +16 -6
- letta/server/rest_api/routers/v1/providers.py +24 -4
- letta/server/rest_api/routers/v1/sources.py +1 -0
- letta/server/server.py +58 -24
- letta/services/provider_manager.py +26 -8
- letta/settings.py +2 -0
- {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/METADATA +2 -2
- {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/RECORD +36 -35
- {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.10.dev20250507104304.dist-info → letta_nightly-0.7.12.dev20250508044425.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_vertex_client.py
CHANGED
@@ -5,16 +5,19 @@ from google import genai
 from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ThinkingConfig, ToolConfig
 
 from letta.helpers.datetime_helpers import get_utc_time_int
-from letta.helpers.json_helpers import json_dumps
+from letta.helpers.json_helpers import json_dumps, json_loads
 from letta.llm_api.google_ai_client import GoogleAIClient
 from letta.local_llm.json_parser import clean_json_string_extra_backslash
 from letta.local_llm.utils import count_tokens
+from letta.log import get_logger
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
-from letta.settings import model_settings
+from letta.settings import model_settings, settings
 from letta.utils import get_tool_call_id
 
+logger = get_logger(__name__)
+
 
 class GoogleVertexClient(GoogleAIClient):
 
@@ -35,6 +38,23 @@ class GoogleVertexClient(GoogleAIClient):
         )
         return response.model_dump()
 
+    async def request_async(self, request_data: dict, llm_config: LLMConfig) -> dict:
+        """
+        Performs underlying request to llm and returns raw response.
+        """
+        client = genai.Client(
+            vertexai=True,
+            project=model_settings.google_cloud_project,
+            location=model_settings.google_cloud_location,
+            http_options={"api_version": "v1"},
+        )
+        response = await client.aio.models.generate_content(
+            model=llm_config.model,
+            contents=request_data["contents"],
+            config=request_data["config"],
+        )
+        return response.model_dump()
+
     def build_request_data(
         self,
         messages: List[PydanticMessage],
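
The async path hands the same request_data straight to the google-genai async surface (client.aio). A minimal sketch of driving it directly, assuming Vertex credentials plus google_cloud_project/google_cloud_location are set in model_settings; the model name, prompt, and config values here are illustrative only:

    import asyncio

    from letta.llm_api.google_vertex_client import GoogleVertexClient
    from letta.schemas.llm_config import LLMConfig

    llm_config = LLMConfig(model="gemini-2.0-flash-001", model_endpoint_type="google_vertex", context_window=32000)
    client = GoogleVertexClient(put_inner_thoughts_first=True)

    # request_data would normally come from build_request_data(); a bare prompt works too
    request_data = {
        "contents": [{"role": "user", "parts": [{"text": "Hello"}]}],
        "config": {"temperature": 0.7},
    }
    raw_response = asyncio.run(client.request_async(request_data, llm_config))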
@@ -49,16 +69,21 @@ class GoogleVertexClient(GoogleAIClient):
         request_data["config"] = request_data.pop("generation_config")
         request_data["config"]["tools"] = request_data.pop("tools")
 
-        tool_names = [t["name"] for t in tools]
-        tool_config = ToolConfig(
-            function_calling_config=FunctionCallingConfig(
-                # ANY mode forces the model to predict only function calls
-                mode=FunctionCallingConfigMode.ANY,
-                # Provide the list of tools (though empty should also work, it seems not to)
-                allowed_function_names=tool_names,
+        tool_names = [t["name"] for t in tools] if tools else []
+        if len(tool_names) == 1 and settings.use_vertex_structured_outputs_experimental:
+            request_data["config"]["response_mime_type"] = "application/json"
+            request_data["config"]["response_schema"] = self.get_function_call_response_schema(tools[0])
+            del request_data["config"]["tools"]
+        else:
+            tool_config = ToolConfig(
+                function_calling_config=FunctionCallingConfig(
+                    # ANY mode forces the model to predict only function calls
+                    mode=FunctionCallingConfigMode.ANY,
+                    # Provide the list of tools (though empty should also work, it seems not to)
+                    allowed_function_names=tool_names,
+                )
             )
-        )
-        request_data["config"]["tool_config"] = tool_config.model_dump()
+            request_data["config"]["tool_config"] = tool_config.model_dump()
 
         # Add thinking_config
         # If enable_reasoner is False, set thinking_budget to 0
@@ -110,12 +135,16 @@ class GoogleVertexClient(GoogleAIClient):
         for candidate in response.candidates:
             content = candidate.content
 
-
-
-
-
-
-
+            if content.role is None or content.parts is None:
+                # This means the response is malformed like MALFORMED_FUNCTION_CALL
+                # NOTE: must be a ValueError to trigger a retry
+                if candidate.finish_reason == "MALFORMED_FUNCTION_CALL":
+                    raise ValueError(f"Error in response data from LLM: {candidate.finish_message[:350]}...")
+                else:
+                    raise ValueError(f"Error in response data from LLM: {response_data}")
+
+            role = content.role
+            assert role == "model", f"Unknown role in response: {role}"
 
             parts = content.parts
 
@@ -142,10 +171,12 @@ class GoogleVertexClient(GoogleAIClient):
 
                 # NOTE: this also involves stripping the inner monologue out of the function
                 if llm_config.put_inner_thoughts_in_kwargs:
-                    from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+                    from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX
 
-                    assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
-                    inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                    assert (
+                        INNER_THOUGHTS_KWARG_VERTEX in function_args
+                    ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+                    inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
                     assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
                 else:
                     inner_thoughts = None
@@ -167,15 +198,50 @@ class GoogleVertexClient(GoogleAIClient):
                 )
 
             else:
+                try:
+                    # Structured output tool call
+                    function_call = json_loads(response_message.text)
+                    function_name = function_call["name"]
+                    function_args = function_call["args"]
+                    assert isinstance(function_args, dict), function_args
 
-                # Inner thoughts are the content by default
-                inner_thoughts = response_message.text
+                    # NOTE: this also involves stripping the inner monologue out of the function
+                    if llm_config.put_inner_thoughts_in_kwargs:
+                        from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX
 
-                # Google AI API doesn't generate tool call IDs
-                openai_response_message = Message(
-                    role="assistant", # NOTE: "model" -> "assistant"
-                    content=inner_thoughts,
-                )
+                        assert (
+                            INNER_THOUGHTS_KWARG_VERTEX in function_args
+                        ), f"Couldn't find inner thoughts in function args:\n{function_call}"
+                        inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG_VERTEX)
+                        assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                    else:
+                        inner_thoughts = None
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant", # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                        tool_calls=[
+                            ToolCall(
+                                id=get_tool_call_id(),
+                                type="function",
+                                function=FunctionCall(
+                                    name=function_name,
+                                    arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                ),
+                            )
+                        ],
+                    )
+
+                except json.decoder.JSONDecodeError:
+                    # Inner thoughts are the content by default
+                    inner_thoughts = response_message.text
+
+                    # Google AI API doesn't generate tool call IDs
+                    openai_response_message = Message(
+                        role="assistant", # NOTE: "model" -> "assistant"
+                        content=inner_thoughts,
+                    )
 
             # Google AI API uses different finish reason strings than OpenAI
             # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
@@ -244,3 +310,17 @@ class GoogleVertexClient(GoogleAIClient):
             )
         except KeyError as e:
             raise e
+
+    def get_function_call_response_schema(self, tool: dict) -> dict:
+        return {
+            "type": "OBJECT",
+            "properties": {
+                "name": {"type": "STRING", "enum": [tool["name"]]},
+                "args": {
+                    "type": "OBJECT",
+                    "properties": tool["parameters"]["properties"],
+                    "required": tool["parameters"]["required"],
+                },
+            },
+            "required": ["name", "args"],
+        }
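
The structured-outputs branch above swaps native Vertex function calling for a JSON response constrained by response_schema. A sketch of what get_function_call_response_schema produces for a hypothetical single-tool request (the send_message tool dict is illustrative):

    # OpenAI-style function schema, as stored on the tool
    tool = {
        "name": "send_message",
        "parameters": {
            "type": "object",
            "properties": {"message": {"type": "string"}},
            "required": ["message"],
        },
    }

    # Equivalent of GoogleVertexClient().get_function_call_response_schema(tool)
    response_schema = {
        "type": "OBJECT",
        "properties": {
            "name": {"type": "STRING", "enum": ["send_message"]},
            "args": {
                "type": "OBJECT",
                "properties": tool["parameters"]["properties"],
                "required": tool["parameters"]["required"],
            },
        },
        "required": ["name", "args"],
    }

    # With response_mime_type="application/json", the model must emit e.g.
    # {"name": "send_message", "args": {"thinking": "...", "message": "Hi!"}},
    # which convert_response_to_chat_completion parses back into an OpenAI-style tool call.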
letta/llm_api/llm_api_tools.py
CHANGED
@@ -24,7 +24,7 @@ from letta.llm_api.openai import (
 from letta.local_llm.chat_completion_proxy import get_chat_completion
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION
 from letta.local_llm.utils import num_tokens_from_functions, num_tokens_from_messages
-from letta.schemas.enums import
+from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, cast_message_to_subtype
@@ -172,10 +172,12 @@ def create(
     if model_settings.openai_api_key is None and llm_config.model_endpoint == "https://api.openai.com/v1":
         # only is a problem if we are *not* using an openai proxy
         raise LettaConfigurationError(message="OpenAI key is missing from letta config file", missing_fields=["openai_api_key"])
-    elif llm_config.
+    elif llm_config.provider_category == ProviderCategory.byok:
         from letta.services.provider_manager import ProviderManager
+        from letta.services.user_manager import UserManager
 
-        api_key = ProviderManager().get_override_key(llm_config.provider_name)
+        actor = UserManager().get_user_or_default(user_id=user_id)
+        api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=actor)
     elif model_settings.openai_api_key is None:
         # the openai python client requires a dummy API key
         api_key = "DUMMY_API_KEY"
@@ -379,7 +381,9 @@ def create(
                extended_thinking=llm_config.enable_reasoner,
                max_reasoning_tokens=llm_config.max_reasoning_tokens,
                provider_name=llm_config.provider_name,
+               provider_category=llm_config.provider_category,
                name=name,
+               user_id=user_id,
            )
 
        else:
@@ -390,6 +394,8 @@ def create(
                extended_thinking=llm_config.enable_reasoner,
                max_reasoning_tokens=llm_config.max_reasoning_tokens,
                provider_name=llm_config.provider_name,
+               provider_category=llm_config.provider_category,
+               user_id=user_id,
            )
 
        if llm_config.put_inner_thoughts_in_kwargs:
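
For BYOK providers, the create() path above now resolves the requesting user before fetching the stored key, so overrides are looked up per actor rather than globally. A rough sketch of that lookup, with a hypothetical provider name and a default-user fallback:

    from letta.services.provider_manager import ProviderManager
    from letta.services.user_manager import UserManager

    # user_id=None falls back to the default user, mirroring create() above;
    # "my-anthropic" is an illustrative BYOK provider name
    actor = UserManager().get_user_or_default(user_id=None)
    api_key = ProviderManager().get_override_key("my-anthropic", actor=actor)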
letta/llm_api/llm_client.py
CHANGED
@@ -1,8 +1,11 @@
-from typing import Optional
+from typing import TYPE_CHECKING, Optional
 
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.schemas.enums import ProviderType
 
+if TYPE_CHECKING:
+    from letta.orm import User
+
 
 class LLMClient:
     """Factory class for creating LLM clients based on the model endpoint type."""
@@ -10,9 +13,8 @@ class LLMClient:
     @staticmethod
     def create(
         provider_type: ProviderType,
-        provider_name: Optional[str] = None,
         put_inner_thoughts_first: bool = True,
-        user_id: Optional[str] = None,
+        actor: Optional["User"] = None,
     ) -> Optional[LLMClientBase]:
         """
         Create an LLM client based on the model endpoint type.
@@ -32,33 +34,29 @@ class LLMClient:
                 from letta.llm_api.google_ai_client import GoogleAIClient
 
                 return GoogleAIClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    user_id=user_id,
+                    actor=actor,
                 )
             case ProviderType.google_vertex:
                 from letta.llm_api.google_vertex_client import GoogleVertexClient
 
                 return GoogleVertexClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    user_id=user_id,
+                    actor=actor,
                 )
             case ProviderType.anthropic:
                 from letta.llm_api.anthropic_client import AnthropicClient
 
                 return AnthropicClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    user_id=user_id,
+                    actor=actor,
                 )
             case ProviderType.openai:
                 from letta.llm_api.openai_client import OpenAIClient
 
                 return OpenAIClient(
-                    provider_name=provider_name,
                     put_inner_thoughts_first=put_inner_thoughts_first,
-                    user_id=user_id,
+                    actor=actor,
                 )
             case _:
                 return None
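
With provider_name gone from the factory, callers now pass the actor through and the client keeps it for BYOK key lookups and request attribution. A sketch of the new call shape (the actor would normally be a real User row):

    from letta.llm_api.llm_client import LLMClient
    from letta.schemas.enums import ProviderType

    llm_client = LLMClient.create(
        provider_type=ProviderType.openai,
        put_inner_thoughts_first=True,
        actor=None,  # Optional["User"]; None is allowed, as in the signature above
    )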
letta/llm_api/llm_client_base.py
CHANGED
@@ -1,5 +1,5 @@
 from abc import abstractmethod
-from typing import Dict, List, Optional, Union
+from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
 from anthropic.types.beta.messages import BetaMessageBatch
 from openai import AsyncStream, Stream
@@ -11,6 +11,9 @@ from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.tracing import log_event
 
+if TYPE_CHECKING:
+    from letta.orm import User
+
 
 class LLMClientBase:
     """
@@ -20,13 +23,11 @@ class LLMClientBase:
 
     def __init__(
         self,
-        provider_name: Optional[str] = None,
         put_inner_thoughts_first: Optional[bool] = True,
         use_tool_naming: bool = True,
-        user_id: Optional[str] = None,
+        actor: Optional["User"] = None,
     ):
-        self.user_id = user_id
-        self.provider_name = provider_name
+        self.actor = actor
         self.put_inner_thoughts_first = put_inner_thoughts_first
         self.use_tool_naming = use_tool_naming
 
letta/llm_api/openai.py
CHANGED
@@ -5,6 +5,7 @@ import requests
 from openai import OpenAI
 
 from letta.constants import LETTA_MODEL_ENDPOINT
+from letta.errors import ErrorCode, LLMAuthenticationError, LLMError
 from letta.helpers.datetime_helpers import timestamp_to_datetime
 from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
 from letta.llm_api.openai_client import accepts_developer_role, supports_parallel_tool_calling, supports_temperature_param
@@ -34,6 +35,21 @@ from letta.utils import get_tool_call_id, smart_urljoin
 logger = get_logger(__name__)
 
 
+def openai_check_valid_api_key(base_url: str, api_key: Union[str, None]) -> None:
+    if api_key:
+        try:
+            # just get model list to check if the api key is valid until we find a cheaper / quicker endpoint
+            openai_get_model_list(url=base_url, api_key=api_key)
+        except requests.HTTPError as e:
+            if e.response.status_code == 401:
+                raise LLMAuthenticationError(message=f"Failed to authenticate with OpenAI: {e}", code=ErrorCode.UNAUTHENTICATED)
+            raise e
+        except Exception as e:
+            raise LLMError(message=f"{e}", code=ErrorCode.INTERNAL_SERVER_ERROR)
+    else:
+        raise ValueError("No API key provided")
+
+
 def openai_get_model_list(
     url: str, api_key: Optional[str] = None, fix_url: Optional[bool] = False, extra_params: Optional[dict] = None
 ) -> dict:
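
openai_check_valid_api_key fails fast on a bad key by listing models and mapping a 401 to LLMAuthenticationError. A usage sketch (the key shown is a placeholder):

    from letta.errors import LLMAuthenticationError
    from letta.llm_api.openai import openai_check_valid_api_key

    try:
        openai_check_valid_api_key("https://api.openai.com/v1", "sk-placeholder")
    except LLMAuthenticationError as e:
        print(f"Invalid OpenAI key: {e}")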
letta/llm_api/openai_client.py
CHANGED
@@ -22,7 +22,7 @@ from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, make_post_request
 from letta.llm_api.llm_client_base import LLMClientBase
 from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
 from letta.log import get_logger
-from letta.schemas.enums import
+from letta.schemas.enums import ProviderCategory
 from letta.schemas.llm_config import LLMConfig
 from letta.schemas.message import Message as PydanticMessage
 from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
@@ -78,10 +78,10 @@ def supports_parallel_tool_calling(model: str) -> bool:
 class OpenAIClient(LLMClientBase):
     def _prepare_client_kwargs(self, llm_config: LLMConfig) -> dict:
         api_key = None
-        if llm_config.
+        if llm_config.provider_category == ProviderCategory.byok:
             from letta.services.provider_manager import ProviderManager
 
-            api_key = ProviderManager().get_override_key(llm_config.provider_name)
+            api_key = ProviderManager().get_override_key(llm_config.provider_name, actor=self.actor)
 
         if not api_key:
             api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
@@ -156,11 +156,11 @@ class OpenAIClient(LLMClientBase):
         )
 
         # always set user id for openai requests
-        if self.user_id:
-            data.user = self.user_id
+        if self.actor:
+            data.user = self.actor.id
 
         if llm_config.model_endpoint == LETTA_MODEL_ENDPOINT:
-            if not self.user_id:
+            if not self.actor:
                 # override user id for inference.letta.com
                 import uuid
 
letta/local_llm/constants.py
CHANGED
@@ -26,6 +26,7 @@ DEFAULT_WRAPPER = ChatMLInnerMonologueWrapper
 DEFAULT_WRAPPER_NAME = "chatml"
 
 INNER_THOUGHTS_KWARG = "inner_thoughts"
+INNER_THOUGHTS_KWARG_VERTEX = "thinking"
 INNER_THOUGHTS_KWARG_DESCRIPTION = "Deep inner monologue private to you only."
 INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST = f"Deep inner monologue private to you only. Think before you act, so always generate arg '{INNER_THOUGHTS_KWARG}' first before any other arg."
 INNER_THOUGHTS_CLI_SYMBOL = "💭"
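
Vertex requests now carry the agent's inner monologue under a dedicated "thinking" kwarg instead of reusing "inner_thoughts" (see the letta/schemas/message.py hunk below). A small illustration of the resulting function args:

    from letta.local_llm.constants import INNER_THOUGHTS_KWARG_VERTEX

    function_args = {"message": "Hello!"}
    text_content = "The user greeted me; respond warmly."

    # mirrors the conversion in schemas/message.py
    function_args[INNER_THOUGHTS_KWARG_VERTEX] = text_content
    assert function_args == {"message": "Hello!", "thinking": "The user greeted me; respond warmly."}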
letta/memory.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Callable, Dict, List
+from typing import TYPE_CHECKING, Callable, Dict, List
 
 from letta.constants import MESSAGE_SUMMARY_REQUEST_ACK
 from letta.llm_api.llm_api_tools import create
@@ -13,6 +13,9 @@ from letta.settings import summarizer_settings
 from letta.tracing import trace_method
 from letta.utils import count_tokens, printd
 
+if TYPE_CHECKING:
+    from letta.orm import User
+
 
 def get_memory_functions(cls: Memory) -> Dict[str, Callable]:
     """Get memory functions for a memory class"""
@@ -51,6 +54,7 @@ def _format_summary_history(message_history: List[Message]):
 def summarize_messages(
     agent_state: AgentState,
     message_sequence_to_summarize: List[Message],
+    actor: "User",
 ):
     """Summarize a message sequence using GPT"""
     # we need the context_window
@@ -63,7 +67,7 @@
         trunc_ratio = (summarizer_settings.memory_warning_threshold * context_window / summary_input_tkns) * 0.8  # For good measure...
         cutoff = int(len(message_sequence_to_summarize) * trunc_ratio)
         summary_input = str(
-            [summarize_messages(agent_state, message_sequence_to_summarize=message_sequence_to_summarize[:cutoff])]
+            [summarize_messages(agent_state, message_sequence_to_summarize=message_sequence_to_summarize[:cutoff], actor=actor)]
             + message_sequence_to_summarize[cutoff:]
         )
 
@@ -79,10 +83,9 @@
     llm_config_no_inner_thoughts.put_inner_thoughts_in_kwargs = False
 
     llm_client = LLMClient.create(
-        provider_name=llm_config_no_inner_thoughts.provider_name,
-        provider_type=llm_config_no_inner_thoughts.model_endpoint_type,
+        provider_type=agent_state.llm_config.model_endpoint_type,
         put_inner_thoughts_first=False,
-
+        actor=actor,
     )
     # try to use new client, otherwise fallback to old flow
     # TODO: we can just directly call the LLM here?
letta/orm/provider.py
CHANGED
@@ -26,6 +26,7 @@ class Provider(SqlalchemyBase, OrganizationMixin):
 
     name: Mapped[str] = mapped_column(nullable=False, doc="The name of the provider")
     provider_type: Mapped[str] = mapped_column(nullable=True, doc="The type of the provider")
+    provider_category: Mapped[str] = mapped_column(nullable=True, doc="The category of the provider (base or byok)")
     api_key: Mapped[str] = mapped_column(nullable=True, doc="API key used for requests to the provider.")
     base_url: Mapped[str] = mapped_column(nullable=True, doc="Base URL for the provider.")
 
letta/schemas/enums.py
CHANGED
@@ -3,6 +3,7 @@ from enum import Enum
 
 class ProviderType(str, Enum):
     anthropic = "anthropic"
+    anthropic_bedrock = "bedrock"
     google_ai = "google_ai"
     google_vertex = "google_vertex"
     openai = "openai"
@@ -19,6 +20,11 @@
     bedrock = "bedrock"
 
 
+class ProviderCategory(str, Enum):
+    base = "base"
+    byok = "byok"
+
+
 class MessageRole(str, Enum):
     assistant = "assistant"
     user = "user"
letta/schemas/llm_config.py
CHANGED
@@ -4,6 +4,7 @@ from pydantic import BaseModel, ConfigDict, Field, model_validator
 
 from letta.constants import LETTA_MODEL_ENDPOINT
 from letta.log import get_logger
+from letta.schemas.enums import ProviderCategory
 
 logger = get_logger(__name__)
 
@@ -51,6 +52,7 @@ class LLMConfig(BaseModel):
     ] = Field(..., description="The endpoint type for the model.")
     model_endpoint: Optional[str] = Field(None, description="The endpoint for the model.")
     provider_name: Optional[str] = Field(None, description="The provider name for the model.")
+    provider_category: Optional[ProviderCategory] = Field(None, description="The provider category for the model.")
     model_wrapper: Optional[str] = Field(None, description="The wrapper for the model.")
     context_window: int = Field(..., description="The context window size for the model.")
     put_inner_thoughts_in_kwargs: Optional[bool] = Field(
letta/schemas/message.py
CHANGED
@@ -16,7 +16,7 @@ from pydantic import BaseModel, Field, field_validator
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, TOOL_CALL_ID_MAX_LEN
 from letta.helpers.datetime_helpers import get_utc_time, is_utc_datetime
 from letta.helpers.json_helpers import json_dumps
-from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_VERTEX
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_base import OrmMetadataBase
 from letta.schemas.letta_message import (
@@ -914,9 +914,9 @@ class Message(BaseMessage):
                 function_args = {"args": function_args}
 
             if put_inner_thoughts_in_kwargs and text_content is not None:
-                assert
+                assert INNER_THOUGHTS_KWARG not in function_args, function_args
                 assert len(self.tool_calls) == 1
-                function_args[INNER_THOUGHTS_KWARG] = text_content
+                function_args[INNER_THOUGHTS_KWARG_VERTEX] = text_content
 
             parts.append(
                 {