letta-nightly 0.7.7.dev20250430205840__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +8 -12
- letta/agents/exceptions.py +6 -0
- letta/agents/letta_agent.py +48 -35
- letta/agents/letta_agent_batch.py +6 -2
- letta/agents/voice_agent.py +10 -7
- letta/constants.py +5 -1
- letta/functions/composio_helpers.py +100 -0
- letta/functions/functions.py +4 -2
- letta/functions/helpers.py +19 -99
- letta/groups/helpers.py +1 -0
- letta/groups/sleeptime_multi_agent.py +5 -1
- letta/helpers/message_helper.py +21 -4
- letta/helpers/tool_execution_helper.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +165 -158
- letta/interfaces/openai_chat_completions_streaming_interface.py +1 -1
- letta/llm_api/anthropic.py +15 -10
- letta/llm_api/anthropic_client.py +5 -1
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/llm_api_tools.py +7 -0
- letta/llm_api/llm_client.py +12 -2
- letta/llm_api/llm_client_base.py +4 -0
- letta/llm_api/openai.py +9 -3
- letta/llm_api/openai_client.py +18 -4
- letta/memory.py +3 -1
- letta/orm/group.py +2 -0
- letta/orm/provider.py +10 -0
- letta/schemas/agent.py +0 -1
- letta/schemas/enums.py +11 -0
- letta/schemas/group.py +24 -0
- letta/schemas/llm_config.py +1 -0
- letta/schemas/llm_config_overrides.py +2 -2
- letta/schemas/providers.py +75 -20
- letta/schemas/tool.py +3 -8
- letta/server/rest_api/app.py +12 -0
- letta/server/rest_api/chat_completions_interface.py +1 -1
- letta/server/rest_api/interface.py +8 -10
- letta/server/rest_api/{optimistic_json_parser.py → json_parser.py} +62 -26
- letta/server/rest_api/routers/v1/agents.py +1 -1
- letta/server/rest_api/routers/v1/llms.py +4 -3
- letta/server/rest_api/routers/v1/providers.py +4 -1
- letta/server/rest_api/routers/v1/voice.py +0 -2
- letta/server/rest_api/utils.py +8 -19
- letta/server/server.py +25 -11
- letta/services/group_manager.py +58 -0
- letta/services/provider_manager.py +25 -14
- letta/services/summarizer/summarizer.py +15 -7
- letta/services/tool_executor/tool_execution_manager.py +1 -1
- letta/services/tool_executor/tool_executor.py +3 -3
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/METADATA +4 -5
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/RECORD +54 -52
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/interface.py
CHANGED

```diff
@@ -28,7 +28,7 @@ from letta.schemas.letta_message import (
 from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
-from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
+from letta.server.rest_api.json_parser import OptimisticJSONParser
 from letta.streaming_interface import AgentChunkStreamingInterface
 from letta.streaming_utils import FunctionArgumentsStreamHandler, JSONInnerThoughtsExtractor
 from letta.utils import parse_json
@@ -291,7 +291,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_tool_kwarg)
 
         # @matt's changes here, adopting new optimistic json parser
-        self.current_function_arguments = []
+        self.current_function_arguments = ""
         self.optimistic_json_parser = OptimisticJSONParser()
         self.current_json_parse_result = {}
 
@@ -387,7 +387,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def stream_start(self):
         """Initialize streaming by activating the generator and clearing any old chunks."""
         self.streaming_chat_completion_mode_function_name = None
-        self.current_function_arguments = []
+        self.current_function_arguments = ""
         self.current_json_parse_result = {}
 
         if not self._active:
@@ -398,7 +398,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def stream_end(self):
         """Clean up the stream by deactivating and clearing chunks."""
         self.streaming_chat_completion_mode_function_name = None
-        self.current_function_arguments = []
+        self.current_function_arguments = ""
         self.current_json_parse_result = {}
 
         # if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
@@ -609,14 +609,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # early exit to turn into content mode
            return None
        if tool_call.function.arguments:
-            self.current_function_arguments.append(tool_call.function.arguments)
+            self.current_function_arguments += tool_call.function.arguments
 
        # if we're in the middle of parsing a send_message, we'll keep processing the JSON chunks
        if tool_call.function.arguments and self.streaming_chat_completion_mode_function_name == self.assistant_message_tool_name:
            # Strip out any extras tokens
            # In the case that we just have the prefix of something, no message yet, then we should early exit to move to the next chunk
-            combined_args = "".join(self.current_function_arguments)
-            parsed_args = self.optimistic_json_parser.parse(combined_args)
+            parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
 
            if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
                self.assistant_message_tool_kwarg
@@ -686,7 +685,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
            # updates_inner_thoughts = ""
            # else:  # OpenAI
            #     updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
-            self.current_function_arguments.append(tool_call.function.arguments)
+            self.current_function_arguments += tool_call.function.arguments
            updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
 
            # If we have inner thoughts, we should output them as a chunk
@@ -805,8 +804,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
        # TODO: THIS IS HORRIBLE
        # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
        # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-        combined_args = "".join(self.current_function_arguments)
-        parsed_args = self.optimistic_json_parser.parse(combined_args)
+        parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
 
        if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
            self.assistant_message_tool_kwarg
```
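The recurring change in these interface.py hunks is mechanical: `current_function_arguments` switches from a list of fragments (previously joined into `combined_args` before each parse) to a single string that grows with `+=` and is re-parsed optimistically on every chunk. A minimal sketch of that accumulate-then-reparse pattern, illustrative only rather than letta source, using `pydantic_core.from_json` in place of letta's `OptimisticJSONParser`:

```python
from pydantic_core import from_json

buffer = ""  # replaces the old list-of-fragments plus "".join(...) approach
for fragment in ['{"mess', 'age": "Hi', ' there"}']:
    buffer += fragment
    try:
        # Optimistically parse whatever valid prefix has arrived so far
        parsed = from_json(buffer, allow_partial="trailing-strings")
    except ValueError:
        parsed = {}
    print(parsed)  # the partial dict grows as fragments arrive
```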
letta/server/rest_api/{optimistic_json_parser.py → json_parser.py}
RENAMED

```diff
@@ -1,7 +1,43 @@
 import json
+from abc import ABC, abstractmethod
+from typing import Any
 
+from pydantic_core import from_json
 
-class OptimisticJSONParser:
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
+
+class JSONParser(ABC):
+    @abstractmethod
+    def parse(self, input_str: str) -> Any:
+        raise NotImplementedError()
+
+
+class PydanticJSONParser(JSONParser):
+    """
+    https://docs.pydantic.dev/latest/concepts/json/#json-parsing
+    If `strict` is True, we will not allow for partial parsing of JSON.
+
+    Compared with `OptimisticJSONParser`, this parser is more strict.
+    Note: This will not partially parse strings which may be decrease parsing speed for message strings
+    """
+
+    def __init__(self, strict=False):
+        self.strict = strict
+
+    def parse(self, input_str: str) -> Any:
+        if not input_str:
+            return {}
+        try:
+            return from_json(input_str, allow_partial="trailing-strings" if not self.strict else False)
+        except ValueError as e:
+            logger.error(f"Failed to parse JSON: {e}")
+            raise
+
+
+class OptimisticJSONParser(JSONParser):
     """
     A JSON parser that attempts to parse a given string using `json.loads`,
     and if that fails, it parses as much valid JSON as possible while
@@ -13,25 +49,25 @@ class OptimisticJSONParser:
     def __init__(self, strict=False):
         self.strict = strict
         self.parsers = {
-            " ": self.parse_space,
-            "\r": self.parse_space,
-            "\n": self.parse_space,
-            "\t": self.parse_space,
-            "[": self.parse_array,
-            "{": self.parse_object,
-            '"': self.parse_string,
-            "t": self.parse_true,
-            "f": self.parse_false,
-            "n": self.parse_null,
+            " ": self._parse_space,
+            "\r": self._parse_space,
+            "\n": self._parse_space,
+            "\t": self._parse_space,
+            "[": self._parse_array,
+            "{": self._parse_object,
+            '"': self._parse_string,
+            "t": self._parse_true,
+            "f": self._parse_false,
+            "n": self._parse_null,
         }
         # Register number parser for digits and signs
         for char in "0123456789.-":
             self.parsers[char] = self.parse_number
 
         self.last_parse_reminding = None
-        self.on_extra_token = self.default_on_extra_token
+        self.on_extra_token = self._default_on_extra_token
 
-    def default_on_extra_token(self, text, data, reminding):
+    def _default_on_extra_token(self, text, data, reminding):
         print(f"Parsed JSON with extra tokens: {data}, remaining: {reminding}")
 
     def parse(self, input_str):
@@ -45,7 +81,7 @@ class OptimisticJSONParser:
         try:
             return json.loads(input_str)
         except json.JSONDecodeError as decode_error:
-            data, reminding = self.parse_any(input_str, decode_error)
+            data, reminding = self._parse_any(input_str, decode_error)
             self.last_parse_reminding = reminding
             if self.on_extra_token and reminding:
                 self.on_extra_token(input_str, data, reminding)
@@ -53,7 +89,7 @@ class OptimisticJSONParser:
         else:
             return json.loads("{}")
 
-    def parse_any(self, input_str, decode_error):
+    def _parse_any(self, input_str, decode_error):
         """Determine which parser to use based on the first character."""
         if not input_str:
             raise decode_error
@@ -62,11 +98,11 @@ class OptimisticJSONParser:
             raise decode_error
         return parser(input_str, decode_error)
 
-    def parse_space(self, input_str, decode_error):
+    def _parse_space(self, input_str, decode_error):
         """Strip leading whitespace and parse again."""
-        return self.parse_any(input_str.strip(), decode_error)
+        return self._parse_any(input_str.strip(), decode_error)
 
-    def parse_array(self, input_str, decode_error):
+    def _parse_array(self, input_str, decode_error):
         """Parse a JSON array, returning the list and remaining string."""
         # Skip the '['
         input_str = input_str[1:]
@@ -77,7 +113,7 @@ class OptimisticJSONParser:
                 # Skip the ']'
                 input_str = input_str[1:]
                 break
-            value, input_str = self.parse_any(input_str, decode_error)
+            value, input_str = self._parse_any(input_str, decode_error)
             array_values.append(value)
             input_str = input_str.strip()
             if input_str.startswith(","):
@@ -85,7 +121,7 @@ class OptimisticJSONParser:
                 input_str = input_str[1:].strip()
         return array_values, input_str
 
-    def parse_object(self, input_str, decode_error):
+    def _parse_object(self, input_str, decode_error):
         """Parse a JSON object, returning the dict and remaining string."""
         # Skip the '{'
         input_str = input_str[1:]
@@ -96,7 +132,7 @@ class OptimisticJSONParser:
                 # Skip the '}'
                 input_str = input_str[1:]
                 break
-            key, input_str = self.parse_any(input_str, decode_error)
+            key, input_str = self._parse_any(input_str, decode_error)
             input_str = input_str.strip()
 
             if not input_str or input_str[0] == "}":
@@ -113,7 +149,7 @@ class OptimisticJSONParser:
                 input_str = input_str[1:]
                 break
 
-            value, input_str = self.parse_any(input_str, decode_error)
+            value, input_str = self._parse_any(input_str, decode_error)
             obj[key] = value
             input_str = input_str.strip()
             if input_str.startswith(","):
@@ -121,7 +157,7 @@ class OptimisticJSONParser:
                 input_str = input_str[1:].strip()
         return obj, input_str
 
-    def parse_string(self, input_str, decode_error):
+    def _parse_string(self, input_str, decode_error):
         """Parse a JSON string, respecting escaped quotes if present."""
         end = input_str.find('"', 1)
         while end != -1 and input_str[end - 1] == "\\":
@@ -166,19 +202,19 @@ class OptimisticJSONParser:
 
         return num, remainder
 
-    def parse_true(self, input_str, decode_error):
+    def _parse_true(self, input_str, decode_error):
         """Parse a 'true' value."""
         if input_str.startswith(("t", "T")):
             return True, input_str[4:]
         raise decode_error
 
-    def parse_false(self, input_str, decode_error):
+    def _parse_false(self, input_str, decode_error):
         """Parse a 'false' value."""
         if input_str.startswith(("f", "F")):
             return False, input_str[5:]
         raise decode_error
 
-    def parse_null(self, input_str, decode_error):
+    def _parse_null(self, input_str, decode_error):
         """Parse a 'null' value."""
         if input_str.startswith("n"):
             return None, input_str[4:]
```
letta/server/rest_api/routers/v1/agents.py
CHANGED

```diff
@@ -678,7 +678,7 @@ async def send_message_streaming(
     server: SyncServer = Depends(get_letta_server),
     request: LettaStreamingRequest = Body(...),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
-):
+) -> StreamingResponse | LettaResponse:
     """
     Process a user message and return the agent's response.
     This endpoint accepts a message from a user and processes it through the agent.
```
letta/server/rest_api/routers/v1/llms.py
CHANGED

```diff
@@ -1,6 +1,6 @@
-from typing import TYPE_CHECKING, List
+from typing import TYPE_CHECKING, List, Optional
 
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, Query
 
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
@@ -14,10 +14,11 @@ router = APIRouter(prefix="/models", tags=["models", "llms"])
 
 @router.get("/", response_model=List[LLMConfig], operation_id="list_models")
 def list_llm_models(
+    byok_only: Optional[bool] = Query(None),
     server: "SyncServer" = Depends(get_letta_server),
 ):
 
-    models = server.list_llm_models()
+    models = server.list_llm_models(byok_only=byok_only)
     # print(models)
     return models
 
```
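The new `byok_only` flag lets clients restrict the model list to BYOK (bring-your-own-key) providers stored in the database. A hypothetical client call, assuming a local letta server on its default port:

```python
import requests

# byok_only=true limits results to providers registered via the providers API
resp = requests.get("http://localhost:8283/v1/models/", params={"byok_only": "true"})
print(resp.json())
```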
letta/server/rest_api/routers/v1/providers.py
CHANGED

```diff
@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, List, Optional
 
 from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
 
+from letta.schemas.enums import ProviderType
 from letta.schemas.providers import Provider, ProviderCreate, ProviderUpdate
 from letta.server.rest_api.utils import get_letta_server
 
@@ -13,6 +14,8 @@ router = APIRouter(prefix="/providers", tags=["providers"])
 
 @router.get("/", response_model=List[Provider], operation_id="list_providers")
 def list_providers(
+    name: Optional[str] = Query(None),
+    provider_type: Optional[ProviderType] = Query(None),
     after: Optional[str] = Query(None),
     limit: Optional[int] = Query(50),
     actor_id: Optional[str] = Header(None, alias="user_id"),
@@ -23,7 +26,7 @@ def list_providers(
     """
     try:
         actor = server.user_manager.get_user_or_default(user_id=actor_id)
-        providers = server.provider_manager.list_providers(after=after, limit=limit, actor=actor)
+        providers = server.provider_manager.list_providers(after=after, limit=limit, actor=actor, name=name, provider_type=provider_type)
     except HTTPException:
         raise
     except Exception as e:
```
letta/server/rest_api/utils.py
CHANGED

```diff
@@ -16,6 +16,7 @@ from pydantic import BaseModel
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
 from letta.helpers.datetime_helpers import get_utc_time
+from letta.helpers.message_helper import convert_message_creates_to_messages
 from letta.log import get_logger
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
@@ -143,27 +144,15 @@ def log_error_to_sentry(e):
 def create_input_messages(input_messages: List[MessageCreate], agent_id: str, actor: User) -> List[Message]:
     """
     Converts a user input message into the internal structured format.
+
+    TODO (cliandy): this effectively duplicates the functionality of `convert_message_creates_to_messages`,
+    we should unify this when it's clear what message attributes we need.
     """
-    new_messages = []
-    for input_message in input_messages:
-        # Construct the Message object
-        new_message = Message(
-            id=f"message-{uuid.uuid4()}",
-            role=input_message.role,
-            content=input_message.content,
-            name=input_message.name,
-            otid=input_message.otid,
-            sender_id=input_message.sender_id,
-            organization_id=actor.organization_id,
-            agent_id=agent_id,
-            model=None,
-            tool_calls=None,
-            tool_call_id=None,
-            created_at=get_utc_time(),
-        )
-        new_messages.append(new_message)
 
-    return new_messages
+    messages = convert_message_creates_to_messages(input_messages, agent_id, wrap_user_message=False, wrap_system_message=False)
+    for message in messages:
+        message.organization_id = actor.organization_id
+    return messages
 
 
 def create_letta_messages_from_llm_response(
```
letta/server/server.py
CHANGED

```diff
@@ -268,10 +268,11 @@ class SyncServer(Server):
         )
 
         # collect providers (always has Letta as a default)
-        self._enabled_providers: List[Provider] = [LettaProvider()]
+        self._enabled_providers: List[Provider] = [LettaProvider(name="letta")]
         if model_settings.openai_api_key:
             self._enabled_providers.append(
                 OpenAIProvider(
+                    name="openai",
                     api_key=model_settings.openai_api_key,
                     base_url=model_settings.openai_api_base,
                 )
@@ -279,12 +280,14 @@ class SyncServer(Server):
         if model_settings.anthropic_api_key:
             self._enabled_providers.append(
                 AnthropicProvider(
+                    name="anthropic",
                     api_key=model_settings.anthropic_api_key,
                 )
             )
         if model_settings.ollama_base_url:
             self._enabled_providers.append(
                 OllamaProvider(
+                    name="ollama",
                     base_url=model_settings.ollama_base_url,
                     api_key=None,
                     default_prompt_formatter=model_settings.default_prompt_formatter,
@@ -293,12 +296,14 @@ class SyncServer(Server):
         if model_settings.gemini_api_key:
             self._enabled_providers.append(
                 GoogleAIProvider(
+                    name="google_ai",
                     api_key=model_settings.gemini_api_key,
                 )
             )
         if model_settings.google_cloud_location and model_settings.google_cloud_project:
             self._enabled_providers.append(
                 GoogleVertexProvider(
+                    name="google_vertex",
                     google_cloud_project=model_settings.google_cloud_project,
                     google_cloud_location=model_settings.google_cloud_location,
                 )
@@ -307,6 +312,7 @@ class SyncServer(Server):
             assert model_settings.azure_api_version, "AZURE_API_VERSION is required"
             self._enabled_providers.append(
                 AzureProvider(
+                    name="azure",
                     api_key=model_settings.azure_api_key,
                     base_url=model_settings.azure_base_url,
                     api_version=model_settings.azure_api_version,
@@ -315,12 +321,14 @@ class SyncServer(Server):
         if model_settings.groq_api_key:
             self._enabled_providers.append(
                 GroqProvider(
+                    name="groq",
                     api_key=model_settings.groq_api_key,
                 )
             )
         if model_settings.together_api_key:
             self._enabled_providers.append(
                 TogetherProvider(
+                    name="together",
                     api_key=model_settings.together_api_key,
                     default_prompt_formatter=model_settings.default_prompt_formatter,
                 )
@@ -329,6 +337,7 @@ class SyncServer(Server):
             # vLLM exposes both a /chat/completions and a /completions endpoint
             self._enabled_providers.append(
                 VLLMCompletionsProvider(
+                    name="vllm",
                     base_url=model_settings.vllm_api_base,
                     default_prompt_formatter=model_settings.default_prompt_formatter,
                 )
@@ -338,12 +347,14 @@ class SyncServer(Server):
             # e.g. "... --enable-auto-tool-choice --tool-call-parser hermes"
             self._enabled_providers.append(
                 VLLMChatCompletionsProvider(
+                    name="vllm",
                     base_url=model_settings.vllm_api_base,
                 )
             )
         if model_settings.aws_access_key and model_settings.aws_secret_access_key and model_settings.aws_region:
             self._enabled_providers.append(
                 AnthropicBedrockProvider(
+                    name="bedrock",
                     aws_region=model_settings.aws_region,
                 )
             )
@@ -355,11 +366,11 @@ class SyncServer(Server):
             if model_settings.lmstudio_base_url.endswith("/v1")
             else model_settings.lmstudio_base_url + "/v1"
         )
-        self._enabled_providers.append(LMStudioOpenAIProvider(base_url=lmstudio_url))
+        self._enabled_providers.append(LMStudioOpenAIProvider(name="lmstudio_openai", base_url=lmstudio_url))
         if model_settings.deepseek_api_key:
-            self._enabled_providers.append(DeepSeekProvider(api_key=model_settings.deepseek_api_key))
+            self._enabled_providers.append(DeepSeekProvider(name="deepseek", api_key=model_settings.deepseek_api_key))
         if model_settings.xai_api_key:
-            self._enabled_providers.append(XAIProvider(api_key=model_settings.xai_api_key))
+            self._enabled_providers.append(XAIProvider(name="xai", api_key=model_settings.xai_api_key))
 
         # For MCP
         """Initialize the MCP clients (there may be multiple)"""
@@ -862,6 +873,8 @@ class SyncServer(Server):
                 agent_ids=[voice_sleeptime_agent.id],
                 manager_config=VoiceSleeptimeManager(
                     manager_agent_id=main_agent.id,
+                    max_message_buffer_length=constants.DEFAULT_MAX_MESSAGE_BUFFER_LENGTH,
+                    min_message_buffer_length=constants.DEFAULT_MIN_MESSAGE_BUFFER_LENGTH,
                 ),
             ),
             actor=actor,
@@ -1182,10 +1195,10 @@ class SyncServer(Server):
         except NoResultFound:
             raise HTTPException(status_code=404, detail=f"Organization with id {org_id} not found")
 
-    def list_llm_models(self) -> List[LLMConfig]:
+    def list_llm_models(self, byok_only: bool = False) -> List[LLMConfig]:
         """List available models"""
         llm_models = []
-        for provider in self.get_enabled_providers():
+        for provider in self.get_enabled_providers(byok_only=byok_only):
             try:
                 llm_models.extend(provider.list_llm_models())
             except Exception as e:
@@ -1205,11 +1218,12 @@ class SyncServer(Server):
             warnings.warn(f"An error occurred while listing embedding models for provider {provider}: {e}")
         return embedding_models
 
-    def get_enabled_providers(self):
+    def get_enabled_providers(self, byok_only: bool = False):
+        providers_from_db = {p.name: p.cast_to_subtype() for p in self.provider_manager.list_providers()}
+        if byok_only:
+            return list(providers_from_db.values())
         providers_from_env = {p.name: p for p in self._enabled_providers}
-        providers_from_db = {p.name: p for p in self.provider_manager.list_providers()}
-        # Merge the two dictionaries, keeping the values from providers_from_db where conflicts occur
-        return {**providers_from_env, **providers_from_db}.values()
+        return list(providers_from_env.values()) + list(providers_from_db.values())
 
     @trace_method
     def get_llm_config_from_handle(
```
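Note the semantic change in `get_enabled_providers`: the old dict merge deduplicated by name (DB providers shadowing environment providers on conflict), while the new version concatenates both lists, so a provider configured in both places now appears twice. A toy illustration of the difference:

```python
env = {"openai": "env-openai"}
db = {"openai": "db-openai"}

old_result = list({**env, **db}.values())              # ['db-openai']
new_result = list(env.values()) + list(db.values())    # ['env-openai', 'db-openai']
print(old_result, new_result)
```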
```diff
@@ -1294,7 +1308,7 @@ class SyncServer(Server):
         return embedding_config
 
     def get_provider_from_name(self, provider_name: str) -> Provider:
-        providers = [provider for provider in self._enabled_providers if provider.name == provider_name]
+        providers = [provider for provider in self.get_enabled_providers() if provider.name == provider_name]
         if not providers:
             raise ValueError(f"Provider {provider_name} is not supported")
         elif len(providers) > 1:
```
letta/services/group_manager.py
CHANGED

```diff
@@ -80,6 +80,12 @@ class GroupManager:
             case ManagerType.voice_sleeptime:
                 new_group.manager_type = ManagerType.voice_sleeptime
                 new_group.manager_agent_id = group.manager_config.manager_agent_id
+                max_message_buffer_length = group.manager_config.max_message_buffer_length
+                min_message_buffer_length = group.manager_config.min_message_buffer_length
+                # Safety check for buffer length range
+                self.ensure_buffer_length_range_valid(max_value=max_message_buffer_length, min_value=min_message_buffer_length)
+                new_group.max_message_buffer_length = max_message_buffer_length
+                new_group.min_message_buffer_length = min_message_buffer_length
             case _:
                 raise ValueError(f"Unsupported manager type: {group.manager_config.manager_type}")
 
@@ -97,6 +103,8 @@ class GroupManager:
             group = GroupModel.read(db_session=session, identifier=group_id, actor=actor)
 
             sleeptime_agent_frequency = None
+            max_message_buffer_length = None
+            min_message_buffer_length = None
             max_turns = None
             termination_token = None
             manager_agent_id = None
@@ -117,11 +125,24 @@ class GroupManager:
                     sleeptime_agent_frequency = group_update.manager_config.sleeptime_agent_frequency
                     if sleeptime_agent_frequency and group.turns_counter is None:
                         group.turns_counter = -1
+                case ManagerType.voice_sleeptime:
+                    manager_agent_id = group_update.manager_config.manager_agent_id
+                    max_message_buffer_length = group_update.manager_config.max_message_buffer_length or group.max_message_buffer_length
+                    min_message_buffer_length = group_update.manager_config.min_message_buffer_length or group.min_message_buffer_length
+                    if sleeptime_agent_frequency and group.turns_counter is None:
+                        group.turns_counter = -1
                 case _:
                     raise ValueError(f"Unsupported manager type: {group_update.manager_config.manager_type}")
 
+            # Safety check for buffer length range
+            self.ensure_buffer_length_range_valid(max_value=max_message_buffer_length, min_value=min_message_buffer_length)
+
             if sleeptime_agent_frequency:
                 group.sleeptime_agent_frequency = sleeptime_agent_frequency
+            if max_message_buffer_length:
+                group.max_message_buffer_length = max_message_buffer_length
+            if min_message_buffer_length:
+                group.min_message_buffer_length = min_message_buffer_length
             if max_turns:
                 group.max_turns = max_turns
             if termination_token:
@@ -274,3 +295,40 @@ class GroupManager:
         if manager_agent:
             for block in blocks:
                 session.add(BlocksAgents(agent_id=manager_agent.id, block_id=block.id, block_label=block.label))
+
+    @staticmethod
+    def ensure_buffer_length_range_valid(
+        max_value: Optional[int],
+        min_value: Optional[int],
+        max_name: str = "max_message_buffer_length",
+        min_name: str = "min_message_buffer_length",
+    ) -> None:
+        """
+        1) Both-or-none: if one is set, the other must be set.
+        2) Both must be ints > 4.
+        3) max_value must be strictly greater than min_value.
+        """
+        # 1) require both-or-none
+        if (max_value is None) != (min_value is None):
+            raise ValueError(
+                f"Both '{max_name}' and '{min_name}' must be provided together " f"(got {max_name}={max_value}, {min_name}={min_value})"
+            )
+
+        # no further checks if neither is provided
+        if max_value is None:
+            return
+
+        # 2) type & lower‐bound checks
+        if not isinstance(max_value, int) or not isinstance(min_value, int):
+            raise ValueError(
+                f"Both '{max_name}' and '{min_name}' must be integers "
+                f"(got {max_name}={type(max_value).__name__}, {min_name}={type(min_value).__name__})"
+            )
+        if max_value <= 4 or min_value <= 4:
+            raise ValueError(
+                f"Both '{max_name}' and '{min_name}' must be greater than 4 " f"(got {max_name}={max_value}, {min_name}={min_value})"
+            )
+
+        # 3) ordering
+        if max_value <= min_value:
+            raise ValueError(f"'{max_name}' must be greater than '{min_name}' " f"(got {max_name}={max_value} <= {min_name}={min_value})")
```