letta-nightly 0.7.7.dev20250430205840__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +8 -12
  3. letta/agents/exceptions.py +6 -0
  4. letta/agents/letta_agent.py +48 -35
  5. letta/agents/letta_agent_batch.py +6 -2
  6. letta/agents/voice_agent.py +10 -7
  7. letta/constants.py +5 -1
  8. letta/functions/composio_helpers.py +100 -0
  9. letta/functions/functions.py +4 -2
  10. letta/functions/helpers.py +19 -99
  11. letta/groups/helpers.py +1 -0
  12. letta/groups/sleeptime_multi_agent.py +5 -1
  13. letta/helpers/message_helper.py +21 -4
  14. letta/helpers/tool_execution_helper.py +1 -1
  15. letta/interfaces/anthropic_streaming_interface.py +165 -158
  16. letta/interfaces/openai_chat_completions_streaming_interface.py +1 -1
  17. letta/llm_api/anthropic.py +15 -10
  18. letta/llm_api/anthropic_client.py +5 -1
  19. letta/llm_api/google_vertex_client.py +1 -1
  20. letta/llm_api/llm_api_tools.py +7 -0
  21. letta/llm_api/llm_client.py +12 -2
  22. letta/llm_api/llm_client_base.py +4 -0
  23. letta/llm_api/openai.py +9 -3
  24. letta/llm_api/openai_client.py +18 -4
  25. letta/memory.py +3 -1
  26. letta/orm/group.py +2 -0
  27. letta/orm/provider.py +10 -0
  28. letta/schemas/agent.py +0 -1
  29. letta/schemas/enums.py +11 -0
  30. letta/schemas/group.py +24 -0
  31. letta/schemas/llm_config.py +1 -0
  32. letta/schemas/llm_config_overrides.py +2 -2
  33. letta/schemas/providers.py +75 -20
  34. letta/schemas/tool.py +3 -8
  35. letta/server/rest_api/app.py +12 -0
  36. letta/server/rest_api/chat_completions_interface.py +1 -1
  37. letta/server/rest_api/interface.py +8 -10
  38. letta/server/rest_api/{optimistic_json_parser.py → json_parser.py} +62 -26
  39. letta/server/rest_api/routers/v1/agents.py +1 -1
  40. letta/server/rest_api/routers/v1/llms.py +4 -3
  41. letta/server/rest_api/routers/v1/providers.py +4 -1
  42. letta/server/rest_api/routers/v1/voice.py +0 -2
  43. letta/server/rest_api/utils.py +8 -19
  44. letta/server/server.py +25 -11
  45. letta/services/group_manager.py +58 -0
  46. letta/services/provider_manager.py +25 -14
  47. letta/services/summarizer/summarizer.py +15 -7
  48. letta/services/tool_executor/tool_execution_manager.py +1 -1
  49. letta/services/tool_executor/tool_executor.py +3 -3
  50. {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/METADATA +4 -5
  51. {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/RECORD +54 -52
  52. {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/LICENSE +0 -0
  53. {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/WHEEL +0 -0
  54. {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/entry_points.txt +0 -0
letta/server/rest_api/interface.py CHANGED
@@ -28,7 +28,7 @@ from letta.schemas.letta_message import (
 from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
-from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
+from letta.server.rest_api.json_parser import OptimisticJSONParser
 from letta.streaming_interface import AgentChunkStreamingInterface
 from letta.streaming_utils import FunctionArgumentsStreamHandler, JSONInnerThoughtsExtractor
 from letta.utils import parse_json
@@ -291,7 +291,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_tool_kwarg)
 
         # @matt's changes here, adopting new optimistic json parser
-        self.current_function_arguments = []
+        self.current_function_arguments = ""
         self.optimistic_json_parser = OptimisticJSONParser()
         self.current_json_parse_result = {}
 
@@ -387,7 +387,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def stream_start(self):
         """Initialize streaming by activating the generator and clearing any old chunks."""
         self.streaming_chat_completion_mode_function_name = None
-        self.current_function_arguments = []
+        self.current_function_arguments = ""
         self.current_json_parse_result = {}
 
         if not self._active:
@@ -398,7 +398,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def stream_end(self):
         """Clean up the stream by deactivating and clearing chunks."""
         self.streaming_chat_completion_mode_function_name = None
-        self.current_function_arguments = []
+        self.current_function_arguments = ""
         self.current_json_parse_result = {}
 
         # if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
@@ -609,14 +609,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     # early exit to turn into content mode
                     return None
             if tool_call.function.arguments:
-                self.current_function_arguments.append(tool_call.function.arguments)
+                self.current_function_arguments += tool_call.function.arguments
 
             # if we're in the middle of parsing a send_message, we'll keep processing the JSON chunks
             if tool_call.function.arguments and self.streaming_chat_completion_mode_function_name == self.assistant_message_tool_name:
                 # Strip out any extras tokens
                 # In the case that we just have the prefix of something, no message yet, then we should early exit to move to the next chunk
-                combined_args = "".join(self.current_function_arguments)
-                parsed_args = self.optimistic_json_parser.parse(combined_args)
+                parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
 
                 if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
                     self.assistant_message_tool_kwarg
@@ -686,7 +685,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # updates_inner_thoughts = ""
             # else: # OpenAI
             # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
-            self.current_function_arguments.append(tool_call.function.arguments)
+            self.current_function_arguments += tool_call.function.arguments
             updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
 
             # If we have inner thoughts, we should output them as a chunk
@@ -805,8 +804,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             # TODO: THIS IS HORRIBLE
             # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
             # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-            combined_args = "".join(self.current_function_arguments)
-            parsed_args = self.optimistic_json_parser.parse(combined_args)
+            parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
 
             if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
                 self.assistant_message_tool_kwarg
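The recurring edit in this file replaces a list of argument fragments (joined before every parse) with plain string concatenation. A minimal sketch of the resulting pattern, separate from Letta's actual streaming loop (the fragments and the "message" key below are made up for illustration):

```python
from letta.server.rest_api.json_parser import OptimisticJSONParser  # new module path

parser = OptimisticJSONParser()
current_function_arguments = ""  # previously a list, combined with "".join(...) before each parse

# Tool-call arguments stream in as JSON fragments; re-parse the running buffer each time.
for fragment in ['{"mes', 'sage": "Hi', ' there!"}']:
    current_function_arguments += fragment
    parsed = parser.parse(current_function_arguments)
    # On incomplete input, parse() returns whatever valid prefix it can recover
    # (possibly {}), so callers check for the key before emitting a chunk.
    if parsed.get("message"):
        print(parsed["message"])
```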
letta/server/rest_api/{optimistic_json_parser.py → json_parser.py} RENAMED
@@ -1,7 +1,43 @@
 import json
+from abc import ABC, abstractmethod
+from typing import Any
 
+from pydantic_core import from_json
 
-class OptimisticJSONParser:
+from letta.log import get_logger
+
+logger = get_logger(__name__)
+
+
+class JSONParser(ABC):
+    @abstractmethod
+    def parse(self, input_str: str) -> Any:
+        raise NotImplementedError()
+
+
+class PydanticJSONParser(JSONParser):
+    """
+    https://docs.pydantic.dev/latest/concepts/json/#json-parsing
+    If `strict` is True, we will not allow for partial parsing of JSON.
+
+    Compared with `OptimisticJSONParser`, this parser is more strict.
+    Note: this will not partially parse strings, which may decrease parsing speed for message strings.
+    """
+
+    def __init__(self, strict=False):
+        self.strict = strict
+
+    def parse(self, input_str: str) -> Any:
+        if not input_str:
+            return {}
+        try:
+            return from_json(input_str, allow_partial="trailing-strings" if not self.strict else False)
+        except ValueError as e:
+            logger.error(f"Failed to parse JSON: {e}")
+            raise
+
+
+class OptimisticJSONParser(JSONParser):
     """
     A JSON parser that attempts to parse a given string using `json.loads`,
     and if that fails, it parses as much valid JSON as possible while
@@ -13,25 +49,25 @@ class OptimisticJSONParser:
     def __init__(self, strict=False):
         self.strict = strict
         self.parsers = {
-            " ": self.parse_space,
-            "\r": self.parse_space,
-            "\n": self.parse_space,
-            "\t": self.parse_space,
-            "[": self.parse_array,
-            "{": self.parse_object,
-            '"': self.parse_string,
-            "t": self.parse_true,
-            "f": self.parse_false,
-            "n": self.parse_null,
+            " ": self._parse_space,
+            "\r": self._parse_space,
+            "\n": self._parse_space,
+            "\t": self._parse_space,
+            "[": self._parse_array,
+            "{": self._parse_object,
+            '"': self._parse_string,
+            "t": self._parse_true,
+            "f": self._parse_false,
+            "n": self._parse_null,
         }
         # Register number parser for digits and signs
         for char in "0123456789.-":
             self.parsers[char] = self.parse_number
 
         self.last_parse_reminding = None
-        self.on_extra_token = self.default_on_extra_token
+        self.on_extra_token = self._default_on_extra_token
 
-    def default_on_extra_token(self, text, data, reminding):
+    def _default_on_extra_token(self, text, data, reminding):
         print(f"Parsed JSON with extra tokens: {data}, remaining: {reminding}")
 
     def parse(self, input_str):
@@ -45,7 +81,7 @@ class OptimisticJSONParser:
         try:
             return json.loads(input_str)
         except json.JSONDecodeError as decode_error:
-            data, reminding = self.parse_any(input_str, decode_error)
+            data, reminding = self._parse_any(input_str, decode_error)
             self.last_parse_reminding = reminding
             if self.on_extra_token and reminding:
                 self.on_extra_token(input_str, data, reminding)
@@ -53,7 +89,7 @@ class OptimisticJSONParser:
         else:
             return json.loads("{}")
 
-    def parse_any(self, input_str, decode_error):
+    def _parse_any(self, input_str, decode_error):
         """Determine which parser to use based on the first character."""
         if not input_str:
             raise decode_error
@@ -62,11 +98,11 @@
             raise decode_error
         return parser(input_str, decode_error)
 
-    def parse_space(self, input_str, decode_error):
+    def _parse_space(self, input_str, decode_error):
         """Strip leading whitespace and parse again."""
-        return self.parse_any(input_str.strip(), decode_error)
+        return self._parse_any(input_str.strip(), decode_error)
 
-    def parse_array(self, input_str, decode_error):
+    def _parse_array(self, input_str, decode_error):
         """Parse a JSON array, returning the list and remaining string."""
         # Skip the '['
         input_str = input_str[1:]
@@ -77,7 +113,7 @@
                 # Skip the ']'
                 input_str = input_str[1:]
                 break
-            value, input_str = self.parse_any(input_str, decode_error)
+            value, input_str = self._parse_any(input_str, decode_error)
             array_values.append(value)
             input_str = input_str.strip()
             if input_str.startswith(","):
@@ -85,7 +121,7 @@
                 input_str = input_str[1:].strip()
         return array_values, input_str
 
-    def parse_object(self, input_str, decode_error):
+    def _parse_object(self, input_str, decode_error):
         """Parse a JSON object, returning the dict and remaining string."""
         # Skip the '{'
         input_str = input_str[1:]
@@ -96,7 +132,7 @@
                 # Skip the '}'
                 input_str = input_str[1:]
                 break
-            key, input_str = self.parse_any(input_str, decode_error)
+            key, input_str = self._parse_any(input_str, decode_error)
             input_str = input_str.strip()
 
             if not input_str or input_str[0] == "}":
@@ -113,7 +149,7 @@
                 input_str = input_str[1:]
                 break
 
-            value, input_str = self.parse_any(input_str, decode_error)
+            value, input_str = self._parse_any(input_str, decode_error)
             obj[key] = value
             input_str = input_str.strip()
             if input_str.startswith(","):
@@ -121,7 +157,7 @@
                 input_str = input_str[1:].strip()
         return obj, input_str
 
-    def parse_string(self, input_str, decode_error):
+    def _parse_string(self, input_str, decode_error):
         """Parse a JSON string, respecting escaped quotes if present."""
         end = input_str.find('"', 1)
         while end != -1 and input_str[end - 1] == "\\":
@@ -166,19 +202,19 @@
 
         return num, remainder
 
-    def parse_true(self, input_str, decode_error):
+    def _parse_true(self, input_str, decode_error):
         """Parse a 'true' value."""
         if input_str.startswith(("t", "T")):
             return True, input_str[4:]
         raise decode_error
 
-    def parse_false(self, input_str, decode_error):
+    def _parse_false(self, input_str, decode_error):
         """Parse a 'false' value."""
         if input_str.startswith(("f", "F")):
             return False, input_str[5:]
         raise decode_error
 
-    def parse_null(self, input_str, decode_error):
+    def _parse_null(self, input_str, decode_error):
         """Parse a 'null' value."""
         if input_str.startswith("n"):
             return None, input_str[4:]
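For context on the new `PydanticJSONParser`: the `allow_partial` argument it passes to `pydantic_core.from_json` enables partial parsing of truncated JSON, per the pydantic documentation linked in its docstring. A small sketch of the documented behavior:

```python
from pydantic_core import from_json

partial = '["aa", "bb", "c'  # truncated JSON document

from_json(partial, allow_partial=True)                # ['aa', 'bb'], drops the unterminated string
from_json(partial, allow_partial="trailing-strings")  # ['aa', 'bb', 'c'], keeps it
```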
letta/server/rest_api/routers/v1/agents.py CHANGED
@@ -678,7 +678,7 @@ async def send_message_streaming(
     server: SyncServer = Depends(get_letta_server),
     request: LettaStreamingRequest = Body(...),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
-):
+) -> StreamingResponse | LettaResponse:
     """
     Process a user message and return the agent's response.
     This endpoint accepts a message from a user and processes it through the agent.
letta/server/rest_api/routers/v1/llms.py CHANGED
@@ -1,6 +1,6 @@
-from typing import TYPE_CHECKING, List
+from typing import TYPE_CHECKING, List, Optional
 
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, Query
 
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
@@ -14,10 +14,11 @@ router = APIRouter(prefix="/models", tags=["models", "llms"])
 
 @router.get("/", response_model=List[LLMConfig], operation_id="list_models")
 def list_llm_models(
+    byok_only: Optional[bool] = Query(None),
    server: "SyncServer" = Depends(get_letta_server),
 ):
 
-    models = server.list_llm_models()
+    models = server.list_llm_models(byok_only=byok_only)
     # print(models)
     return models
 
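A hedged usage sketch for the new query parameter (the base URL and default port 8283 are assumptions; the `/v1/models` path follows from the router prefix above):

```python
import requests

# List only BYOK (bring-your-own-key) provider models; omit the param for the full list.
resp = requests.get("http://localhost:8283/v1/models/", params={"byok_only": True})
print(resp.json())
```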
letta/server/rest_api/routers/v1/providers.py CHANGED
@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, List, Optional
 
 from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
 
+from letta.schemas.enums import ProviderType
 from letta.schemas.providers import Provider, ProviderCreate, ProviderUpdate
 from letta.server.rest_api.utils import get_letta_server
 
@@ -13,6 +14,8 @@ router = APIRouter(prefix="/providers", tags=["providers"])
 
 @router.get("/", response_model=List[Provider], operation_id="list_providers")
 def list_providers(
+    name: Optional[str] = Query(None),
+    provider_type: Optional[ProviderType] = Query(None),
     after: Optional[str] = Query(None),
     limit: Optional[int] = Query(50),
     actor_id: Optional[str] = Header(None, alias="user_id"),
@@ -23,7 +26,7 @@ def list_providers(
     """
     try:
         actor = server.user_manager.get_user_or_default(user_id=actor_id)
-        providers = server.provider_manager.list_providers(after=after, limit=limit, actor=actor)
+        providers = server.provider_manager.list_providers(after=after, limit=limit, actor=actor, name=name, provider_type=provider_type)
     except HTTPException:
         raise
     except Exception as e:
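Similarly, a sketch of the new provider filters; this assumes `ProviderType` serializes to a lowercase value such as "anthropic", which is not shown in this diff:

```python
import requests

resp = requests.get(
    "http://localhost:8283/v1/providers/",
    params={"name": "my-anthropic", "provider_type": "anthropic"},
)
print(resp.json())
```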
letta/server/rest_api/routers/v1/voice.py CHANGED
@@ -54,8 +54,6 @@ async def create_voice_chat_completions(
         block_manager=server.block_manager,
         passage_manager=server.passage_manager,
         actor=actor,
-        message_buffer_limit=8,
-        message_buffer_min=4,
     )
 
     # Return the streaming generator
letta/server/rest_api/utils.py CHANGED
@@ -16,6 +16,7 @@ from pydantic import BaseModel
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
 from letta.helpers.datetime_helpers import get_utc_time
+from letta.helpers.message_helper import convert_message_creates_to_messages
 from letta.log import get_logger
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
@@ -143,27 +144,15 @@ def log_error_to_sentry(e):
 def create_input_messages(input_messages: List[MessageCreate], agent_id: str, actor: User) -> List[Message]:
     """
     Converts a user input message into the internal structured format.
+
+    TODO (cliandy): this effectively duplicates the functionality of `convert_message_creates_to_messages`,
+    we should unify this when it's clear what message attributes we need.
     """
-    new_messages = []
-    for input_message in input_messages:
-        # Construct the Message object
-        new_message = Message(
-            id=f"message-{uuid.uuid4()}",
-            role=input_message.role,
-            content=input_message.content,
-            name=input_message.name,
-            otid=input_message.otid,
-            sender_id=input_message.sender_id,
-            organization_id=actor.organization_id,
-            agent_id=agent_id,
-            model=None,
-            tool_calls=None,
-            tool_call_id=None,
-            created_at=get_utc_time(),
-        )
-        new_messages.append(new_message)
 
-    return new_messages
+    messages = convert_message_creates_to_messages(input_messages, agent_id, wrap_user_message=False, wrap_system_message=False)
+    for message in messages:
+        message.organization_id = actor.organization_id
+    return messages
 
 
 def create_letta_messages_from_llm_response(
letta/server/server.py CHANGED
@@ -268,10 +268,11 @@ class SyncServer(Server):
         )
 
         # collect providers (always has Letta as a default)
-        self._enabled_providers: List[Provider] = [LettaProvider()]
+        self._enabled_providers: List[Provider] = [LettaProvider(name="letta")]
         if model_settings.openai_api_key:
             self._enabled_providers.append(
                 OpenAIProvider(
+                    name="openai",
                     api_key=model_settings.openai_api_key,
                     base_url=model_settings.openai_api_base,
                 )
@@ -279,12 +280,14 @@
         if model_settings.anthropic_api_key:
             self._enabled_providers.append(
                 AnthropicProvider(
+                    name="anthropic",
                     api_key=model_settings.anthropic_api_key,
                 )
             )
         if model_settings.ollama_base_url:
             self._enabled_providers.append(
                 OllamaProvider(
+                    name="ollama",
                     base_url=model_settings.ollama_base_url,
                     api_key=None,
                     default_prompt_formatter=model_settings.default_prompt_formatter,
@@ -293,12 +296,14 @@
         if model_settings.gemini_api_key:
             self._enabled_providers.append(
                 GoogleAIProvider(
+                    name="google_ai",
                     api_key=model_settings.gemini_api_key,
                 )
             )
         if model_settings.google_cloud_location and model_settings.google_cloud_project:
             self._enabled_providers.append(
                 GoogleVertexProvider(
+                    name="google_vertex",
                     google_cloud_project=model_settings.google_cloud_project,
                     google_cloud_location=model_settings.google_cloud_location,
                 )
@@ -307,6 +312,7 @@
             assert model_settings.azure_api_version, "AZURE_API_VERSION is required"
             self._enabled_providers.append(
                 AzureProvider(
+                    name="azure",
                     api_key=model_settings.azure_api_key,
                     base_url=model_settings.azure_base_url,
                     api_version=model_settings.azure_api_version,
@@ -315,12 +321,14 @@
         if model_settings.groq_api_key:
             self._enabled_providers.append(
                 GroqProvider(
+                    name="groq",
                     api_key=model_settings.groq_api_key,
                 )
             )
         if model_settings.together_api_key:
             self._enabled_providers.append(
                 TogetherProvider(
+                    name="together",
                     api_key=model_settings.together_api_key,
                     default_prompt_formatter=model_settings.default_prompt_formatter,
                 )
@@ -329,6 +337,7 @@
             # vLLM exposes both a /chat/completions and a /completions endpoint
             self._enabled_providers.append(
                 VLLMCompletionsProvider(
+                    name="vllm",
                     base_url=model_settings.vllm_api_base,
                     default_prompt_formatter=model_settings.default_prompt_formatter,
                 )
@@ -338,12 +347,14 @@
             # e.g. "... --enable-auto-tool-choice --tool-call-parser hermes"
             self._enabled_providers.append(
                 VLLMChatCompletionsProvider(
+                    name="vllm",
                     base_url=model_settings.vllm_api_base,
                 )
             )
         if model_settings.aws_access_key and model_settings.aws_secret_access_key and model_settings.aws_region:
             self._enabled_providers.append(
                 AnthropicBedrockProvider(
+                    name="bedrock",
                     aws_region=model_settings.aws_region,
                 )
             )
@@ -355,11 +366,11 @@
                 if model_settings.lmstudio_base_url.endswith("/v1")
                 else model_settings.lmstudio_base_url + "/v1"
             )
-            self._enabled_providers.append(LMStudioOpenAIProvider(base_url=lmstudio_url))
+            self._enabled_providers.append(LMStudioOpenAIProvider(name="lmstudio_openai", base_url=lmstudio_url))
         if model_settings.deepseek_api_key:
-            self._enabled_providers.append(DeepSeekProvider(api_key=model_settings.deepseek_api_key))
+            self._enabled_providers.append(DeepSeekProvider(name="deepseek", api_key=model_settings.deepseek_api_key))
         if model_settings.xai_api_key:
-            self._enabled_providers.append(XAIProvider(api_key=model_settings.xai_api_key))
+            self._enabled_providers.append(XAIProvider(name="xai", api_key=model_settings.xai_api_key))
 
         # For MCP
         """Initialize the MCP clients (there may be multiple)"""
@@ -862,6 +873,8 @@
                 agent_ids=[voice_sleeptime_agent.id],
                 manager_config=VoiceSleeptimeManager(
                     manager_agent_id=main_agent.id,
+                    max_message_buffer_length=constants.DEFAULT_MAX_MESSAGE_BUFFER_LENGTH,
+                    min_message_buffer_length=constants.DEFAULT_MIN_MESSAGE_BUFFER_LENGTH,
                 ),
             ),
             actor=actor,
@@ -1182,10 +1195,10 @@
         except NoResultFound:
             raise HTTPException(status_code=404, detail=f"Organization with id {org_id} not found")
 
-    def list_llm_models(self) -> List[LLMConfig]:
+    def list_llm_models(self, byok_only: bool = False) -> List[LLMConfig]:
         """List available models"""
         llm_models = []
-        for provider in self.get_enabled_providers():
+        for provider in self.get_enabled_providers(byok_only=byok_only):
             try:
                 llm_models.extend(provider.list_llm_models())
             except Exception as e:
@@ -1205,11 +1218,12 @@
                 warnings.warn(f"An error occurred while listing embedding models for provider {provider}: {e}")
         return embedding_models
 
-    def get_enabled_providers(self):
+    def get_enabled_providers(self, byok_only: bool = False):
+        providers_from_db = {p.name: p.cast_to_subtype() for p in self.provider_manager.list_providers()}
+        if byok_only:
+            return list(providers_from_db.values())
         providers_from_env = {p.name: p for p in self._enabled_providers}
-        providers_from_db = {p.name: p for p in self.provider_manager.list_providers()}
-        # Merge the two dictionaries, keeping the values from providers_from_db where conflicts occur
-        return {**providers_from_env, **providers_from_db}.values()
+        return list(providers_from_env.values()) + list(providers_from_db.values())
 
     @trace_method
     def get_llm_config_from_handle(
@@ -1294,7 +1308,7 @@
         return embedding_config
 
     def get_provider_from_name(self, provider_name: str) -> Provider:
-        providers = [provider for provider in self._enabled_providers if provider.name == provider_name]
+        providers = [provider for provider in self.get_enabled_providers() if provider.name == provider_name]
        if not providers:
             raise ValueError(f"Provider {provider_name} is not supported")
         elif len(providers) > 1:
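The `get_enabled_providers` rewrite above changes the merge semantics: the old dict merge keyed on name let database-backed (BYOK) providers shadow same-named environment providers, while the new version concatenates both lists and lets `byok_only=True` return only the database-backed ones. A toy illustration with placeholder strings instead of real provider objects:

```python
providers_from_env = {"openai": "env-openai", "letta": "env-letta"}
providers_from_db = {"openai": "db-openai (BYOK)"}

# Old behavior: dict merge, DB wins on name collisions, so "env-openai" is lost.
old = list({**providers_from_env, **providers_from_db}.values())
# -> ['db-openai (BYOK)', 'env-letta']

# New behavior: concatenation keeps both, so same-named providers coexist.
new = list(providers_from_env.values()) + list(providers_from_db.values())
# -> ['env-openai', 'env-letta', 'db-openai (BYOK)']
```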
letta/services/group_manager.py CHANGED
@@ -80,6 +80,12 @@ class GroupManager:
             case ManagerType.voice_sleeptime:
                 new_group.manager_type = ManagerType.voice_sleeptime
                 new_group.manager_agent_id = group.manager_config.manager_agent_id
+                max_message_buffer_length = group.manager_config.max_message_buffer_length
+                min_message_buffer_length = group.manager_config.min_message_buffer_length
+                # Safety check for buffer length range
+                self.ensure_buffer_length_range_valid(max_value=max_message_buffer_length, min_value=min_message_buffer_length)
+                new_group.max_message_buffer_length = max_message_buffer_length
+                new_group.min_message_buffer_length = min_message_buffer_length
             case _:
                 raise ValueError(f"Unsupported manager type: {group.manager_config.manager_type}")
 
@@ -97,6 +103,8 @@ class GroupManager:
             group = GroupModel.read(db_session=session, identifier=group_id, actor=actor)
 
             sleeptime_agent_frequency = None
+            max_message_buffer_length = None
+            min_message_buffer_length = None
             max_turns = None
             termination_token = None
             manager_agent_id = None
@@ -117,11 +125,24 @@ class GroupManager:
                     sleeptime_agent_frequency = group_update.manager_config.sleeptime_agent_frequency
                     if sleeptime_agent_frequency and group.turns_counter is None:
                         group.turns_counter = -1
+                case ManagerType.voice_sleeptime:
+                    manager_agent_id = group_update.manager_config.manager_agent_id
+                    max_message_buffer_length = group_update.manager_config.max_message_buffer_length or group.max_message_buffer_length
+                    min_message_buffer_length = group_update.manager_config.min_message_buffer_length or group.min_message_buffer_length
+                    if sleeptime_agent_frequency and group.turns_counter is None:
+                        group.turns_counter = -1
                 case _:
                     raise ValueError(f"Unsupported manager type: {group_update.manager_config.manager_type}")
 
+            # Safety check for buffer length range
+            self.ensure_buffer_length_range_valid(max_value=max_message_buffer_length, min_value=min_message_buffer_length)
+
             if sleeptime_agent_frequency:
                 group.sleeptime_agent_frequency = sleeptime_agent_frequency
+            if max_message_buffer_length:
+                group.max_message_buffer_length = max_message_buffer_length
+            if min_message_buffer_length:
+                group.min_message_buffer_length = min_message_buffer_length
             if max_turns:
                 group.max_turns = max_turns
             if termination_token:
@@ -274,3 +295,40 @@ class GroupManager:
             if manager_agent:
                 for block in blocks:
                     session.add(BlocksAgents(agent_id=manager_agent.id, block_id=block.id, block_label=block.label))
+
+    @staticmethod
+    def ensure_buffer_length_range_valid(
+        max_value: Optional[int],
+        min_value: Optional[int],
+        max_name: str = "max_message_buffer_length",
+        min_name: str = "min_message_buffer_length",
+    ) -> None:
+        """
+        1) Both-or-none: if one is set, the other must be set.
+        2) Both must be ints > 4.
+        3) max_value must be strictly greater than min_value.
+        """
+        # 1) require both-or-none
+        if (max_value is None) != (min_value is None):
+            raise ValueError(
+                f"Both '{max_name}' and '{min_name}' must be provided together (got {max_name}={max_value}, {min_name}={min_value})"
+            )
+
+        # no further checks if neither is provided
+        if max_value is None:
+            return
+
+        # 2) type & lower-bound checks
+        if not isinstance(max_value, int) or not isinstance(min_value, int):
+            raise ValueError(
+                f"Both '{max_name}' and '{min_name}' must be integers "
+                f"(got {max_name}={type(max_value).__name__}, {min_name}={type(min_value).__name__})"
+            )
+        if max_value <= 4 or min_value <= 4:
+            raise ValueError(
+                f"Both '{max_name}' and '{min_name}' must be greater than 4 (got {max_name}={max_value}, {min_name}={min_value})"
+            )
+
+        # 3) ordering
+        if max_value <= min_value:
+            raise ValueError(f"'{max_name}' must be greater than '{min_name}' (got {max_name}={max_value} <= {min_name}={min_value})")