letta-nightly 0.7.7.dev20250430205840__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- letta/__init__.py +1 -1
- letta/agent.py +8 -12
- letta/agents/exceptions.py +6 -0
- letta/agents/letta_agent.py +48 -35
- letta/agents/letta_agent_batch.py +6 -2
- letta/agents/voice_agent.py +10 -7
- letta/constants.py +5 -1
- letta/functions/composio_helpers.py +100 -0
- letta/functions/functions.py +4 -2
- letta/functions/helpers.py +19 -99
- letta/groups/helpers.py +1 -0
- letta/groups/sleeptime_multi_agent.py +5 -1
- letta/helpers/message_helper.py +21 -4
- letta/helpers/tool_execution_helper.py +1 -1
- letta/interfaces/anthropic_streaming_interface.py +165 -158
- letta/interfaces/openai_chat_completions_streaming_interface.py +1 -1
- letta/llm_api/anthropic.py +15 -10
- letta/llm_api/anthropic_client.py +5 -1
- letta/llm_api/google_vertex_client.py +1 -1
- letta/llm_api/llm_api_tools.py +7 -0
- letta/llm_api/llm_client.py +12 -2
- letta/llm_api/llm_client_base.py +4 -0
- letta/llm_api/openai.py +9 -3
- letta/llm_api/openai_client.py +18 -4
- letta/memory.py +3 -1
- letta/orm/group.py +2 -0
- letta/orm/provider.py +10 -0
- letta/schemas/agent.py +0 -1
- letta/schemas/enums.py +11 -0
- letta/schemas/group.py +24 -0
- letta/schemas/llm_config.py +1 -0
- letta/schemas/llm_config_overrides.py +2 -2
- letta/schemas/providers.py +75 -20
- letta/schemas/tool.py +3 -8
- letta/server/rest_api/app.py +12 -0
- letta/server/rest_api/chat_completions_interface.py +1 -1
- letta/server/rest_api/interface.py +8 -10
- letta/server/rest_api/{optimistic_json_parser.py → json_parser.py} +62 -26
- letta/server/rest_api/routers/v1/agents.py +1 -1
- letta/server/rest_api/routers/v1/llms.py +4 -3
- letta/server/rest_api/routers/v1/providers.py +4 -1
- letta/server/rest_api/routers/v1/voice.py +0 -2
- letta/server/rest_api/utils.py +8 -19
- letta/server/server.py +25 -11
- letta/services/group_manager.py +58 -0
- letta/services/provider_manager.py +25 -14
- letta/services/summarizer/summarizer.py +15 -7
- letta/services/tool_executor/tool_execution_manager.py +1 -1
- letta/services/tool_executor/tool_executor.py +3 -3
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/METADATA +4 -5
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/RECORD +54 -52
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/LICENSE +0 -0
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/WHEEL +0 -0
- {letta_nightly-0.7.7.dev20250430205840.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/entry_points.txt +0 -0
| @@ -28,7 +28,7 @@ from letta.schemas.letta_message import ( | |
| 28 28 | 
             
            from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
         | 
| 29 29 | 
             
            from letta.schemas.message import Message
         | 
| 30 30 | 
             
            from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
         | 
| 31 | 
            -
            from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
         | 
| 31 | 
            +
            from letta.server.rest_api.json_parser import OptimisticJSONParser
         | 
| 32 32 | 
             
            from letta.streaming_interface import AgentChunkStreamingInterface
         | 
| 33 33 | 
             
            from letta.streaming_utils import FunctionArgumentsStreamHandler, JSONInnerThoughtsExtractor
         | 
| 34 34 | 
             
            from letta.utils import parse_json
         | 
| @@ -291,7 +291,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface): | |
| 291 291 | 
             
                    self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_tool_kwarg)
         | 
| 292 292 |  | 
| 293 293 | 
             
                    # @matt's changes here, adopting new optimistic json parser
         | 
| 294 | 
            -
                    self.current_function_arguments =  | 
| 294 | 
            +
                    self.current_function_arguments = ""
         | 
| 295 295 | 
             
                    self.optimistic_json_parser = OptimisticJSONParser()
         | 
| 296 296 | 
             
                    self.current_json_parse_result = {}
         | 
| 297 297 |  | 
| @@ -387,7 +387,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface): | |
| 387 387 | 
             
                def stream_start(self):
         | 
| 388 388 | 
             
                    """Initialize streaming by activating the generator and clearing any old chunks."""
         | 
| 389 389 | 
             
                    self.streaming_chat_completion_mode_function_name = None
         | 
| 390 | 
            -
                    self.current_function_arguments =  | 
| 390 | 
            +
                    self.current_function_arguments = ""
         | 
| 391 391 | 
             
                    self.current_json_parse_result = {}
         | 
| 392 392 |  | 
| 393 393 | 
             
                    if not self._active:
         | 
| @@ -398,7 +398,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface): | |
| 398 398 | 
             
                def stream_end(self):
         | 
| 399 399 | 
             
                    """Clean up the stream by deactivating and clearing chunks."""
         | 
| 400 400 | 
             
                    self.streaming_chat_completion_mode_function_name = None
         | 
| 401 | 
            -
                    self.current_function_arguments =  | 
| 401 | 
            +
                    self.current_function_arguments = ""
         | 
| 402 402 | 
             
                    self.current_json_parse_result = {}
         | 
| 403 403 |  | 
| 404 404 | 
             
                    # if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
         | 
| @@ -609,14 +609,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface): | |
| 609 609 | 
             
                                # early exit to turn into content mode
         | 
| 610 610 | 
             
                                return None
         | 
| 611 611 | 
             
                            if tool_call.function.arguments:
         | 
| 612 | 
            -
                                self.current_function_arguments | 
| 612 | 
            +
                                self.current_function_arguments += tool_call.function.arguments
         | 
| 613 613 |  | 
| 614 614 | 
             
                            # if we're in the middle of parsing a send_message, we'll keep processing the JSON chunks
         | 
| 615 615 | 
             
                            if tool_call.function.arguments and self.streaming_chat_completion_mode_function_name == self.assistant_message_tool_name:
         | 
| 616 616 | 
             
                                # Strip out any extras tokens
         | 
| 617 617 | 
             
                                # In the case that we just have the prefix of something, no message yet, then we should early exit to move to the next chunk
         | 
| 618 | 
            -
                                 | 
| 619 | 
            -
                                parsed_args = self.optimistic_json_parser.parse(combined_args)
         | 
| 618 | 
            +
                                parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
         | 
| 620 619 |  | 
| 621 620 | 
             
                                if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
         | 
| 622 621 | 
             
                                    self.assistant_message_tool_kwarg
         | 
| @@ -686,7 +685,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface): | |
| 686 685 | 
             
                                # updates_inner_thoughts = ""
         | 
| 687 686 | 
             
                                # else:  # OpenAI
         | 
| 688 687 | 
             
                                # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
         | 
| 689 | 
            -
                                self.current_function_arguments | 
| 688 | 
            +
                                self.current_function_arguments += tool_call.function.arguments
         | 
| 690 689 | 
             
                                updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
         | 
| 691 690 |  | 
| 692 691 | 
             
                                # If we have inner thoughts, we should output them as a chunk
         | 
| @@ -805,8 +804,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface): | |
| 805 804 | 
             
                                                # TODO: THIS IS HORRIBLE
         | 
| 806 805 | 
             
                                                # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
         | 
| 807 806 | 
             
                                                # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
         | 
| 808 | 
            -
                                                 | 
| 809 | 
            -
                                                parsed_args = self.optimistic_json_parser.parse(combined_args)
         | 
| 807 | 
            +
                                                parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
         | 
| 810 808 |  | 
| 811 809 | 
             
                                                if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
         | 
| 812 810 | 
             
                                                    self.assistant_message_tool_kwarg
         | 
| @@ -1,7 +1,43 @@ | |
| 1 1 | 
             
            import json
         | 
| 2 | 
            +
            from abc import ABC, abstractmethod
         | 
| 3 | 
            +
            from typing import Any
         | 
| 2 4 |  | 
| 5 | 
            +
            from pydantic_core import from_json
         | 
| 3 6 |  | 
| 4 | 
            -
             | 
| 7 | 
            +
            from letta.log import get_logger
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            logger = get_logger(__name__)
         | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
            +
            class JSONParser(ABC):
         | 
| 13 | 
            +
                @abstractmethod
         | 
| 14 | 
            +
                def parse(self, input_str: str) -> Any:
         | 
| 15 | 
            +
                    raise NotImplementedError()
         | 
| 16 | 
            +
             | 
| 17 | 
            +
             | 
| 18 | 
            +
            class PydanticJSONParser(JSONParser):
         | 
| 19 | 
            +
                """
         | 
| 20 | 
            +
                https://docs.pydantic.dev/latest/concepts/json/#json-parsing
         | 
| 21 | 
            +
                If `strict` is True, we will not allow for partial parsing of JSON.
         | 
| 22 | 
            +
             | 
| 23 | 
            +
                Compared with `OptimisticJSONParser`, this parser is more strict.
         | 
| 24 | 
            +
                Note: This will not partially parse strings, which may decrease parsing speed for message strings
         | 
| 25 | 
            +
                """
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                def __init__(self, strict=False):
         | 
| 28 | 
            +
                    self.strict = strict
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                def parse(self, input_str: str) -> Any:
         | 
| 31 | 
            +
                    if not input_str:
         | 
| 32 | 
            +
                        return {}
         | 
| 33 | 
            +
                    try:
         | 
| 34 | 
            +
                        return from_json(input_str, allow_partial="trailing-strings" if not self.strict else False)
         | 
| 35 | 
            +
                    except ValueError as e:
         | 
| 36 | 
            +
                        logger.error(f"Failed to parse JSON: {e}")
         | 
| 37 | 
            +
                        raise
         | 
| 38 | 
            +
             | 
| 39 | 
            +
             | 
| 40 | 
            +
            class OptimisticJSONParser(JSONParser):
         | 
| 5 41 | 
             
                """
         | 
| 6 42 | 
             
                A JSON parser that attempts to parse a given string using `json.loads`,
         | 
| 7 43 | 
             
                and if that fails, it parses as much valid JSON as possible while
         | 
| @@ -13,25 +49,25 @@ class OptimisticJSONParser: | |
| 13 49 | 
             
                def __init__(self, strict=False):
         | 
| 14 50 | 
             
                    self.strict = strict
         | 
| 15 51 | 
             
                    self.parsers = {
         | 
| 16 | 
            -
                        " ": self. | 
| 17 | 
            -
                        "\r": self. | 
| 18 | 
            -
                        "\n": self. | 
| 19 | 
            -
                        "\t": self. | 
| 20 | 
            -
                        "[": self. | 
| 21 | 
            -
                        "{": self. | 
| 22 | 
            -
                        '"': self. | 
| 23 | 
            -
                        "t": self. | 
| 24 | 
            -
                        "f": self. | 
| 25 | 
            -
                        "n": self. | 
| 52 | 
            +
                        " ": self._parse_space,
         | 
| 53 | 
            +
                        "\r": self._parse_space,
         | 
| 54 | 
            +
                        "\n": self._parse_space,
         | 
| 55 | 
            +
                        "\t": self._parse_space,
         | 
| 56 | 
            +
                        "[": self._parse_array,
         | 
| 57 | 
            +
                        "{": self._parse_object,
         | 
| 58 | 
            +
                        '"': self._parse_string,
         | 
| 59 | 
            +
                        "t": self._parse_true,
         | 
| 60 | 
            +
                        "f": self._parse_false,
         | 
| 61 | 
            +
                        "n": self._parse_null,
         | 
| 26 62 | 
             
                    }
         | 
| 27 63 | 
             
                    # Register number parser for digits and signs
         | 
| 28 64 | 
             
                    for char in "0123456789.-":
         | 
| 29 65 | 
             
                        self.parsers[char] = self.parse_number
         | 
| 30 66 |  | 
| 31 67 | 
             
                    self.last_parse_reminding = None
         | 
| 32 | 
            -
                    self.on_extra_token = self. | 
| 68 | 
            +
                    self.on_extra_token = self._default_on_extra_token
         | 
| 33 69 |  | 
| 34 | 
            -
                def  | 
| 70 | 
            +
                def _default_on_extra_token(self, text, data, reminding):
         | 
| 35 71 | 
             
                    print(f"Parsed JSON with extra tokens: {data}, remaining: {reminding}")
         | 
| 36 72 |  | 
| 37 73 | 
             
                def parse(self, input_str):
         | 
| @@ -45,7 +81,7 @@ class OptimisticJSONParser: | |
| 45 81 | 
             
                        try:
         | 
| 46 82 | 
             
                            return json.loads(input_str)
         | 
| 47 83 | 
             
                        except json.JSONDecodeError as decode_error:
         | 
| 48 | 
            -
                            data, reminding = self. | 
| 84 | 
            +
                            data, reminding = self._parse_any(input_str, decode_error)
         | 
| 49 85 | 
             
                            self.last_parse_reminding = reminding
         | 
| 50 86 | 
             
                            if self.on_extra_token and reminding:
         | 
| 51 87 | 
             
                                self.on_extra_token(input_str, data, reminding)
         | 
| @@ -53,7 +89,7 @@ class OptimisticJSONParser: | |
| 53 89 | 
             
                    else:
         | 
| 54 90 | 
             
                        return json.loads("{}")
         | 
| 55 91 |  | 
| 56 | 
            -
                def  | 
| 92 | 
            +
                def _parse_any(self, input_str, decode_error):
         | 
| 57 93 | 
             
                    """Determine which parser to use based on the first character."""
         | 
| 58 94 | 
             
                    if not input_str:
         | 
| 59 95 | 
             
                        raise decode_error
         | 
| @@ -62,11 +98,11 @@ class OptimisticJSONParser: | |
| 62 98 | 
             
                        raise decode_error
         | 
| 63 99 | 
             
                    return parser(input_str, decode_error)
         | 
| 64 100 |  | 
| 65 | 
            -
                def  | 
| 101 | 
            +
                def _parse_space(self, input_str, decode_error):
         | 
| 66 102 | 
             
                    """Strip leading whitespace and parse again."""
         | 
| 67 | 
            -
                    return self. | 
| 103 | 
            +
                    return self._parse_any(input_str.strip(), decode_error)
         | 
| 68 104 |  | 
| 69 | 
            -
                def  | 
| 105 | 
            +
                def _parse_array(self, input_str, decode_error):
         | 
| 70 106 | 
             
                    """Parse a JSON array, returning the list and remaining string."""
         | 
| 71 107 | 
             
                    # Skip the '['
         | 
| 72 108 | 
             
                    input_str = input_str[1:]
         | 
| @@ -77,7 +113,7 @@ class OptimisticJSONParser: | |
| 77 113 | 
             
                            # Skip the ']'
         | 
| 78 114 | 
             
                            input_str = input_str[1:]
         | 
| 79 115 | 
             
                            break
         | 
| 80 | 
            -
                        value, input_str = self. | 
| 116 | 
            +
                        value, input_str = self._parse_any(input_str, decode_error)
         | 
| 81 117 | 
             
                        array_values.append(value)
         | 
| 82 118 | 
             
                        input_str = input_str.strip()
         | 
| 83 119 | 
             
                        if input_str.startswith(","):
         | 
| @@ -85,7 +121,7 @@ class OptimisticJSONParser: | |
| 85 121 | 
             
                            input_str = input_str[1:].strip()
         | 
| 86 122 | 
             
                    return array_values, input_str
         | 
| 87 123 |  | 
| 88 | 
            -
                def  | 
| 124 | 
            +
                def _parse_object(self, input_str, decode_error):
         | 
| 89 125 | 
             
                    """Parse a JSON object, returning the dict and remaining string."""
         | 
| 90 126 | 
             
                    # Skip the '{'
         | 
| 91 127 | 
             
                    input_str = input_str[1:]
         | 
| @@ -96,7 +132,7 @@ class OptimisticJSONParser: | |
| 96 132 | 
             
                            # Skip the '}'
         | 
| 97 133 | 
             
                            input_str = input_str[1:]
         | 
| 98 134 | 
             
                            break
         | 
| 99 | 
            -
                        key, input_str = self. | 
| 135 | 
            +
                        key, input_str = self._parse_any(input_str, decode_error)
         | 
| 100 136 | 
             
                        input_str = input_str.strip()
         | 
| 101 137 |  | 
| 102 138 | 
             
                        if not input_str or input_str[0] == "}":
         | 
| @@ -113,7 +149,7 @@ class OptimisticJSONParser: | |
| 113 149 | 
             
                                input_str = input_str[1:]
         | 
| 114 150 | 
             
                            break
         | 
| 115 151 |  | 
| 116 | 
            -
                        value, input_str = self. | 
| 152 | 
            +
                        value, input_str = self._parse_any(input_str, decode_error)
         | 
| 117 153 | 
             
                        obj[key] = value
         | 
| 118 154 | 
             
                        input_str = input_str.strip()
         | 
| 119 155 | 
             
                        if input_str.startswith(","):
         | 
| @@ -121,7 +157,7 @@ class OptimisticJSONParser: | |
| 121 157 | 
             
                            input_str = input_str[1:].strip()
         | 
| 122 158 | 
             
                    return obj, input_str
         | 
| 123 159 |  | 
| 124 | 
            -
                def  | 
| 160 | 
            +
                def _parse_string(self, input_str, decode_error):
         | 
| 125 161 | 
             
                    """Parse a JSON string, respecting escaped quotes if present."""
         | 
| 126 162 | 
             
                    end = input_str.find('"', 1)
         | 
| 127 163 | 
             
                    while end != -1 and input_str[end - 1] == "\\":
         | 
| @@ -166,19 +202,19 @@ class OptimisticJSONParser: | |
| 166 202 |  | 
| 167 203 | 
             
                    return num, remainder
         | 
| 168 204 |  | 
| 169 | 
            -
                def  | 
| 205 | 
            +
                def _parse_true(self, input_str, decode_error):
         | 
| 170 206 | 
             
                    """Parse a 'true' value."""
         | 
| 171 207 | 
             
                    if input_str.startswith(("t", "T")):
         | 
| 172 208 | 
             
                        return True, input_str[4:]
         | 
| 173 209 | 
             
                    raise decode_error
         | 
| 174 210 |  | 
| 175 | 
            -
                def  | 
| 211 | 
            +
                def _parse_false(self, input_str, decode_error):
         | 
| 176 212 | 
             
                    """Parse a 'false' value."""
         | 
| 177 213 | 
             
                    if input_str.startswith(("f", "F")):
         | 
| 178 214 | 
             
                        return False, input_str[5:]
         | 
| 179 215 | 
             
                    raise decode_error
         | 
| 180 216 |  | 
| 181 | 
            -
                def  | 
| 217 | 
            +
                def _parse_null(self, input_str, decode_error):
         | 
| 182 218 | 
             
                    """Parse a 'null' value."""
         | 
| 183 219 | 
             
                    if input_str.startswith("n"):
         | 
| 184 220 | 
             
                        return None, input_str[4:]
         | 
| @@ -678,7 +678,7 @@ async def send_message_streaming( | |
| 678 678 | 
             
                server: SyncServer = Depends(get_letta_server),
         | 
| 679 679 | 
             
                request: LettaStreamingRequest = Body(...),
         | 
| 680 680 | 
             
                actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
         | 
| 681 | 
            -
            ):
         | 
| 681 | 
            +
            ) -> StreamingResponse | LettaResponse:
         | 
| 682 682 | 
             
                """
         | 
| 683 683 | 
             
                Process a user message and return the agent's response.
         | 
| 684 684 | 
             
                This endpoint accepts a message from a user and processes it through the agent.
         | 
| @@ -1,6 +1,6 @@ | |
| 1 | 
            -
            from typing import TYPE_CHECKING, List
         | 
| 1 | 
            +
            from typing import TYPE_CHECKING, List, Optional
         | 
| 2 2 |  | 
| 3 | 
            -
            from fastapi import APIRouter, Depends
         | 
| 3 | 
            +
            from fastapi import APIRouter, Depends, Query
         | 
| 4 4 |  | 
| 5 5 | 
             
            from letta.schemas.embedding_config import EmbeddingConfig
         | 
| 6 6 | 
             
            from letta.schemas.llm_config import LLMConfig
         | 
| @@ -14,10 +14,11 @@ router = APIRouter(prefix="/models", tags=["models", "llms"]) | |
| 14 14 |  | 
| 15 15 | 
             
            @router.get("/", response_model=List[LLMConfig], operation_id="list_models")
         | 
| 16 16 | 
             
            def list_llm_models(
         | 
| 17 | 
            +
                byok_only: Optional[bool] = Query(None),
         | 
| 17 18 | 
             
                server: "SyncServer" = Depends(get_letta_server),
         | 
| 18 19 | 
             
            ):
         | 
| 19 20 |  | 
| 20 | 
            -
                models = server.list_llm_models()
         | 
| 21 | 
            +
                models = server.list_llm_models(byok_only=byok_only)
         | 
| 21 22 | 
             
                # print(models)
         | 
| 22 23 | 
             
                return models
         | 
| 23 24 |  | 
| @@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, List, Optional | |
| 2 2 |  | 
| 3 3 | 
             
            from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
         | 
| 4 4 |  | 
| 5 | 
            +
            from letta.schemas.enums import ProviderType
         | 
| 5 6 | 
             
            from letta.schemas.providers import Provider, ProviderCreate, ProviderUpdate
         | 
| 6 7 | 
             
            from letta.server.rest_api.utils import get_letta_server
         | 
| 7 8 |  | 
| @@ -13,6 +14,8 @@ router = APIRouter(prefix="/providers", tags=["providers"]) | |
| 13 14 |  | 
| 14 15 | 
             
            @router.get("/", response_model=List[Provider], operation_id="list_providers")
         | 
| 15 16 | 
             
            def list_providers(
         | 
| 17 | 
            +
                name: Optional[str] = Query(None),
         | 
| 18 | 
            +
                provider_type: Optional[ProviderType] = Query(None),
         | 
| 16 19 | 
             
                after: Optional[str] = Query(None),
         | 
| 17 20 | 
             
                limit: Optional[int] = Query(50),
         | 
| 18 21 | 
             
                actor_id: Optional[str] = Header(None, alias="user_id"),
         | 
| @@ -23,7 +26,7 @@ def list_providers( | |
| 23 26 | 
             
                """
         | 
| 24 27 | 
             
                try:
         | 
| 25 28 | 
             
                    actor = server.user_manager.get_user_or_default(user_id=actor_id)
         | 
| 26 | 
            -
                    providers = server.provider_manager.list_providers(after=after, limit=limit, actor=actor)
         | 
| 29 | 
            +
                    providers = server.provider_manager.list_providers(after=after, limit=limit, actor=actor, name=name, provider_type=provider_type)
         | 
| 27 30 | 
             
                except HTTPException:
         | 
| 28 31 | 
             
                    raise
         | 
| 29 32 | 
             
                except Exception as e:
         | 
    
        letta/server/rest_api/utils.py
    CHANGED
    
    | @@ -16,6 +16,7 @@ from pydantic import BaseModel | |
| 16 16 | 
             
            from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
         | 
| 17 17 | 
             
            from letta.errors import ContextWindowExceededError, RateLimitExceededError
         | 
| 18 18 | 
             
            from letta.helpers.datetime_helpers import get_utc_time
         | 
| 19 | 
            +
            from letta.helpers.message_helper import convert_message_creates_to_messages
         | 
| 19 20 | 
             
            from letta.log import get_logger
         | 
| 20 21 | 
             
            from letta.schemas.enums import MessageRole
         | 
| 21 22 | 
             
            from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
         | 
| @@ -143,27 +144,15 @@ def log_error_to_sentry(e): | |
| 143 144 | 
             
            def create_input_messages(input_messages: List[MessageCreate], agent_id: str, actor: User) -> List[Message]:
         | 
| 144 145 | 
             
                """
         | 
| 145 146 | 
             
                Converts a user input message into the internal structured format.
         | 
| 147 | 
            +
             | 
| 148 | 
            +
                TODO (cliandy): this effectively duplicates the functionality of `convert_message_creates_to_messages`,
         | 
| 149 | 
            +
                we should unify this when it's clear what message attributes we need.
         | 
| 146 150 | 
             
                """
         | 
| 147 | 
            -
                new_messages = []
         | 
| 148 | 
            -
                for input_message in input_messages:
         | 
| 149 | 
            -
                    # Construct the Message object
         | 
| 150 | 
            -
                    new_message = Message(
         | 
| 151 | 
            -
                        id=f"message-{uuid.uuid4()}",
         | 
| 152 | 
            -
                        role=input_message.role,
         | 
| 153 | 
            -
                        content=input_message.content,
         | 
| 154 | 
            -
                        name=input_message.name,
         | 
| 155 | 
            -
                        otid=input_message.otid,
         | 
| 156 | 
            -
                        sender_id=input_message.sender_id,
         | 
| 157 | 
            -
                        organization_id=actor.organization_id,
         | 
| 158 | 
            -
                        agent_id=agent_id,
         | 
| 159 | 
            -
                        model=None,
         | 
| 160 | 
            -
                        tool_calls=None,
         | 
| 161 | 
            -
                        tool_call_id=None,
         | 
| 162 | 
            -
                        created_at=get_utc_time(),
         | 
| 163 | 
            -
                    )
         | 
| 164 | 
            -
                    new_messages.append(new_message)
         | 
| 165 151 |  | 
| 166 | 
            -
                 | 
| 152 | 
            +
                messages = convert_message_creates_to_messages(input_messages, agent_id, wrap_user_message=False, wrap_system_message=False)
         | 
| 153 | 
            +
                for message in messages:
         | 
| 154 | 
            +
                    message.organization_id = actor.organization_id
         | 
| 155 | 
            +
                return messages
         | 
| 167 156 |  | 
| 168 157 |  | 
| 169 158 | 
             
            def create_letta_messages_from_llm_response(
         | 
    
        letta/server/server.py
    CHANGED
    
    | @@ -268,10 +268,11 @@ class SyncServer(Server): | |
| 268 268 | 
             
                                )
         | 
| 269 269 |  | 
| 270 270 | 
             
                    # collect providers (always has Letta as a default)
         | 
| 271 | 
            -
                    self._enabled_providers: List[Provider] = [LettaProvider()]
         | 
| 271 | 
            +
                    self._enabled_providers: List[Provider] = [LettaProvider(name="letta")]
         | 
| 272 272 | 
             
                    if model_settings.openai_api_key:
         | 
| 273 273 | 
             
                        self._enabled_providers.append(
         | 
| 274 274 | 
             
                            OpenAIProvider(
         | 
| 275 | 
            +
                                name="openai",
         | 
| 275 276 | 
             
                                api_key=model_settings.openai_api_key,
         | 
| 276 277 | 
             
                                base_url=model_settings.openai_api_base,
         | 
| 277 278 | 
             
                            )
         | 
| @@ -279,12 +280,14 @@ class SyncServer(Server): | |
| 279 280 | 
             
                    if model_settings.anthropic_api_key:
         | 
| 280 281 | 
             
                        self._enabled_providers.append(
         | 
| 281 282 | 
             
                            AnthropicProvider(
         | 
| 283 | 
            +
                                name="anthropic",
         | 
| 282 284 | 
             
                                api_key=model_settings.anthropic_api_key,
         | 
| 283 285 | 
             
                            )
         | 
| 284 286 | 
             
                        )
         | 
| 285 287 | 
             
                    if model_settings.ollama_base_url:
         | 
| 286 288 | 
             
                        self._enabled_providers.append(
         | 
| 287 289 | 
             
                            OllamaProvider(
         | 
| 290 | 
            +
                                name="ollama",
         | 
| 288 291 | 
             
                                base_url=model_settings.ollama_base_url,
         | 
| 289 292 | 
             
                                api_key=None,
         | 
| 290 293 | 
             
                                default_prompt_formatter=model_settings.default_prompt_formatter,
         | 
| @@ -293,12 +296,14 @@ class SyncServer(Server): | |
| 293 296 | 
             
                    if model_settings.gemini_api_key:
         | 
| 294 297 | 
             
                        self._enabled_providers.append(
         | 
| 295 298 | 
             
                            GoogleAIProvider(
         | 
| 299 | 
            +
                                name="google_ai",
         | 
| 296 300 | 
             
                                api_key=model_settings.gemini_api_key,
         | 
| 297 301 | 
             
                            )
         | 
| 298 302 | 
             
                        )
         | 
| 299 303 | 
             
                    if model_settings.google_cloud_location and model_settings.google_cloud_project:
         | 
| 300 304 | 
             
                        self._enabled_providers.append(
         | 
| 301 305 | 
             
                            GoogleVertexProvider(
         | 
| 306 | 
            +
                                name="google_vertex",
         | 
| 302 307 | 
             
                                google_cloud_project=model_settings.google_cloud_project,
         | 
| 303 308 | 
             
                                google_cloud_location=model_settings.google_cloud_location,
         | 
| 304 309 | 
             
                            )
         | 
| @@ -307,6 +312,7 @@ class SyncServer(Server): | |
| 307 312 | 
             
                        assert model_settings.azure_api_version, "AZURE_API_VERSION is required"
         | 
| 308 313 | 
             
                        self._enabled_providers.append(
         | 
| 309 314 | 
             
                            AzureProvider(
         | 
| 315 | 
            +
                                name="azure",
         | 
| 310 316 | 
             
                                api_key=model_settings.azure_api_key,
         | 
| 311 317 | 
             
                                base_url=model_settings.azure_base_url,
         | 
| 312 318 | 
             
                                api_version=model_settings.azure_api_version,
         | 
| @@ -315,12 +321,14 @@ class SyncServer(Server): | |
| 315 321 | 
             
                    if model_settings.groq_api_key:
         | 
| 316 322 | 
             
                        self._enabled_providers.append(
         | 
| 317 323 | 
             
                            GroqProvider(
         | 
| 324 | 
            +
                                name="groq",
         | 
| 318 325 | 
             
                                api_key=model_settings.groq_api_key,
         | 
| 319 326 | 
             
                            )
         | 
| 320 327 | 
             
                        )
         | 
| 321 328 | 
             
                    if model_settings.together_api_key:
         | 
| 322 329 | 
             
                        self._enabled_providers.append(
         | 
| 323 330 | 
             
                            TogetherProvider(
         | 
| 331 | 
            +
                                name="together",
         | 
| 324 332 | 
             
                                api_key=model_settings.together_api_key,
         | 
| 325 333 | 
             
                                default_prompt_formatter=model_settings.default_prompt_formatter,
         | 
| 326 334 | 
             
                            )
         | 
| @@ -329,6 +337,7 @@ class SyncServer(Server): | |
| 329 337 | 
             
                        # vLLM exposes both a /chat/completions and a /completions endpoint
         | 
| 330 338 | 
             
                        self._enabled_providers.append(
         | 
| 331 339 | 
             
                            VLLMCompletionsProvider(
         | 
| 340 | 
            +
                                name="vllm",
         | 
| 332 341 | 
             
                                base_url=model_settings.vllm_api_base,
         | 
| 333 342 | 
             
                                default_prompt_formatter=model_settings.default_prompt_formatter,
         | 
| 334 343 | 
             
                            )
         | 
| @@ -338,12 +347,14 @@ class SyncServer(Server): | |
| 338 347 | 
             
                        # e.g. "... --enable-auto-tool-choice --tool-call-parser hermes"
         | 
| 339 348 | 
             
                        self._enabled_providers.append(
         | 
| 340 349 | 
             
                            VLLMChatCompletionsProvider(
         | 
| 350 | 
            +
                                name="vllm",
         | 
| 341 351 | 
             
                                base_url=model_settings.vllm_api_base,
         | 
| 342 352 | 
             
                            )
         | 
| 343 353 | 
             
                        )
         | 
| 344 354 | 
             
                    if model_settings.aws_access_key and model_settings.aws_secret_access_key and model_settings.aws_region:
         | 
| 345 355 | 
             
                        self._enabled_providers.append(
         | 
| 346 356 | 
             
                            AnthropicBedrockProvider(
         | 
| 357 | 
            +
                                name="bedrock",
         | 
| 347 358 | 
             
                                aws_region=model_settings.aws_region,
         | 
| 348 359 | 
             
                            )
         | 
| 349 360 | 
             
                        )
         | 
| @@ -355,11 +366,11 @@ class SyncServer(Server): | |
| 355 366 | 
             
                            if model_settings.lmstudio_base_url.endswith("/v1")
         | 
| 356 367 | 
             
                            else model_settings.lmstudio_base_url + "/v1"
         | 
| 357 368 | 
             
                        )
         | 
| 358 | 
            -
                        self._enabled_providers.append(LMStudioOpenAIProvider(base_url=lmstudio_url))
         | 
| 369 | 
            +
                        self._enabled_providers.append(LMStudioOpenAIProvider(name="lmstudio_openai", base_url=lmstudio_url))
         | 
| 359 370 | 
             
                    if model_settings.deepseek_api_key:
         | 
| 360 | 
            -
                        self._enabled_providers.append(DeepSeekProvider(api_key=model_settings.deepseek_api_key))
         | 
| 371 | 
            +
                        self._enabled_providers.append(DeepSeekProvider(name="deepseek", api_key=model_settings.deepseek_api_key))
         | 
| 361 372 | 
             
                    if model_settings.xai_api_key:
         | 
| 362 | 
            -
                        self._enabled_providers.append(XAIProvider(api_key=model_settings.xai_api_key))
         | 
| 373 | 
            +
                        self._enabled_providers.append(XAIProvider(name="xai", api_key=model_settings.xai_api_key))
         | 
| 363 374 |  | 
| 364 375 | 
             
                    # For MCP
         | 
| 365 376 | 
             
                    """Initialize the MCP clients (there may be multiple)"""
         | 
| @@ -862,6 +873,8 @@ class SyncServer(Server): | |
| 862 873 | 
             
                            agent_ids=[voice_sleeptime_agent.id],
         | 
| 863 874 | 
             
                            manager_config=VoiceSleeptimeManager(
         | 
| 864 875 | 
             
                                manager_agent_id=main_agent.id,
         | 
| 876 | 
            +
                                max_message_buffer_length=constants.DEFAULT_MAX_MESSAGE_BUFFER_LENGTH,
         | 
| 877 | 
            +
                                min_message_buffer_length=constants.DEFAULT_MIN_MESSAGE_BUFFER_LENGTH,
         | 
| 865 878 | 
             
                            ),
         | 
| 866 879 | 
             
                        ),
         | 
| 867 880 | 
             
                        actor=actor,
         | 
| @@ -1182,10 +1195,10 @@ class SyncServer(Server): | |
| 1182 1195 | 
             
                    except NoResultFound:
         | 
| 1183 1196 | 
             
                        raise HTTPException(status_code=404, detail=f"Organization with id {org_id} not found")
         | 
| 1184 1197 |  | 
| 1185 | 
            -
                def list_llm_models(self) -> List[LLMConfig]:
         | 
| 1198 | 
            +
                def list_llm_models(self, byok_only: bool = False) -> List[LLMConfig]:
         | 
| 1186 1199 | 
             
                    """List available models"""
         | 
| 1187 1200 | 
             
                    llm_models = []
         | 
| 1188 | 
            -
                    for provider in self.get_enabled_providers():
         | 
| 1201 | 
            +
                    for provider in self.get_enabled_providers(byok_only=byok_only):
         | 
| 1189 1202 | 
             
                        try:
         | 
| 1190 1203 | 
             
                            llm_models.extend(provider.list_llm_models())
         | 
| 1191 1204 | 
             
                        except Exception as e:
         | 
| @@ -1205,11 +1218,12 @@ class SyncServer(Server): | |
| 1205 1218 | 
             
                            warnings.warn(f"An error occurred while listing embedding models for provider {provider}: {e}")
         | 
| 1206 1219 | 
             
                    return embedding_models
         | 
| 1207 1220 |  | 
| 1208 | 
            -
                def get_enabled_providers(self):
         | 
| 1221 | 
            +
                def get_enabled_providers(self, byok_only: bool = False):
         | 
| 1222 | 
            +
                    providers_from_db = {p.name: p.cast_to_subtype() for p in self.provider_manager.list_providers()}
         | 
| 1223 | 
            +
                    if byok_only:
         | 
| 1224 | 
            +
                        return list(providers_from_db.values())
         | 
| 1209 1225 | 
             
                    providers_from_env = {p.name: p for p in self._enabled_providers}
         | 
| 1210 | 
            -
                     | 
| 1211 | 
            -
                    # Merge the two dictionaries, keeping the values from providers_from_db where conflicts occur
         | 
| 1212 | 
            -
                    return {**providers_from_env, **providers_from_db}.values()
         | 
| 1226 | 
            +
                    return list(providers_from_env.values()) + list(providers_from_db.values())
         | 
| 1213 1227 |  | 
| 1214 1228 | 
             
                @trace_method
         | 
| 1215 1229 | 
             
                def get_llm_config_from_handle(
         | 
| @@ -1294,7 +1308,7 @@ class SyncServer(Server): | |
| 1294 1308 | 
             
                    return embedding_config
         | 
| 1295 1309 |  | 
| 1296 1310 | 
             
                def get_provider_from_name(self, provider_name: str) -> Provider:
         | 
| 1297 | 
            -
                    providers = [provider for provider in self. | 
| 1311 | 
            +
                    providers = [provider for provider in self.get_enabled_providers() if provider.name == provider_name]
         | 
| 1298 1312 | 
             
                    if not providers:
         | 
| 1299 1313 | 
             
                        raise ValueError(f"Provider {provider_name} is not supported")
         | 
| 1300 1314 | 
             
                    elif len(providers) > 1:
         | 
    
        letta/services/group_manager.py
    CHANGED
    
    | @@ -80,6 +80,12 @@ class GroupManager: | |
| 80 80 | 
             
                            case ManagerType.voice_sleeptime:
         | 
| 81 81 | 
             
                                new_group.manager_type = ManagerType.voice_sleeptime
         | 
| 82 82 | 
             
                                new_group.manager_agent_id = group.manager_config.manager_agent_id
         | 
| 83 | 
            +
                                max_message_buffer_length = group.manager_config.max_message_buffer_length
         | 
| 84 | 
            +
                                min_message_buffer_length = group.manager_config.min_message_buffer_length
         | 
| 85 | 
            +
                                # Safety check for buffer length range
         | 
| 86 | 
            +
                                self.ensure_buffer_length_range_valid(max_value=max_message_buffer_length, min_value=min_message_buffer_length)
         | 
| 87 | 
            +
                                new_group.max_message_buffer_length = max_message_buffer_length
         | 
| 88 | 
            +
                                new_group.min_message_buffer_length = min_message_buffer_length
         | 
| 83 89 | 
             
                            case _:
         | 
| 84 90 | 
             
                                raise ValueError(f"Unsupported manager type: {group.manager_config.manager_type}")
         | 
| 85 91 |  | 
| @@ -97,6 +103,8 @@ class GroupManager: | |
| 97 103 | 
             
                        group = GroupModel.read(db_session=session, identifier=group_id, actor=actor)
         | 
| 98 104 |  | 
| 99 105 | 
             
                        sleeptime_agent_frequency = None
         | 
| 106 | 
            +
                        max_message_buffer_length = None
         | 
| 107 | 
            +
                        min_message_buffer_length = None
         | 
| 100 108 | 
             
                        max_turns = None
         | 
| 101 109 | 
             
                        termination_token = None
         | 
| 102 110 | 
             
                        manager_agent_id = None
         | 
| @@ -117,11 +125,24 @@ class GroupManager: | |
| 117 125 | 
             
                                    sleeptime_agent_frequency = group_update.manager_config.sleeptime_agent_frequency
         | 
| 118 126 | 
             
                                    if sleeptime_agent_frequency and group.turns_counter is None:
         | 
| 119 127 | 
             
                                        group.turns_counter = -1
         | 
| 128 | 
            +
                                case ManagerType.voice_sleeptime:
         | 
| 129 | 
            +
                                    manager_agent_id = group_update.manager_config.manager_agent_id
         | 
| 130 | 
            +
                                    max_message_buffer_length = group_update.manager_config.max_message_buffer_length or group.max_message_buffer_length
         | 
| 131 | 
            +
                                    min_message_buffer_length = group_update.manager_config.min_message_buffer_length or group.min_message_buffer_length
         | 
| 132 | 
            +
                                    if sleeptime_agent_frequency and group.turns_counter is None:
         | 
| 133 | 
            +
                                        group.turns_counter = -1
         | 
| 120 134 | 
             
                                case _:
         | 
| 121 135 | 
             
                                    raise ValueError(f"Unsupported manager type: {group_update.manager_config.manager_type}")
         | 
| 122 136 |  | 
| 137 | 
            +
                        # Safety check for buffer length range
         | 
| 138 | 
            +
                        self.ensure_buffer_length_range_valid(max_value=max_message_buffer_length, min_value=min_message_buffer_length)
         | 
| 139 | 
            +
             | 
| 123 140 | 
             
                        if sleeptime_agent_frequency:
         | 
| 124 141 | 
             
                            group.sleeptime_agent_frequency = sleeptime_agent_frequency
         | 
| 142 | 
            +
                        if max_message_buffer_length:
         | 
| 143 | 
            +
                            group.max_message_buffer_length = max_message_buffer_length
         | 
| 144 | 
            +
                        if min_message_buffer_length:
         | 
| 145 | 
            +
                            group.min_message_buffer_length = min_message_buffer_length
         | 
| 125 146 | 
             
                        if max_turns:
         | 
| 126 147 | 
             
                            group.max_turns = max_turns
         | 
| 127 148 | 
             
                        if termination_token:
         | 
| @@ -274,3 +295,40 @@ class GroupManager: | |
| 274 295 | 
             
                        if manager_agent:
         | 
| 275 296 | 
             
                            for block in blocks:
         | 
| 276 297 | 
             
                                session.add(BlocksAgents(agent_id=manager_agent.id, block_id=block.id, block_label=block.label))
         | 
| 298 | 
            +
             | 
| 299 | 
            +
                @staticmethod
         | 
| 300 | 
            +
                def ensure_buffer_length_range_valid(
         | 
| 301 | 
            +
                    max_value: Optional[int],
         | 
| 302 | 
            +
                    min_value: Optional[int],
         | 
| 303 | 
            +
                    max_name: str = "max_message_buffer_length",
         | 
| 304 | 
            +
                    min_name: str = "min_message_buffer_length",
         | 
| 305 | 
            +
                ) -> None:
         | 
| 306 | 
            +
                    """
         | 
| 307 | 
            +
                    1) Both-or-none: if one is set, the other must be set.
         | 
| 308 | 
            +
                    2) Both must be ints > 4.
         | 
| 309 | 
            +
                    3) max_value must be strictly greater than min_value.
         | 
| 310 | 
            +
                    """
         | 
| 311 | 
            +
                    # 1) require both-or-none
         | 
| 312 | 
            +
                    if (max_value is None) != (min_value is None):
         | 
| 313 | 
            +
                        raise ValueError(
         | 
| 314 | 
            +
                            f"Both '{max_name}' and '{min_name}' must be provided together " f"(got {max_name}={max_value}, {min_name}={min_value})"
         | 
| 315 | 
            +
                        )
         | 
| 316 | 
            +
             | 
| 317 | 
            +
                    # no further checks if neither is provided
         | 
| 318 | 
            +
                    if max_value is None:
         | 
| 319 | 
            +
                        return
         | 
| 320 | 
            +
             | 
| 321 | 
            +
                    # 2) type & lower‐bound checks
         | 
| 322 | 
            +
                    if not isinstance(max_value, int) or not isinstance(min_value, int):
         | 
| 323 | 
            +
                        raise ValueError(
         | 
| 324 | 
            +
                            f"Both '{max_name}' and '{min_name}' must be integers "
         | 
| 325 | 
            +
                            f"(got {max_name}={type(max_value).__name__}, {min_name}={type(min_value).__name__})"
         | 
| 326 | 
            +
                        )
         | 
| 327 | 
            +
                    if max_value <= 4 or min_value <= 4:
         | 
| 328 | 
            +
                        raise ValueError(
         | 
| 329 | 
            +
                            f"Both '{max_name}' and '{min_name}' must be greater than 4 " f"(got {max_name}={max_value}, {min_name}={min_value})"
         | 
| 330 | 
            +
                        )
         | 
| 331 | 
            +
             | 
| 332 | 
            +
                    # 3) ordering
         | 
| 333 | 
            +
                    if max_value <= min_value:
         | 
| 334 | 
            +
                        raise ValueError(f"'{max_name}' must be greater than '{min_name}' " f"(got {max_name}={max_value} <= {min_name}={min_value})")
         |