PyPI - letta-nightly - Versions diffs - 0.7.6.dev20250430104233__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl - Mend

letta-nightly 0.7.6.dev20250430104233__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (70) hide show

letta/__init__.py +1 -1
letta/agent.py +8 -12
letta/agents/exceptions.py +6 -0
letta/agents/helpers.py +1 -1
letta/agents/letta_agent.py +48 -35
letta/agents/letta_agent_batch.py +6 -2
letta/agents/voice_agent.py +41 -59
letta/agents/{ephemeral_memory_agent.py → voice_sleeptime_agent.py} +106 -129
letta/client/client.py +3 -3
letta/constants.py +18 -2
letta/functions/composio_helpers.py +100 -0
letta/functions/function_sets/base.py +0 -10
letta/functions/function_sets/voice.py +92 -0
letta/functions/functions.py +4 -2
letta/functions/helpers.py +19 -101
letta/groups/helpers.py +1 -0
letta/groups/sleeptime_multi_agent.py +5 -1
letta/helpers/message_helper.py +21 -4
letta/helpers/tool_execution_helper.py +1 -1
letta/interfaces/anthropic_streaming_interface.py +165 -158
letta/interfaces/openai_chat_completions_streaming_interface.py +1 -1
letta/llm_api/anthropic.py +15 -10
letta/llm_api/anthropic_client.py +5 -1
letta/llm_api/google_vertex_client.py +1 -1
letta/llm_api/llm_api_tools.py +7 -0
letta/llm_api/llm_client.py +12 -2
letta/llm_api/llm_client_base.py +4 -0
letta/llm_api/openai.py +9 -3
letta/llm_api/openai_client.py +18 -4
letta/memory.py +3 -1
letta/orm/enums.py +1 -0
letta/orm/group.py +2 -0
letta/orm/provider.py +10 -0
letta/personas/examples/voice_memory_persona.txt +5 -0
letta/prompts/system/voice_chat.txt +29 -0
letta/prompts/system/voice_sleeptime.txt +74 -0
letta/schemas/agent.py +14 -2
letta/schemas/enums.py +11 -0
letta/schemas/group.py +37 -2
letta/schemas/llm_config.py +1 -0
letta/schemas/llm_config_overrides.py +2 -2
letta/schemas/message.py +4 -3
letta/schemas/providers.py +75 -213
letta/schemas/tool.py +8 -12
letta/server/rest_api/app.py +12 -0
letta/server/rest_api/chat_completions_interface.py +1 -1
letta/server/rest_api/interface.py +8 -10
letta/server/rest_api/{optimistic_json_parser.py → json_parser.py} +62 -26
letta/server/rest_api/routers/v1/agents.py +1 -1
letta/server/rest_api/routers/v1/embeddings.py +4 -3
letta/server/rest_api/routers/v1/llms.py +4 -3
letta/server/rest_api/routers/v1/providers.py +4 -1
letta/server/rest_api/routers/v1/voice.py +0 -2
letta/server/rest_api/utils.py +22 -33
letta/server/server.py +91 -37
letta/services/agent_manager.py +14 -7
letta/services/group_manager.py +61 -0
letta/services/helpers/agent_manager_helper.py +69 -12
letta/services/message_manager.py +2 -2
letta/services/passage_manager.py +13 -4
letta/services/provider_manager.py +25 -14
letta/services/summarizer/summarizer.py +20 -15
letta/services/tool_executor/tool_execution_manager.py +1 -1
letta/services/tool_executor/tool_executor.py +3 -3
letta/services/tool_manager.py +32 -7
{letta_nightly-0.7.6.dev20250430104233.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/METADATA +4 -5
{letta_nightly-0.7.6.dev20250430104233.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/RECORD +70 -64
{letta_nightly-0.7.6.dev20250430104233.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/LICENSE +0 -0
{letta_nightly-0.7.6.dev20250430104233.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/WHEEL +0 -0
{letta_nightly-0.7.6.dev20250430104233.dist-info → letta_nightly-0.7.8.dev20250501064110.dist-info}/entry_points.txt +0 -0

letta/server/rest_api/interface.py CHANGED Viewed

@@ -28,7 +28,7 @@ from letta.schemas.letta_message import (
 from letta.schemas.letta_message_content import ReasoningContent, RedactedReasoningContent, TextContent
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
-from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
+from letta.server.rest_api.json_parser import OptimisticJSONParser
 from letta.streaming_interface import AgentChunkStreamingInterface
 from letta.streaming_utils import FunctionArgumentsStreamHandler, JSONInnerThoughtsExtractor
 from letta.utils import parse_json
@@ -291,7 +291,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_tool_kwarg)
         # @matt's changes here, adopting new optimistic json parser
-        self.current_function_arguments = []
+        self.current_function_arguments = ""
         self.optimistic_json_parser = OptimisticJSONParser()
         self.current_json_parse_result = {}
@@ -387,7 +387,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def stream_start(self):
         """Initialize streaming by activating the generator and clearing any old chunks."""
         self.streaming_chat_completion_mode_function_name = None
-        self.current_function_arguments = []
+        self.current_function_arguments = ""
         self.current_json_parse_result = {}
         if not self._active:
@@ -398,7 +398,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def stream_end(self):
         """Clean up the stream by deactivating and clearing chunks."""
         self.streaming_chat_completion_mode_function_name = None
-        self.current_function_arguments = []
+        self.current_function_arguments = ""
         self.current_json_parse_result = {}
         # if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
@@ -609,14 +609,13 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     # early exit to turn into content mode
                     return None
                 if tool_call.function.arguments:
-                    self.current_function_arguments.append(tool_call.function.arguments)
+                    self.current_function_arguments += tool_call.function.arguments
                 # if we're in the middle of parsing a send_message, we'll keep processing the JSON chunks
                 if tool_call.function.arguments and self.streaming_chat_completion_mode_function_name == self.assistant_message_tool_name:
                     # Strip out any extras tokens
                     # In the case that we just have the prefix of something, no message yet, then we should early exit to move to the next chunk
-                    combined_args = "".join(self.current_function_arguments)
-                    parsed_args = self.optimistic_json_parser.parse(combined_args)
+                    parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
                     if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
                         self.assistant_message_tool_kwarg
@@ -686,7 +685,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     # updates_inner_thoughts = ""
                     # else:  # OpenAI
                     # updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
-                    self.current_function_arguments.append(tool_call.function.arguments)
+                    self.current_function_arguments += tool_call.function.arguments
                     updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
                     # If we have inner thoughts, we should output them as a chunk
@@ -805,8 +804,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                     # TODO: THIS IS HORRIBLE
                                     # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
                                     # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
-                                    combined_args = "".join(self.current_function_arguments)
-                                    parsed_args = self.optimistic_json_parser.parse(combined_args)
+                                    parsed_args = self.optimistic_json_parser.parse(self.current_function_arguments)
                                     if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
                                         self.assistant_message_tool_kwarg

letta/server/rest_api/{optimistic_json_parser.py → json_parser.py} RENAMED Viewed

@@ -1,7 +1,43 @@
 import json
+from abc import ABC, abstractmethod
+from typing import Any
+from pydantic_core import from_json
-class OptimisticJSONParser:
+from letta.log import get_logger
+logger = get_logger(__name__)
+class JSONParser(ABC):
+    @abstractmethod
+    def parse(self, input_str: str) -> Any:
+        raise NotImplementedError()
+class PydanticJSONParser(JSONParser):
+    """
+    https://docs.pydantic.dev/latest/concepts/json/#json-parsing
+    If `strict` is True, we will not allow for partial parsing of JSON.
+    Compared with `OptimisticJSONParser`, this parser is more strict.
+    Note: This will not partially parse strings which may be decrease parsing speed for message strings
+    """
+    def __init__(self, strict=False):
+        self.strict = strict
+    def parse(self, input_str: str) -> Any:
+        if not input_str:
+            return {}
+        try:
+            return from_json(input_str, allow_partial="trailing-strings" if not self.strict else False)
+        except ValueError as e:
+            logger.error(f"Failed to parse JSON: {e}")
+            raise
+class OptimisticJSONParser(JSONParser):
     """
     A JSON parser that attempts to parse a given string using `json.loads`,
     and if that fails, it parses as much valid JSON as possible while
@@ -13,25 +49,25 @@ class OptimisticJSONParser:
     def __init__(self, strict=False):
         self.strict = strict
         self.parsers = {
-            " ": self.parse_space,
-            "\r": self.parse_space,
-            "\n": self.parse_space,
-            "\t": self.parse_space,
-            "[": self.parse_array,
-            "{": self.parse_object,
-            '"': self.parse_string,
-            "t": self.parse_true,
-            "f": self.parse_false,
-            "n": self.parse_null,
+            " ": self._parse_space,
+            "\r": self._parse_space,
+            "\n": self._parse_space,
+            "\t": self._parse_space,
+            "[": self._parse_array,
+            "{": self._parse_object,
+            '"': self._parse_string,
+            "t": self._parse_true,
+            "f": self._parse_false,
+            "n": self._parse_null,
         }
         # Register number parser for digits and signs
         for char in "0123456789.-":
             self.parsers[char] = self.parse_number
         self.last_parse_reminding = None
-        self.on_extra_token = self.default_on_extra_token
+        self.on_extra_token = self._default_on_extra_token
-    def default_on_extra_token(self, text, data, reminding):
+    def _default_on_extra_token(self, text, data, reminding):
         print(f"Parsed JSON with extra tokens: {data}, remaining: {reminding}")
     def parse(self, input_str):
@@ -45,7 +81,7 @@ class OptimisticJSONParser:
             try:
                 return json.loads(input_str)
             except json.JSONDecodeError as decode_error:
-                data, reminding = self.parse_any(input_str, decode_error)
+                data, reminding = self._parse_any(input_str, decode_error)
                 self.last_parse_reminding = reminding
                 if self.on_extra_token and reminding:
                     self.on_extra_token(input_str, data, reminding)
@@ -53,7 +89,7 @@ class OptimisticJSONParser:
         else:
             return json.loads("{}")
-    def parse_any(self, input_str, decode_error):
+    def _parse_any(self, input_str, decode_error):
         """Determine which parser to use based on the first character."""
         if not input_str:
             raise decode_error
@@ -62,11 +98,11 @@ class OptimisticJSONParser:
             raise decode_error
         return parser(input_str, decode_error)
-    def parse_space(self, input_str, decode_error):
+    def _parse_space(self, input_str, decode_error):
         """Strip leading whitespace and parse again."""
-        return self.parse_any(input_str.strip(), decode_error)
+        return self._parse_any(input_str.strip(), decode_error)
-    def parse_array(self, input_str, decode_error):
+    def _parse_array(self, input_str, decode_error):
         """Parse a JSON array, returning the list and remaining string."""
         # Skip the '['
         input_str = input_str[1:]
@@ -77,7 +113,7 @@ class OptimisticJSONParser:
                 # Skip the ']'
                 input_str = input_str[1:]
                 break
-            value, input_str = self.parse_any(input_str, decode_error)
+            value, input_str = self._parse_any(input_str, decode_error)
             array_values.append(value)
             input_str = input_str.strip()
             if input_str.startswith(","):
@@ -85,7 +121,7 @@ class OptimisticJSONParser:
                 input_str = input_str[1:].strip()
         return array_values, input_str
-    def parse_object(self, input_str, decode_error):
+    def _parse_object(self, input_str, decode_error):
         """Parse a JSON object, returning the dict and remaining string."""
         # Skip the '{'
         input_str = input_str[1:]
@@ -96,7 +132,7 @@ class OptimisticJSONParser:
                 # Skip the '}'
                 input_str = input_str[1:]
                 break
-            key, input_str = self.parse_any(input_str, decode_error)
+            key, input_str = self._parse_any(input_str, decode_error)
             input_str = input_str.strip()
             if not input_str or input_str[0] == "}":
@@ -113,7 +149,7 @@ class OptimisticJSONParser:
                     input_str = input_str[1:]
                 break
-            value, input_str = self.parse_any(input_str, decode_error)
+            value, input_str = self._parse_any(input_str, decode_error)
             obj[key] = value
             input_str = input_str.strip()
             if input_str.startswith(","):
@@ -121,7 +157,7 @@ class OptimisticJSONParser:
                 input_str = input_str[1:].strip()
         return obj, input_str
-    def parse_string(self, input_str, decode_error):
+    def _parse_string(self, input_str, decode_error):
         """Parse a JSON string, respecting escaped quotes if present."""
         end = input_str.find('"', 1)
         while end != -1 and input_str[end - 1] == "\\":
@@ -166,19 +202,19 @@ class OptimisticJSONParser:
         return num, remainder
-    def parse_true(self, input_str, decode_error):
+    def _parse_true(self, input_str, decode_error):
         """Parse a 'true' value."""
         if input_str.startswith(("t", "T")):
             return True, input_str[4:]
         raise decode_error
-    def parse_false(self, input_str, decode_error):
+    def _parse_false(self, input_str, decode_error):
         """Parse a 'false' value."""
         if input_str.startswith(("f", "F")):
             return False, input_str[5:]
         raise decode_error
-    def parse_null(self, input_str, decode_error):
+    def _parse_null(self, input_str, decode_error):
         """Parse a 'null' value."""
         if input_str.startswith("n"):
             return None, input_str[4:]

letta/server/rest_api/routers/v1/agents.py CHANGED Viewed

@@ -678,7 +678,7 @@ async def send_message_streaming(
     server: SyncServer = Depends(get_letta_server),
     request: LettaStreamingRequest = Body(...),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
-):
+) -> StreamingResponse | LettaResponse:
     """
     Process a user message and return the agent's response.
     This endpoint accepts a message from a user and processes it through the agent.

letta/server/rest_api/routers/v1/embeddings.py CHANGED Viewed

@@ -9,12 +9,13 @@ router = APIRouter(prefix="/embeddings", tags=["embeddings"])
 @router.get("/total_storage_size", response_model=float, operation_id="get_total_storage_size")
-def get_embeddings_storage_size(
+def get_embeddings_total_storage_size(
     server: SyncServer = Depends(get_letta_server),
     actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
+    storage_unit: Optional[str] = Header("GB", alias="storage_unit"),  # Extract storage unit from header, default to GB
 ):
     """
-    Get the total size of all embeddings in the database for a user in GB.
+    Get the total size of all embeddings in the database for a user in the storage unit given.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
-    return server.passage_manager.estimate_embeddings_size_GB(actor=actor)
+    return server.passage_manager.estimate_embeddings_size(actor=actor, storage_unit=storage_unit)

letta/server/rest_api/routers/v1/llms.py CHANGED Viewed

@@ -1,6 +1,6 @@
-from typing import TYPE_CHECKING, List
+from typing import TYPE_CHECKING, List, Optional
-from fastapi import APIRouter, Depends
+from fastapi import APIRouter, Depends, Query
 from letta.schemas.embedding_config import EmbeddingConfig
 from letta.schemas.llm_config import LLMConfig
@@ -14,10 +14,11 @@ router = APIRouter(prefix="/models", tags=["models", "llms"])
 @router.get("/", response_model=List[LLMConfig], operation_id="list_models")
 def list_llm_models(
+    byok_only: Optional[bool] = Query(None),
     server: "SyncServer" = Depends(get_letta_server),
 ):
-    models = server.list_llm_models()
+    models = server.list_llm_models(byok_only=byok_only)
     # print(models)
     return models

letta/server/rest_api/routers/v1/providers.py CHANGED Viewed

@@ -2,6 +2,7 @@ from typing import TYPE_CHECKING, List, Optional
 from fastapi import APIRouter, Body, Depends, Header, HTTPException, Query
+from letta.schemas.enums import ProviderType
 from letta.schemas.providers import Provider, ProviderCreate, ProviderUpdate
 from letta.server.rest_api.utils import get_letta_server
@@ -13,6 +14,8 @@ router = APIRouter(prefix="/providers", tags=["providers"])
 @router.get("/", response_model=List[Provider], operation_id="list_providers")
 def list_providers(
+    name: Optional[str] = Query(None),
+    provider_type: Optional[ProviderType] = Query(None),
     after: Optional[str] = Query(None),
     limit: Optional[int] = Query(50),
     actor_id: Optional[str] = Header(None, alias="user_id"),
@@ -23,7 +26,7 @@ def list_providers(
     """
     try:
         actor = server.user_manager.get_user_or_default(user_id=actor_id)
-        providers = server.provider_manager.list_providers(after=after, limit=limit, actor=actor)
+        providers = server.provider_manager.list_providers(after=after, limit=limit, actor=actor, name=name, provider_type=provider_type)
     except HTTPException:
         raise
     except Exception as e:

letta/server/rest_api/routers/v1/voice.py CHANGED Viewed

@@ -54,8 +54,6 @@ async def create_voice_chat_completions(
         block_manager=server.block_manager,
         passage_manager=server.passage_manager,
         actor=actor,
-        message_buffer_limit=40,
-        message_buffer_min=15,
     )
     # Return the streaming generator

letta/server/rest_api/utils.py CHANGED Viewed

@@ -16,6 +16,7 @@ from pydantic import BaseModel
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG, FUNC_FAILED_HEARTBEAT_MESSAGE, REQ_HEARTBEAT_MESSAGE
 from letta.errors import ContextWindowExceededError, RateLimitExceededError
 from letta.helpers.datetime_helpers import get_utc_time
+from letta.helpers.message_helper import convert_message_creates_to_messages
 from letta.log import get_logger
 from letta.schemas.enums import MessageRole
 from letta.schemas.letta_message_content import OmittedReasoningContent, ReasoningContent, RedactedReasoningContent, TextContent
@@ -143,27 +144,15 @@ def log_error_to_sentry(e):
 def create_input_messages(input_messages: List[MessageCreate], agent_id: str, actor: User) -> List[Message]:
     """
     Converts a user input message into the internal structured format.
+    TODO (cliandy): this effectively duplicates the functionality of `convert_message_creates_to_messages`,
+    we should unify this when it's clear what message attributes we need.
     """
-    new_messages = []
-    for input_message in input_messages:
-        # Construct the Message object
-        new_message = Message(
-            id=f"message-{uuid.uuid4()}",
-            role=input_message.role,
-            content=input_message.content,
-            name=input_message.name,
-            otid=input_message.otid,
-            sender_id=input_message.sender_id,
-            organization_id=actor.organization_id,
-            agent_id=agent_id,
-            model=None,
-            tool_calls=None,
-            tool_call_id=None,
-            created_at=get_utc_time(),
-        )
-        new_messages.append(new_message)
-    return new_messages
+    messages = convert_message_creates_to_messages(input_messages, agent_id, wrap_user_message=False, wrap_system_message=False)
+    for message in messages:
+        message.organization_id = actor.organization_id
+    return messages
 def create_letta_messages_from_llm_response(
@@ -210,20 +199,20 @@ def create_letta_messages_from_llm_response(
     # TODO: Use ToolReturnContent instead of TextContent
     # TODO: This helps preserve ordering
-    if function_response:
-        tool_message = Message(
-            role=MessageRole.tool,
-            content=[TextContent(text=package_function_response(function_call_success, function_response))],
-            organization_id=actor.organization_id,
-            agent_id=agent_id,
-            model=model,
-            tool_calls=[],
-            tool_call_id=tool_call_id,
-            created_at=get_utc_time(),
-        )
-        if pre_computed_tool_message_id:
-            tool_message.id = pre_computed_tool_message_id
-        messages.append(tool_message)
+    tool_message = Message(
+        role=MessageRole.tool,
+        content=[TextContent(text=package_function_response(function_call_success, function_response))],
+        organization_id=actor.organization_id,
+        agent_id=agent_id,
+        model=model,
+        tool_calls=[],
+        tool_call_id=tool_call_id,
+        created_at=get_utc_time(),
+        name=function_name,
+    )
+    if pre_computed_tool_message_id:
+        tool_message.id = pre_computed_tool_message_id
+    messages.append(tool_message)
     if add_heartbeat_request_system_message:
         heartbeat_system_message = create_heartbeat_system_message(

letta-nightly 0.7.6.dev20250430104233__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl

letta-nightly 0.7.6.dev20250430104233__py3-none-any.whl → 0.7.8.dev20250501064110__py3-none-any.whl