PyPI - letta-nightly - Versions diffs - 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl - Mend

letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of letta-nightly might be problematic. Click here for more details.

Files changed (87) hide show

letta/__init__.py +1 -1
letta/agent.py +47 -12
letta/agents/base_agent.py +7 -4
letta/agents/helpers.py +52 -0
letta/agents/letta_agent.py +105 -42
letta/agents/voice_agent.py +2 -2
letta/constants.py +13 -1
letta/errors.py +10 -3
letta/functions/function_sets/base.py +65 -0
letta/functions/interface.py +2 -2
letta/functions/mcp_client/base_client.py +18 -1
letta/{dynamic_multi_agent.py → groups/dynamic_multi_agent.py} +3 -0
letta/groups/helpers.py +113 -0
letta/{round_robin_multi_agent.py → groups/round_robin_multi_agent.py} +2 -0
letta/groups/sleeptime_multi_agent.py +259 -0
letta/{supervisor_multi_agent.py → groups/supervisor_multi_agent.py} +1 -0
letta/helpers/converters.py +109 -7
letta/helpers/message_helper.py +1 -0
letta/helpers/tool_rule_solver.py +40 -23
letta/interface.py +12 -5
letta/interfaces/anthropic_streaming_interface.py +329 -0
letta/llm_api/anthropic.py +12 -1
letta/llm_api/anthropic_client.py +65 -14
letta/llm_api/azure_openai.py +2 -2
letta/llm_api/google_ai_client.py +13 -2
letta/llm_api/google_constants.py +3 -0
letta/llm_api/google_vertex_client.py +2 -2
letta/llm_api/llm_api_tools.py +1 -1
letta/llm_api/llm_client.py +7 -0
letta/llm_api/llm_client_base.py +2 -7
letta/llm_api/openai.py +7 -1
letta/llm_api/openai_client.py +250 -0
letta/orm/__init__.py +4 -0
letta/orm/agent.py +6 -0
letta/orm/block.py +32 -2
letta/orm/block_history.py +46 -0
letta/orm/custom_columns.py +60 -0
letta/orm/enums.py +7 -0
letta/orm/group.py +6 -0
letta/orm/groups_blocks.py +13 -0
letta/orm/llm_batch_items.py +55 -0
letta/orm/llm_batch_job.py +48 -0
letta/orm/message.py +7 -1
letta/orm/organization.py +2 -0
letta/orm/sqlalchemy_base.py +18 -15
letta/prompts/system/memgpt_sleeptime_chat.txt +52 -0
letta/prompts/system/sleeptime.txt +26 -0
letta/schemas/agent.py +13 -1
letta/schemas/enums.py +17 -2
letta/schemas/group.py +14 -1
letta/schemas/letta_message.py +5 -3
letta/schemas/llm_batch_job.py +53 -0
letta/schemas/llm_config.py +14 -4
letta/schemas/message.py +44 -0
letta/schemas/tool.py +3 -0
letta/schemas/usage.py +1 -0
letta/server/db.py +2 -0
letta/server/rest_api/app.py +1 -1
letta/server/rest_api/chat_completions_interface.py +8 -3
letta/server/rest_api/interface.py +36 -7
letta/server/rest_api/routers/v1/agents.py +53 -39
letta/server/rest_api/routers/v1/runs.py +14 -2
letta/server/rest_api/utils.py +15 -4
letta/server/server.py +120 -71
letta/services/agent_manager.py +70 -6
letta/services/block_manager.py +190 -2
letta/services/group_manager.py +68 -0
letta/services/helpers/agent_manager_helper.py +6 -4
letta/services/llm_batch_manager.py +139 -0
letta/services/message_manager.py +17 -31
letta/services/tool_executor/tool_execution_sandbox.py +1 -3
letta/services/tool_executor/tool_executor.py +9 -20
letta/services/tool_manager.py +14 -3
letta/services/tool_sandbox/__init__.py +0 -0
letta/services/tool_sandbox/base.py +188 -0
letta/services/tool_sandbox/e2b_sandbox.py +116 -0
letta/services/tool_sandbox/local_sandbox.py +221 -0
letta/sleeptime_agent.py +61 -0
letta/streaming_interface.py +20 -10
letta/utils.py +4 -0
{letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/METADATA +2 -2
{letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/RECORD +85 -69
letta/offline_memory_agent.py +0 -173
letta/services/tool_executor/async_tool_execution_sandbox.py +0 -397
{letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/LICENSE +0 -0
{letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/WHEEL +0 -0
{letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/entry_points.txt +0 -0

letta/llm_api/google_ai_client.py CHANGED Viewed

@@ -2,6 +2,7 @@ import uuid
 from typing import List, Optional, Tuple
 import requests
+from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, ToolConfig
 from letta.constants import NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time
@@ -36,7 +37,7 @@ class GoogleAIClient(LLMClientBase):
         self,
         messages: List[PydanticMessage],
         tools: List[dict],
-        tool_call: Optional[str],
+        force_tool_call: Optional[str] = None,
     ) -> dict:
         """
         Constructs a request object in the expected data format for this client.
@@ -50,7 +51,7 @@ class GoogleAIClient(LLMClientBase):
             [m.to_google_ai_dict() for m in messages],
         )
-        return {
+        request_data = {
             "contents": contents,
             "tools": tools,
             "generation_config": {
@@ -59,6 +60,16 @@ class GoogleAIClient(LLMClientBase):
             },
         }
+        # write tool config
+        tool_config = ToolConfig(
+            function_calling_config=FunctionCallingConfig(
+                # ANY mode forces the model to predict only function calls
+                mode=FunctionCallingConfigMode.ANY,
+            )
+        )
+        request_data["tool_config"] = tool_config.model_dump()
+        return request_data
     def convert_response_to_chat_completion(
         self,
         response_data: dict,

letta/llm_api/google_constants.py CHANGED Viewed

@@ -1,4 +1,5 @@
 GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
+    "gemini-2.5-pro-exp-03-25": 1048576,
     "gemini-2.0-flash-001": 1048576,
     "gemini-2.0-pro-exp-02-05": 2097152,
     "gemini-2.0-flash-lite-preview-02-05": 1048576,
@@ -9,4 +10,6 @@ GOOGLE_MODEL_TO_CONTEXT_LENGTH = {
     "gemini-1.0-pro-vision": 16384,
 }
+GOOGLE_MODEL_TO_OUTPUT_LENGTH = {"gemini-2.0-flash-001": 8192, "gemini-2.5-pro-exp-03-25": 65536}
 GOOGLE_EMBEDING_MODEL_TO_DIM = {"text-embedding-005": 768, "text-multilingual-embedding-002": 768}

letta/llm_api/google_vertex_client.py CHANGED Viewed

@@ -38,12 +38,12 @@ class GoogleVertexClient(GoogleAIClient):
         self,
         messages: List[PydanticMessage],
         tools: List[dict],
-        tool_call: Optional[str],
+        force_tool_call: Optional[str] = None,
     ) -> dict:
         """
         Constructs a request object in the expected data format for this client.
         """
-        request_data = super().build_request_data(messages, tools, tool_call)
+        request_data = super().build_request_data(messages, tools, force_tool_call)
         request_data["config"] = request_data.pop("generation_config")
         request_data["config"]["tools"] = request_data.pop("tools")

letta/llm_api/llm_api_tools.py CHANGED Viewed

@@ -340,7 +340,7 @@ def create(
                 tool_choice = {"type": "any", "disable_parallel_tool_use": True}
             else:
                 tool_choice = {"type": "auto", "disable_parallel_tool_use": True}
-            tools = [{"type": "function", "function": f} for f in functions]
+            tools = [{"type": "function", "function": f} for f in functions] if functions is not None else None
         chat_completion_request = ChatCompletionRequest(
             model=llm_config.model,

letta/llm_api/llm_client.py CHANGED Viewed

@@ -49,5 +49,12 @@ class LLMClient:
                     llm_config=llm_config,
                     put_inner_thoughts_first=put_inner_thoughts_first,
                 )
+            case "openai":
+                from letta.llm_api.openai_client import OpenAIClient
+                return OpenAIClient(
+                    llm_config=llm_config,
+                    put_inner_thoughts_first=put_inner_thoughts_first,
+                )
             case _:
                 return None

letta/llm_api/llm_client_base.py CHANGED Viewed

@@ -32,9 +32,7 @@ class LLMClientBase:
         self,
         messages: List[Message],
         tools: Optional[List[dict]] = None,  # TODO: change to Tool object
-        tool_call: Optional[str] = None,
         stream: bool = False,
-        first_message: bool = False,
         force_tool_call: Optional[str] = None,
     ) -> Union[ChatCompletionResponse, Stream[ChatCompletionChunk]]:
         """
@@ -42,7 +40,7 @@ class LLMClientBase:
         If stream=True, returns a Stream[ChatCompletionChunk] that can be iterated over.
         Otherwise returns a ChatCompletionResponse.
         """
-        request_data = self.build_request_data(messages, tools, tool_call)
+        request_data = self.build_request_data(messages, tools, force_tool_call)
         try:
             log_event(name="llm_request_sent", attributes=request_data)
@@ -60,9 +58,7 @@ class LLMClientBase:
         self,
         messages: List[Message],
         tools: Optional[List[dict]] = None,  # TODO: change to Tool object
-        tool_call: Optional[str] = None,
         stream: bool = False,
-        first_message: bool = False,
         force_tool_call: Optional[str] = None,
     ) -> Union[ChatCompletionResponse, AsyncStream[ChatCompletionChunk]]:
         """
@@ -70,7 +66,7 @@ class LLMClientBase:
         If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
         Otherwise returns a ChatCompletionResponse.
         """
-        request_data = self.build_request_data(messages, tools, tool_call, force_tool_call)
+        request_data = self.build_request_data(messages, tools, force_tool_call)
         response_data = {}
         try:
@@ -90,7 +86,6 @@ class LLMClientBase:
         self,
         messages: List[Message],
         tools: List[dict],
-        tool_call: Optional[str],
         force_tool_call: Optional[str] = None,
     ) -> dict:
         """

letta/llm_api/openai.py CHANGED Viewed

@@ -252,6 +252,8 @@ def openai_chat_completions_process_stream(
     n_chunks = 0  # approx == n_tokens
     chunk_idx = 0
+    prev_message_type = None
+    message_idx = 0
     try:
         for chat_completion_chunk in openai_chat_completions_request_stream(
             url=url, api_key=api_key, chat_completion_request=chat_completion_request
@@ -268,13 +270,17 @@ def openai_chat_completions_process_stream(
             if stream_interface:
                 if isinstance(stream_interface, AgentChunkStreamingInterface):
-                    stream_interface.process_chunk(
+                    message_type = stream_interface.process_chunk(
                         chat_completion_chunk,
                         message_id=chat_completion_response.id if create_message_id else chat_completion_chunk.id,
                         message_date=chat_completion_response.created if create_message_datetime else chat_completion_chunk.created,
                         expect_reasoning_content=expect_reasoning_content,
                         name=name,
+                        message_index=message_idx,
                     )
+                    if message_type != prev_message_type and message_type is not None:
+                        message_idx += 1
+                    prev_message_type = message_type
                 elif isinstance(stream_interface, AgentRefreshStreamingInterface):
                     stream_interface.process_refresh(chat_completion_response)
                 else:

letta/llm_api/openai_client.py ADDED Viewed

@@ -0,0 +1,250 @@
+import os
+from typing import List, Optional
+import openai
+from openai import AsyncOpenAI, AsyncStream, OpenAI, Stream
+from openai.types.chat.chat_completion import ChatCompletion
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+from letta.errors import (
+    ErrorCode,
+    LLMAuthenticationError,
+    LLMBadRequestError,
+    LLMConnectionError,
+    LLMNotFoundError,
+    LLMPermissionDeniedError,
+    LLMRateLimitError,
+    LLMServerError,
+    LLMUnprocessableEntityError,
+)
+from letta.llm_api.helpers import add_inner_thoughts_to_functions, convert_to_structured_output, unpack_all_inner_thoughts_from_kwargs
+from letta.llm_api.llm_client_base import LLMClientBase
+from letta.local_llm.constants import INNER_THOUGHTS_KWARG, INNER_THOUGHTS_KWARG_DESCRIPTION, INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST
+from letta.log import get_logger
+from letta.schemas.message import Message as PydanticMessage
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest
+from letta.schemas.openai.chat_completion_request import FunctionCall as ToolFunctionChoiceFunctionCall
+from letta.schemas.openai.chat_completion_request import FunctionSchema
+from letta.schemas.openai.chat_completion_request import Tool as OpenAITool
+from letta.schemas.openai.chat_completion_request import ToolFunctionChoice, cast_message_to_subtype
+from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+from letta.settings import model_settings
+logger = get_logger(__name__)
+class OpenAIClient(LLMClientBase):
+    def _prepare_client_kwargs(self) -> dict:
+        api_key = model_settings.openai_api_key or os.environ.get("OPENAI_API_KEY")
+        # supposedly the openai python client requires a dummy API key
+        api_key = api_key or "DUMMY_API_KEY"
+        kwargs = {"api_key": api_key, "base_url": self.llm_config.model_endpoint}
+        return kwargs
+    def build_request_data(
+        self,
+        messages: List[PydanticMessage],
+        tools: Optional[List[dict]] = None,  # Keep as dict for now as per base class
+        force_tool_call: Optional[str] = None,
+    ) -> dict:
+        """
+        Constructs a request object in the expected data format for the OpenAI API.
+        """
+        if tools and self.llm_config.put_inner_thoughts_in_kwargs:
+            # Special case for LM Studio backend since it needs extra guidance to force out the thoughts first
+            # TODO(fix)
+            inner_thoughts_desc = (
+                INNER_THOUGHTS_KWARG_DESCRIPTION_GO_FIRST if ":1234" in self.llm_config.model_endpoint else INNER_THOUGHTS_KWARG_DESCRIPTION
+            )
+            tools = add_inner_thoughts_to_functions(
+                functions=tools,
+                inner_thoughts_key=INNER_THOUGHTS_KWARG,
+                inner_thoughts_description=inner_thoughts_desc,
+                put_inner_thoughts_first=True,
+            )
+        openai_message_list = [
+            cast_message_to_subtype(m.to_openai_dict(put_inner_thoughts_in_kwargs=self.llm_config.put_inner_thoughts_in_kwargs))
+            for m in messages
+        ]
+        if self.llm_config.model:
+            model = self.llm_config.model
+        else:
+            logger.warning(f"Model type not set in llm_config: {self.llm_config.model_dump_json(indent=4)}")
+            model = None
+        # force function calling for reliability, see https://platform.openai.com/docs/api-reference/chat/create#chat-create-tool_choice
+        # TODO(matt) move into LLMConfig
+        # TODO: This vllm checking is very brittle and is a patch at most
+        if self.llm_config.model_endpoint == "https://inference.memgpt.ai" or (self.llm_config.handle and "vllm" in self.llm_config.handle):
+            tool_choice = "auto"  # TODO change to "required" once proxy supports it
+        else:
+            tool_choice = "required"
+        if force_tool_call is not None:
+            tool_choice = ToolFunctionChoice(type="function", function=ToolFunctionChoiceFunctionCall(name=force_tool_call))
+        data = ChatCompletionRequest(
+            model=model,
+            messages=openai_message_list,
+            tools=[OpenAITool(type="function", function=f) for f in tools] if tools else None,
+            tool_choice=tool_choice,
+            user=str(),
+            max_completion_tokens=self.llm_config.max_tokens,
+            temperature=self.llm_config.temperature,
+        )
+        if "inference.memgpt.ai" in self.llm_config.model_endpoint:
+            # override user id for inference.memgpt.ai
+            import uuid
+            data.user = str(uuid.UUID(int=0))
+            data.model = "memgpt-openai"
+        if data.tools is not None and len(data.tools) > 0:
+            # Convert to structured output style (which has 'strict' and no optionals)
+            for tool in data.tools:
+                try:
+                    structured_output_version = convert_to_structured_output(tool.function.model_dump())
+                    tool.function = FunctionSchema(**structured_output_version)
+                except ValueError as e:
+                    logger.warning(f"Failed to convert tool function to structured output, tool={tool}, error={e}")
+        return data.model_dump(exclude_unset=True)
+    def request(self, request_data: dict) -> dict:
+        """
+        Performs underlying synchronous request to OpenAI API and returns raw response dict.
+        """
+        client = OpenAI(**self._prepare_client_kwargs())
+        response: ChatCompletion = client.chat.completions.create(**request_data)
+        return response.model_dump()
+    async def request_async(self, request_data: dict) -> dict:
+        """
+        Performs underlying asynchronous request to OpenAI API and returns raw response dict.
+        """
+        client = AsyncOpenAI(**self._prepare_client_kwargs())
+        response: ChatCompletion = await client.chat.completions.create(**request_data)
+        return response.model_dump()
+    def convert_response_to_chat_completion(
+        self,
+        response_data: dict,
+        input_messages: List[PydanticMessage],  # Included for consistency, maybe used later
+    ) -> ChatCompletionResponse:
+        """
+        Converts raw OpenAI response dict into the ChatCompletionResponse Pydantic model.
+        Handles potential extraction of inner thoughts if they were added via kwargs.
+        """
+        # OpenAI's response structure directly maps to ChatCompletionResponse
+        # We just need to instantiate the Pydantic model for validation and type safety.
+        chat_completion_response = ChatCompletionResponse(**response_data)
+        # Unpack inner thoughts if they were embedded in function arguments
+        if self.llm_config.put_inner_thoughts_in_kwargs:
+            chat_completion_response = unpack_all_inner_thoughts_from_kwargs(
+                response=chat_completion_response, inner_thoughts_key=INNER_THOUGHTS_KWARG
+            )
+        return chat_completion_response
+    def stream(self, request_data: dict) -> Stream[ChatCompletionChunk]:
+        """
+        Performs underlying streaming request to OpenAI and returns the stream iterator.
+        """
+        client = OpenAI(**self._prepare_client_kwargs())
+        response_stream: Stream[ChatCompletionChunk] = client.chat.completions.create(**request_data, stream=True)
+        return response_stream
+    async def stream_async(self, request_data: dict) -> AsyncStream[ChatCompletionChunk]:
+        """
+        Performs underlying asynchronous streaming request to OpenAI and returns the async stream iterator.
+        """
+        client = AsyncOpenAI(**self._prepare_client_kwargs())
+        response_stream: AsyncStream[ChatCompletionChunk] = await client.chat.completions.create(**request_data, stream=True)
+        return response_stream
+    def handle_llm_error(self, e: Exception) -> Exception:
+        """
+        Maps OpenAI-specific errors to common LLMError types.
+        """
+        if isinstance(e, openai.APIConnectionError):
+            logger.warning(f"[OpenAI] API connection error: {e}")
+            return LLMConnectionError(
+                message=f"Failed to connect to OpenAI: {str(e)}",
+                code=ErrorCode.INTERNAL_SERVER_ERROR,
+                details={"cause": str(e.__cause__) if e.__cause__ else None},
+            )
+        if isinstance(e, openai.RateLimitError):
+            logger.warning(f"[OpenAI] Rate limited (429). Consider backoff. Error: {e}")
+            return LLMRateLimitError(
+                message=f"Rate limited by OpenAI: {str(e)}",
+                code=ErrorCode.RATE_LIMIT_EXCEEDED,
+                details=e.body,  # Include body which often has rate limit details
+            )
+        if isinstance(e, openai.BadRequestError):
+            logger.warning(f"[OpenAI] Bad request (400): {str(e)}")
+            # BadRequestError can signify different issues (e.g., invalid args, context length)
+            # Check message content if finer-grained errors are needed
+            # Example: if "context_length_exceeded" in str(e): return LLMContextLengthExceededError(...)
+            return LLMBadRequestError(
+                message=f"Bad request to OpenAI: {str(e)}",
+                code=ErrorCode.INVALID_ARGUMENT,  # Or more specific if detectable
+                details=e.body,
+            )
+        if isinstance(e, openai.AuthenticationError):
+            logger.error(f"[OpenAI] Authentication error (401): {str(e)}")  # More severe log level
+            return LLMAuthenticationError(
+                message=f"Authentication failed with OpenAI: {str(e)}", code=ErrorCode.UNAUTHENTICATED, details=e.body
+            )
+        if isinstance(e, openai.PermissionDeniedError):
+            logger.error(f"[OpenAI] Permission denied (403): {str(e)}")  # More severe log level
+            return LLMPermissionDeniedError(
+                message=f"Permission denied by OpenAI: {str(e)}", code=ErrorCode.PERMISSION_DENIED, details=e.body
+            )
+        if isinstance(e, openai.NotFoundError):
+            logger.warning(f"[OpenAI] Resource not found (404): {str(e)}")
+            # Could be invalid model name, etc.
+            return LLMNotFoundError(message=f"Resource not found in OpenAI: {str(e)}", code=ErrorCode.NOT_FOUND, details=e.body)
+        if isinstance(e, openai.UnprocessableEntityError):
+            logger.warning(f"[OpenAI] Unprocessable entity (422): {str(e)}")
+            return LLMUnprocessableEntityError(
+                message=f"Invalid request content for OpenAI: {str(e)}",
+                code=ErrorCode.INVALID_ARGUMENT,  # Usually validation errors
+                details=e.body,
+            )
+        # General API error catch-all
+        if isinstance(e, openai.APIStatusError):
+            logger.warning(f"[OpenAI] API status error ({e.status_code}): {str(e)}")
+            # Map based on status code potentially
+            if e.status_code >= 500:
+                error_cls = LLMServerError
+                error_code = ErrorCode.INTERNAL_SERVER_ERROR
+            else:
+                # Treat other 4xx as bad requests if not caught above
+                error_cls = LLMBadRequestError
+                error_code = ErrorCode.INVALID_ARGUMENT
+            return error_cls(
+                message=f"OpenAI API error: {str(e)}",
+                code=error_code,
+                details={
+                    "status_code": e.status_code,
+                    "response": str(e.response),
+                    "body": e.body,
+                },
+            )
+        # Fallback for unexpected errors
+        return super().handle_llm_error(e)

letta/orm/__init__.py CHANGED Viewed

@@ -2,15 +2,19 @@ from letta.orm.agent import Agent
 from letta.orm.agents_tags import AgentsTags
 from letta.orm.base import Base
 from letta.orm.block import Block
+from letta.orm.block_history import BlockHistory
 from letta.orm.blocks_agents import BlocksAgents
 from letta.orm.file import FileMetadata
 from letta.orm.group import Group
 from letta.orm.groups_agents import GroupsAgents
+from letta.orm.groups_blocks import GroupsBlocks
 from letta.orm.identities_agents import IdentitiesAgents
 from letta.orm.identities_blocks import IdentitiesBlocks
 from letta.orm.identity import Identity
 from letta.orm.job import Job
 from letta.orm.job_messages import JobMessage
+from letta.orm.llm_batch_items import LLMBatchItem
+from letta.orm.llm_batch_job import LLMBatchJob
 from letta.orm.message import Message
 from letta.orm.organization import Organization
 from letta.orm.passage import AgentPassage, BasePassage, SourcePassage

letta/orm/agent.py CHANGED Viewed

@@ -68,6 +68,9 @@ class Agent(SqlalchemyBase, OrganizationMixin):
     message_buffer_autoclear: Mapped[bool] = mapped_column(
         Boolean, doc="If set to True, the agent will not remember previous messages. Not recommended unless you have an advanced use case."
     )
+    enable_sleeptime: Mapped[Optional[bool]] = mapped_column(
+        Boolean, doc="If set to True, memory management will move to a background agent thread."
+    )
     # relationships
     organization: Mapped["Organization"] = relationship("Organization", back_populates="agents")
@@ -141,6 +144,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
         viewonly=True,
         back_populates="manager_agent",
     )
+    batch_items: Mapped[List["LLMBatchItem"]] = relationship("LLMBatchItem", back_populates="agent", lazy="selectin")
     def to_pydantic(self, include_relationships: Optional[Set[str]] = None) -> PydanticAgentState:
         """
@@ -190,6 +194,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
             "identity_ids": [],
             "multi_agent_group": None,
             "tool_exec_environment_variables": [],
+            "enable_sleeptime": None,
         }
         # Optional fields: only included if requested
@@ -201,6 +206,7 @@ class Agent(SqlalchemyBase, OrganizationMixin):
             "identity_ids": lambda: [i.id for i in self.identities],
             "multi_agent_group": lambda: self.multi_agent_group,
             "tool_exec_environment_variables": lambda: self.tool_exec_environment_variables,
+            "enable_sleeptime": lambda: self.enable_sleeptime,
         }
         include_relationships = set(optional_fields.keys() if include_relationships is None else include_relationships)

letta/orm/block.py CHANGED Viewed

@@ -1,9 +1,10 @@
 from typing import TYPE_CHECKING, List, Optional, Type
-from sqlalchemy import JSON, BigInteger, Index, Integer, UniqueConstraint, event
-from sqlalchemy.orm import Mapped, attributes, mapped_column, relationship
+from sqlalchemy import JSON, BigInteger, ForeignKey, Index, Integer, String, UniqueConstraint, event
+from sqlalchemy.orm import Mapped, attributes, declared_attr, mapped_column, relationship
 from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
+from letta.orm.block_history import BlockHistory
 from letta.orm.blocks_agents import BlocksAgents
 from letta.orm.mixins import OrganizationMixin
 from letta.orm.sqlalchemy_base import SqlalchemyBase
@@ -38,6 +39,17 @@ class Block(OrganizationMixin, SqlalchemyBase):
     limit: Mapped[BigInteger] = mapped_column(Integer, default=CORE_MEMORY_BLOCK_CHAR_LIMIT, doc="Character limit of the block.")
     metadata_: Mapped[Optional[dict]] = mapped_column(JSON, default={}, doc="arbitrary information related to the block.")
+    # history pointers / locking mechanisms
+    current_history_entry_id: Mapped[Optional[str]] = mapped_column(
+        String, ForeignKey("block_history.id", name="fk_block_current_history_entry", use_alter=True), nullable=True, index=True
+    )
+    version: Mapped[int] = mapped_column(
+        Integer, nullable=False, default=1, server_default="1", doc="Optimistic locking version counter, incremented on each state change."
+    )
+    # NOTE: This takes advantage of built-in optimistic locking functionality by SqlAlchemy
+    # https://docs.sqlalchemy.org/en/20/orm/versioning.html
+    __mapper_args__ = {"version_id_col": version}
     # relationships
     organization: Mapped[Optional["Organization"]] = relationship("Organization")
     agents: Mapped[List["Agent"]] = relationship(
@@ -55,6 +67,13 @@ class Block(OrganizationMixin, SqlalchemyBase):
         back_populates="blocks",
         passive_deletes=True,
     )
+    groups: Mapped[List["Group"]] = relationship(
+        "Group",
+        secondary="groups_blocks",
+        lazy="selectin",
+        back_populates="shared_blocks",
+        passive_deletes=True,
+    )
     def to_pydantic(self) -> Type:
         match self.label:
@@ -68,6 +87,17 @@ class Block(OrganizationMixin, SqlalchemyBase):
         model_dict["metadata"] = self.metadata_
         return Schema.model_validate(model_dict)
+    @declared_attr
+    def current_history_entry(cls) -> Mapped[Optional["BlockHistory"]]:
+        # Relationship to easily load the specific history entry that is current
+        return relationship(
+            "BlockHistory",
+            primaryjoin=lambda: cls.current_history_entry_id == BlockHistory.id,
+            foreign_keys=[cls.current_history_entry_id],
+            lazy="joined",  # Typically want current history details readily available
+            post_update=True,
+        )  # Helps manage potential FK cycles
 @event.listens_for(Block, "after_update")  # Changed from 'before_update'
 def block_before_update(mapper, connection, target):

letta/orm/block_history.py ADDED Viewed

@@ -0,0 +1,46 @@
+import uuid
+from typing import Optional
+from sqlalchemy import JSON, BigInteger, ForeignKey, Index, Integer, String, Text
+from sqlalchemy.orm import Mapped, mapped_column
+from letta.orm.enums import ActorType
+from letta.orm.mixins import OrganizationMixin
+from letta.orm.sqlalchemy_base import SqlalchemyBase
+class BlockHistory(OrganizationMixin, SqlalchemyBase):
+    """Stores a single historical state of a Block for undo/redo functionality."""
+    __tablename__ = "block_history"
+    __table_args__ = (
+        # PRIMARY lookup index for finding specific history entries & ordering
+        Index("ix_block_history_block_id_sequence", "block_id", "sequence_number", unique=True),
+    )
+    # agent generates its own id
+    # TODO: We want to migrate all the ORM models to do this, so we will need to move this to the SqlalchemyBase
+    # TODO: Some still rely on the Pydantic object to do this
+    id: Mapped[str] = mapped_column(String, primary_key=True, default=lambda: f"block_hist-{uuid.uuid4()}")
+    # Snapshot State Fields (Copied from Block)
+    description: Mapped[Optional[str]] = mapped_column(Text, nullable=True)
+    label: Mapped[str] = mapped_column(String, nullable=False)
+    value: Mapped[str] = mapped_column(Text, nullable=False)
+    limit: Mapped[BigInteger] = mapped_column(BigInteger, nullable=False)
+    metadata_: Mapped[Optional[dict]] = mapped_column(JSON, nullable=True)
+    # Editor info
+    # These are not made to be FKs because these may not always exist (e.g. a User be deleted after they made a checkpoint)
+    actor_type: Mapped[Optional[ActorType]] = mapped_column(String, nullable=True)
+    actor_id: Mapped[Optional[str]] = mapped_column(String, nullable=True)
+    # Relationships
+    block_id: Mapped[str] = mapped_column(
+        String, ForeignKey("block.id", ondelete="CASCADE"), nullable=False  # History deleted if Block is deleted
+    )
+    sequence_number: Mapped[int] = mapped_column(
+        Integer, nullable=False, doc="Monotonically increasing sequence number for the history of a specific block_id, starting from 1."
+    )

letta/orm/custom_columns.py CHANGED Viewed

@@ -2,16 +2,24 @@ from sqlalchemy import JSON
 from sqlalchemy.types import BINARY, TypeDecorator
 from letta.helpers.converters import (
+    deserialize_agent_step_state,
+    deserialize_batch_request_result,
+    deserialize_create_batch_response,
     deserialize_embedding_config,
     deserialize_llm_config,
     deserialize_message_content,
+    deserialize_poll_batch_response,
     deserialize_tool_calls,
     deserialize_tool_returns,
     deserialize_tool_rules,
     deserialize_vector,
+    serialize_agent_step_state,
+    serialize_batch_request_result,
+    serialize_create_batch_response,
     serialize_embedding_config,
     serialize_llm_config,
     serialize_message_content,
+    serialize_poll_batch_response,
     serialize_tool_calls,
     serialize_tool_returns,
     serialize_tool_rules,
@@ -108,3 +116,55 @@ class CommonVector(TypeDecorator):
     def process_result_value(self, value, dialect):
         return deserialize_vector(value, dialect)
+class CreateBatchResponseColumn(TypeDecorator):
+    """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+    impl = JSON
+    cache_ok = True
+    def process_bind_param(self, value, dialect):
+        return serialize_create_batch_response(value)
+    def process_result_value(self, value, dialect):
+        return deserialize_create_batch_response(value)
+class PollBatchResponseColumn(TypeDecorator):
+    """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+    impl = JSON
+    cache_ok = True
+    def process_bind_param(self, value, dialect):
+        return serialize_poll_batch_response(value)
+    def process_result_value(self, value, dialect):
+        return deserialize_poll_batch_response(value)
+class BatchRequestResultColumn(TypeDecorator):
+    """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+    impl = JSON
+    cache_ok = True
+    def process_bind_param(self, value, dialect):
+        return serialize_batch_request_result(value)
+    def process_result_value(self, value, dialect):
+        return deserialize_batch_request_result(value)
+class AgentStepStateColumn(TypeDecorator):
+    """Custom SQLAlchemy column type for storing a list of ToolRules as JSON."""
+    impl = JSON
+    cache_ok = True
+    def process_bind_param(self, value, dialect):
+        return serialize_agent_step_state(value)
+    def process_result_value(self, value, dialect):
+        return deserialize_agent_step_state(value)

letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl

Potentially problematic release.

letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl