letta-nightly 0.6.37.dev20250310103931__py3-none-any.whl → 0.6.38.dev20250312104155__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (32)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +34 -12
  3. letta/client/client.py +1 -50
  4. letta/constants.py +1 -1
  5. letta/functions/function_sets/multi_agent.py +9 -8
  6. letta/functions/helpers.py +33 -6
  7. letta/llm_api/anthropic.py +20 -0
  8. letta/llm_api/google_ai_client.py +332 -0
  9. letta/llm_api/google_vertex_client.py +214 -0
  10. letta/llm_api/llm_client.py +48 -0
  11. letta/llm_api/llm_client_base.py +129 -0
  12. letta/orm/step.py +1 -0
  13. letta/schemas/block.py +4 -48
  14. letta/schemas/letta_message.py +26 -0
  15. letta/schemas/message.py +1 -1
  16. letta/schemas/step.py +1 -0
  17. letta/serialize_schemas/agent.py +8 -1
  18. letta/server/rest_api/interface.py +9 -7
  19. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +2 -7
  20. letta/server/rest_api/routers/v1/agents.py +12 -8
  21. letta/server/rest_api/routers/v1/steps.py +2 -0
  22. letta/server/rest_api/routers/v1/voice.py +3 -6
  23. letta/services/agent_manager.py +56 -3
  24. letta/services/helpers/agent_manager_helper.py +12 -1
  25. letta/services/identity_manager.py +7 -1
  26. letta/services/message_manager.py +40 -0
  27. letta/services/step_manager.py +8 -1
  28. {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/METADATA +18 -17
  29. {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/RECORD +32 -28
  30. {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/LICENSE +0 -0
  31. {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/WHEEL +0 -0
  32. {letta_nightly-0.6.37.dev20250310103931.dist-info → letta_nightly-0.6.38.dev20250312104155.dist-info}/entry_points.txt +0 -0
letta/llm_api/google_vertex_client.py ADDED
@@ -0,0 +1,214 @@
+ import uuid
+ from typing import List, Optional
+
+ from google import genai
+ from google.genai.types import FunctionCallingConfig, FunctionCallingConfigMode, GenerateContentResponse, ToolConfig
+
+ from letta.helpers.datetime_helpers import get_utc_time
+ from letta.helpers.json_helpers import json_dumps
+ from letta.llm_api.google_ai_client import GoogleAIClient
+ from letta.local_llm.json_parser import clean_json_string_extra_backslash
+ from letta.local_llm.utils import count_tokens
+ from letta.schemas.message import Message as PydanticMessage
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse, Choice, FunctionCall, Message, ToolCall, UsageStatistics
+ from letta.settings import model_settings
+ from letta.utils import get_tool_call_id
+
+
+ class GoogleVertexClient(GoogleAIClient):
+
+     def request(self, request_data: dict) -> dict:
+         """
+         Performs underlying request to llm and returns raw response.
+         """
+         client = genai.Client(
+             vertexai=True,
+             project=model_settings.google_cloud_project,
+             location=model_settings.google_cloud_location,
+             http_options={"api_version": "v1"},
+         )
+         response = client.models.generate_content(
+             model=self.llm_config.model,
+             contents=request_data["contents"],
+             config=request_data["config"],
+         )
+         return response.model_dump()
+
+     def build_request_data(
+         self,
+         messages: List[PydanticMessage],
+         tools: List[dict],
+         tool_call: Optional[str],
+     ) -> dict:
+         """
+         Constructs a request object in the expected data format for this client.
+         """
+         request_data = super().build_request_data(messages, tools, tool_call)
+         request_data["config"] = request_data.pop("generation_config")
+         request_data["config"]["tools"] = request_data.pop("tools")
+
+         tool_config = ToolConfig(
+             function_calling_config=FunctionCallingConfig(
+                 # ANY mode forces the model to predict only function calls
+                 mode=FunctionCallingConfigMode.ANY,
+             )
+         )
+         request_data["config"]["tool_config"] = tool_config.model_dump()
+
+         return request_data
+
+     def convert_response_to_chat_completion(
+         self,
+         response_data: dict,
+         input_messages: List[PydanticMessage],
+     ) -> ChatCompletionResponse:
+         """
+         Converts custom response format from llm client into an OpenAI
+         ChatCompletionsResponse object.
+
+         Example:
+         {
+             "candidates": [
+                 {
+                     "content": {
+                         "parts": [
+                             {
+                                 "text": " OK. Barbie is showing in two theaters in Mountain View, CA: AMC Mountain View 16 and Regal Edwards 14."
+                             }
+                         ]
+                     }
+                 }
+             ],
+             "usageMetadata": {
+                 "promptTokenCount": 9,
+                 "candidatesTokenCount": 27,
+                 "totalTokenCount": 36
+             }
+         }
+         """
+         response = GenerateContentResponse(**response_data)
+         try:
+             choices = []
+             index = 0
+             for candidate in response.candidates:
+                 content = candidate.content
+
+                 role = content.role
+                 assert role == "model", f"Unknown role in response: {role}"
+
+                 parts = content.parts
+                 # TODO support parts / multimodal
+                 # TODO support parallel tool calling natively
+                 # TODO Alternative here is to throw away everything else except for the first part
+                 for response_message in parts:
+                     # Convert the actual message style to OpenAI style
+                     if response_message.function_call:
+                         function_call = response_message.function_call
+                         function_name = function_call.name
+                         function_args = function_call.args
+                         assert isinstance(function_args, dict), function_args
+
+                         # NOTE: this also involves stripping the inner monologue out of the function
+                         if self.llm_config.put_inner_thoughts_in_kwargs:
+                             from letta.local_llm.constants import INNER_THOUGHTS_KWARG
+
+                             assert INNER_THOUGHTS_KWARG in function_args, f"Couldn't find inner thoughts in function args:\n{function_call}"
+                             inner_thoughts = function_args.pop(INNER_THOUGHTS_KWARG)
+                             assert inner_thoughts is not None, f"Expected non-null inner thoughts function arg:\n{function_call}"
+                         else:
+                             inner_thoughts = None
+
+                         # Google AI API doesn't generate tool call IDs
+                         openai_response_message = Message(
+                             role="assistant",  # NOTE: "model" -> "assistant"
+                             content=inner_thoughts,
+                             tool_calls=[
+                                 ToolCall(
+                                     id=get_tool_call_id(),
+                                     type="function",
+                                     function=FunctionCall(
+                                         name=function_name,
+                                         arguments=clean_json_string_extra_backslash(json_dumps(function_args)),
+                                     ),
+                                 )
+                             ],
+                         )
+
+                     else:
+
+                         # Inner thoughts are the content by default
+                         inner_thoughts = response_message.text
+
+                         # Google AI API doesn't generate tool call IDs
+                         openai_response_message = Message(
+                             role="assistant",  # NOTE: "model" -> "assistant"
+                             content=inner_thoughts,
+                         )
+
+                     # Google AI API uses different finish reason strings than OpenAI
+                     # OpenAI: 'stop', 'length', 'function_call', 'content_filter', null
+                     # see: https://platform.openai.com/docs/guides/text-generation/chat-completions-api
+                     # Google AI API: FINISH_REASON_UNSPECIFIED, STOP, MAX_TOKENS, SAFETY, RECITATION, OTHER
+                     # see: https://ai.google.dev/api/python/google/ai/generativelanguage/Candidate/FinishReason
+                     finish_reason = candidate.finish_reason.value
+                     if finish_reason == "STOP":
+                         openai_finish_reason = (
+                             "function_call"
+                             if openai_response_message.tool_calls is not None and len(openai_response_message.tool_calls) > 0
+                             else "stop"
+                         )
+                     elif finish_reason == "MAX_TOKENS":
+                         openai_finish_reason = "length"
+                     elif finish_reason == "SAFETY":
+                         openai_finish_reason = "content_filter"
+                     elif finish_reason == "RECITATION":
+                         openai_finish_reason = "content_filter"
+                     else:
+                         raise ValueError(f"Unrecognized finish reason in Google AI response: {finish_reason}")
+
+                     choices.append(
+                         Choice(
+                             finish_reason=openai_finish_reason,
+                             index=index,
+                             message=openai_response_message,
+                         )
+                     )
+                     index += 1
+
+             # if len(choices) > 1:
+             #     raise UserWarning(f"Unexpected number of candidates in response (expected 1, got {len(choices)})")
+
+             # NOTE: some of the Google AI APIs show UsageMetadata in the response, but it seems to not exist?
+             #  "usageMetadata": {
+             #     "promptTokenCount": 9,
+             #     "candidatesTokenCount": 27,
+             #     "totalTokenCount": 36
+             #   }
+             if response.usage_metadata:
+                 usage = UsageStatistics(
+                     prompt_tokens=response.usage_metadata.prompt_token_count,
+                     completion_tokens=response.usage_metadata.candidates_token_count,
+                     total_tokens=response.usage_metadata.total_token_count,
+                 )
+             else:
+                 # Count it ourselves
+                 assert input_messages is not None, f"Didn't get UsageMetadata from the API response, so input_messages is required"
+                 prompt_tokens = count_tokens(json_dumps(input_messages))  # NOTE: this is a very rough approximation
+                 completion_tokens = count_tokens(json_dumps(openai_response_message.model_dump()))  # NOTE: this is also approximate
+                 total_tokens = prompt_tokens + completion_tokens
+                 usage = UsageStatistics(
+                     prompt_tokens=prompt_tokens,
+                     completion_tokens=completion_tokens,
+                     total_tokens=total_tokens,
+                 )
+
+             response_id = str(uuid.uuid4())
+             return ChatCompletionResponse(
+                 id=response_id,
+                 choices=choices,
+                 model=self.llm_config.model,  # NOTE: Google API doesn't pass back model in the response
+                 created=get_utc_time(),
+                 usage=usage,
+             )
+         except KeyError as e:
+             raise e
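
For orientation, here is a hedged usage sketch of the new Vertex client via the send_llm_request flow defined in llm_client_base.py below; the agent ID, messages, and tools are placeholders, and llm_config is assumed to be an LLMConfig pointing at a Gemini model with google_cloud_project and google_cloud_location set in model_settings. This is not an example shipped with the package.

# Hedged sketch; IDs and inputs are hypothetical, not taken from the package.
from letta.llm_api.google_vertex_client import GoogleVertexClient

client = GoogleVertexClient(
    agent_id="agent-123",   # hypothetical agent ID
    llm_config=llm_config,  # assumed LLMConfig with model_endpoint_type="google_vertex"
)

# messages (List[PydanticMessage]) and tools (List[dict]) are assumed to be prepared by the caller
completion = client.send_llm_request(messages, tools=tools)
print(completion.choices[0].finish_reason, completion.usage.total_tokens)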
letta/llm_api/llm_client.py ADDED
@@ -0,0 +1,48 @@
+ from typing import Optional
+
+ from letta.llm_api.llm_client_base import LLMClientBase
+ from letta.schemas.llm_config import LLMConfig
+
+
+ class LLMClient:
+     """Factory class for creating LLM clients based on the model endpoint type."""
+
+     @staticmethod
+     def create(
+         agent_id: str,
+         llm_config: LLMConfig,
+         put_inner_thoughts_first: bool = True,
+         actor_id: Optional[str] = None,
+     ) -> Optional[LLMClientBase]:
+         """
+         Create an LLM client based on the model endpoint type.
+
+         Args:
+             agent_id: Unique identifier for the agent
+             llm_config: Configuration for the LLM model
+             put_inner_thoughts_first: Whether to put inner thoughts first in the response
+             use_structured_output: Whether to use structured output
+             use_tool_naming: Whether to use tool naming
+             actor_id: Optional actor identifier
+
+         Returns:
+             An instance of LLMClientBase subclass
+
+         Raises:
+             ValueError: If the model endpoint type is not supported
+         """
+         match llm_config.model_endpoint_type:
+             case "google_ai":
+                 from letta.llm_api.google_ai_client import GoogleAIClient
+
+                 return GoogleAIClient(
+                     agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                 )
+             case "google_vertex":
+                 from letta.llm_api.google_vertex_client import GoogleVertexClient
+
+                 return GoogleVertexClient(
+                     agent_id=agent_id, llm_config=llm_config, put_inner_thoughts_first=put_inner_thoughts_first, actor_id=actor_id
+                 )
+             case _:
+                 return None
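
A hedged sketch of how this factory might be called; the config values shown are illustrative assumptions, and any endpoint type other than the two Google ones currently falls through to None (i.e. the legacy request path).

# Hedged sketch; config values and IDs are assumptions for illustration only.
from letta.llm_api.llm_client import LLMClient

client = LLMClient.create(
    agent_id="agent-123",   # hypothetical
    llm_config=llm_config,  # assumed LLMConfig with model_endpoint_type="google_vertex"
)
if client is None:
    # Endpoint types without a client implementation keep using the legacy request path.
    ...
else:
    completion = client.send_llm_request(messages, tools=tools)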
letta/llm_api/llm_client_base.py ADDED
@@ -0,0 +1,129 @@
+ from abc import abstractmethod
+ from typing import List, Optional, Union
+
+ from openai import AsyncStream, Stream
+ from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
+
+ from letta.schemas.llm_config import LLMConfig
+ from letta.schemas.message import Message
+ from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
+ from letta.tracing import log_event
+
+
+ class LLMClientBase:
+     """
+     Abstract base class for LLM clients, formatting the request objects,
+     handling the downstream request and parsing into chat completions response format
+     """
+
+     def __init__(
+         self,
+         agent_id: str,
+         llm_config: LLMConfig,
+         put_inner_thoughts_first: Optional[bool] = True,
+         use_structured_output: Optional[bool] = True,
+         use_tool_naming: bool = True,
+         actor_id: Optional[str] = None,
+     ):
+         self.agent_id = agent_id
+         self.llm_config = llm_config
+         self.put_inner_thoughts_first = put_inner_thoughts_first
+         self.actor_id = actor_id
+
+     def send_llm_request(
+         self,
+         messages: List[Message],
+         tools: Optional[List[dict]] = None,  # TODO: change to Tool object
+         tool_call: Optional[str] = None,
+         stream: bool = False,
+         first_message: bool = False,
+         force_tool_call: Optional[str] = None,
+     ) -> Union[ChatCompletionResponse, Stream[ChatCompletionChunk]]:
+         """
+         Issues a request to the downstream model endpoint and parses response.
+         If stream=True, returns a Stream[ChatCompletionChunk] that can be iterated over.
+         Otherwise returns a ChatCompletionResponse.
+         """
+         request_data = self.build_request_data(messages, tools, tool_call)
+         log_event(name="llm_request_sent", attributes=request_data)
+         if stream:
+             return self.stream(request_data)
+         else:
+             response_data = self.request(request_data)
+             log_event(name="llm_response_received", attributes=response_data)
+             return self.convert_response_to_chat_completion(response_data, messages)
+
+     async def send_llm_request_async(
+         self,
+         messages: List[Message],
+         tools: Optional[List[dict]] = None,  # TODO: change to Tool object
+         tool_call: Optional[str] = None,
+         stream: bool = False,
+         first_message: bool = False,
+         force_tool_call: Optional[str] = None,
+     ) -> Union[ChatCompletionResponse, AsyncStream[ChatCompletionChunk]]:
+         """
+         Issues a request to the downstream model endpoint.
+         If stream=True, returns an AsyncStream[ChatCompletionChunk] that can be async iterated over.
+         Otherwise returns a ChatCompletionResponse.
+         """
+         request_data = self.build_request_data(messages, tools, tool_call)
+         log_event(name="llm_request_sent", attributes=request_data)
+         if stream:
+             return await self.stream_async(request_data)
+         else:
+             response_data = await self.request_async(request_data)
+             log_event(name="llm_response_received", attributes=response_data)
+             return self.convert_response_to_chat_completion(response_data, messages)
+
+     @abstractmethod
+     def build_request_data(
+         self,
+         messages: List[Message],
+         tools: List[dict],
+         tool_call: Optional[str],
+     ) -> dict:
+         """
+         Constructs a request object in the expected data format for this client.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     def request(self, request_data: dict) -> dict:
+         """
+         Performs underlying request to llm and returns raw response.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     async def request_async(self, request_data: dict) -> dict:
+         """
+         Performs underlying request to llm and returns raw response.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     def convert_response_to_chat_completion(
+         self,
+         response_data: dict,
+         input_messages: List[Message],
+     ) -> ChatCompletionResponse:
+         """
+         Converts custom response format from llm client into an OpenAI
+         ChatCompletionsResponse object.
+         """
+         raise NotImplementedError
+
+     @abstractmethod
+     def stream(self, request_data: dict) -> Stream[ChatCompletionChunk]:
+         """
+         Performs underlying streaming request to llm and returns raw response.
+         """
+         raise NotImplementedError(f"Streaming is not supported for {self.llm_config.model_endpoint_type}")
+
+     @abstractmethod
+     async def stream_async(self, request_data: dict) -> AsyncStream[ChatCompletionChunk]:
+         """
+         Performs underlying streaming request to llm and returns raw response.
+         """
+         raise NotImplementedError(f"Streaming is not supported for {self.llm_config.model_endpoint_type}")
letta/orm/step.py CHANGED
@@ -33,6 +33,7 @@ class Step(SqlalchemyBase):
      job_id: Mapped[Optional[str]] = mapped_column(
          ForeignKey("jobs.id", ondelete="SET NULL"), nullable=True, doc="The unique identified of the job run that triggered this step"
      )
+     agent_id: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
      provider_name: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the provider used for this step.")
      model: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The name of the model used for this step.")
      model_endpoint: Mapped[Optional[str]] = mapped_column(None, nullable=True, doc="The model endpoint url used for this step.")
letta/schemas/block.py CHANGED
@@ -1,6 +1,6 @@
  from typing import Optional
 
- from pydantic import BaseModel, Field, model_validator
+ from pydantic import Field, model_validator
  from typing_extensions import Self
 
  from letta.constants import CORE_MEMORY_BLOCK_CHAR_LIMIT
@@ -37,7 +37,8 @@ class BaseBlock(LettaBase, validate_assignment=True):
 
      @model_validator(mode="after")
      def verify_char_limit(self) -> Self:
-         if self.value and len(self.value) > self.limit:
+         # self.limit can be None from
+         if self.limit is not None and self.value and len(self.value) > self.limit:
              error_msg = f"Edit failed: Exceeds {self.limit} character limit (requested {len(self.value)}) - {str(self)}."
              raise ValueError(error_msg)
 
@@ -89,61 +90,16 @@ class Persona(Block):
      label: str = "persona"
 
 
- # class CreateBlock(BaseBlock):
- #     """Create a block"""
- #
- #     is_template: bool = True
- #     label: str = Field(..., description="Label of the block.")
-
-
- class BlockLabelUpdate(BaseModel):
-     """Update the label of a block"""
-
-     current_label: str = Field(..., description="Current label of the block.")
-     new_label: str = Field(..., description="New label of the block.")
-
-
- # class CreatePersona(CreateBlock):
- #     """Create a persona block"""
- #
- #     label: str = "persona"
- #
- #
- # class CreateHuman(CreateBlock):
- #     """Create a human block"""
- #
- #     label: str = "human"
-
-
  class BlockUpdate(BaseBlock):
      """Update a block"""
 
-     limit: Optional[int] = Field(CORE_MEMORY_BLOCK_CHAR_LIMIT, description="Character limit of the block.")
+     limit: Optional[int] = Field(None, description="Character limit of the block.")
      value: Optional[str] = Field(None, description="Value of the block.")
 
      class Config:
          extra = "ignore"  # Ignores extra fields
 
 
- class BlockLimitUpdate(BaseModel):
-     """Update the limit of a block"""
-
-     label: str = Field(..., description="Label of the block.")
-     limit: int = Field(..., description="New limit of the block.")
-
-
- # class UpdatePersona(BlockUpdate):
- #     """Update a persona block"""
- #
- #     label: str = "persona"
- #
- #
- # class UpdateHuman(BlockUpdate):
- #     """Update a human block"""
- #
- #     label: str = "human"
-
-
  class CreateBlock(BaseBlock):
      """Create a block"""
 
letta/schemas/letta_message.py CHANGED
@@ -236,6 +236,32 @@ LettaMessageUnion = Annotated[
  ]
 
 
+ class UpdateSystemMessage(BaseModel):
+     content: Union[str, List[MessageContentUnion]]
+     message_type: Literal["system_message"] = "system_message"
+
+
+ class UpdateUserMessage(BaseModel):
+     content: Union[str, List[MessageContentUnion]]
+     message_type: Literal["user_message"] = "user_message"
+
+
+ class UpdateReasoningMessage(BaseModel):
+     reasoning: Union[str, List[MessageContentUnion]]
+     message_type: Literal["reasoning_message"] = "reasoning_message"
+
+
+ class UpdateAssistantMessage(BaseModel):
+     content: Union[str, List[MessageContentUnion]]
+     message_type: Literal["assistant_message"] = "assistant_message"
+
+
+ LettaMessageUpdateUnion = Annotated[
+     Union[UpdateSystemMessage, UpdateUserMessage, UpdateReasoningMessage, UpdateAssistantMessage],
+     Field(discriminator="message_type"),
+ ]
+
+
  def create_letta_message_union_schema():
      return {
          "oneOf": [
letta/schemas/message.py CHANGED
@@ -74,7 +74,7 @@ class MessageUpdate(BaseModel):
      """Request to update a message"""
 
      role: Optional[MessageRole] = Field(None, description="The role of the participant.")
-     content: Optional[Union[str, List[MessageContentUnion]]] = Field(..., description="The content of the message.")
+     content: Optional[Union[str, List[MessageContentUnion]]] = Field(None, description="The content of the message.")
      # NOTE: probably doesn't make sense to allow remapping user_id or agent_id (vs creating a new message)
      # user_id: Optional[str] = Field(None, description="The unique identifier of the user.")
      # agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
letta/schemas/step.py CHANGED
@@ -18,6 +18,7 @@ class Step(StepBase):
      job_id: Optional[str] = Field(
          None, description="The unique identifier of the job that this step belongs to. Only included for async calls."
      )
+     agent_id: Optional[str] = Field(None, description="The ID of the agent that performed the step.")
      provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")
      model: Optional[str] = Field(None, description="The name of the model used for this step.")
      model_endpoint: Optional[str] = Field(None, description="The model endpoint url used for this step.")
letta/serialize_schemas/agent.py CHANGED
@@ -70,4 +70,11 @@ class SerializedAgentSchema(BaseSchema):
      class Meta(BaseSchema.Meta):
          model = Agent
          # TODO: Serialize these as well...
-         exclude = BaseSchema.Meta.exclude + ("sources", "source_passages", "agent_passages")
+         exclude = BaseSchema.Meta.exclude + (
+             "project_id",
+             "template_id",
+             "base_template_id",
+             "sources",
+             "source_passages",
+             "agent_passages",
+         )
letta/server/rest_api/interface.py CHANGED
@@ -918,13 +918,15 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
              # skip if there's a finish
              return None
          else:
-             # Example case that would trigger here:
-             # id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ'
-             # choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)]
-             # created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC))
-             # model='gpt-4o-mini-2024-07-18'
-             # object='chat.completion.chunk'
-             warnings.warn(f"Couldn't find delta in chunk: {chunk}")
+             # Only warn for non-Claude models since Claude commonly has empty first chunks
+             if not chunk.model.startswith("claude-"):
+                 # Example case that would trigger here:
+                 # id='chatcmpl-AKtUvREgRRvgTW6n8ZafiKuV0mxhQ'
+                 # choices=[ChunkChoice(finish_reason=None, index=0, delta=MessageDelta(content=None, tool_calls=None, function_call=None), logprobs=None)]
+                 # created=datetime.datetime(2024, 10, 21, 20, 40, 57, tzinfo=TzInfo(UTC))
+                 # model='gpt-4o-mini-2024-07-18'
+                 # object='chat.completion.chunk'
+                 warnings.warn(f"Couldn't find delta in chunk: {chunk}")
              return None
 
          return processed_chunk
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py CHANGED
@@ -24,7 +24,7 @@ logger = get_logger(__name__)
 
 
  @router.post(
-     "/chat/completions",
+     "/{agent_id}/chat/completions",
      response_model=None,
      operation_id="create_chat_completions",
      responses={
@@ -37,6 +37,7 @@ logger = get_logger(__name__)
      },
  )
  async def create_chat_completions(
+     agent_id: str,
      completion_request: CompletionCreateParams = Body(...),
      server: "SyncServer" = Depends(get_letta_server),
      user_id: Optional[str] = Header(None, alias="user_id"),
@@ -51,12 +52,6 @@ async def create_chat_completions(
 
      actor = server.user_manager.get_user_or_default(user_id=user_id)
 
-     agent_id = str(completion_request.get("user", None))
-     if agent_id is None:
-         error_msg = "Must pass agent_id in the 'user' field"
-         logger.error(error_msg)
-         raise HTTPException(status_code=400, detail=error_msg)
-
      letta_agent = server.load_agent(agent_id=agent_id, actor=actor)
      llm_config = letta_agent.agent_state.llm_config
      if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint:
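
With this change the agent is addressed in the URL path instead of being passed through the OpenAI 'user' field. A hedged sketch of calling the relocated endpoint; the host, port, and route prefix are assumptions about a local server setup, not taken from the package.

# Hedged sketch; URL prefix, port, and IDs are assumptions for illustration.
import requests

agent_id = "agent-123"  # hypothetical
resp = requests.post(
    f"http://localhost:8283/openai/{agent_id}/chat/completions",  # prefix assumed from the router mount
    headers={"user_id": "user-456"},  # actor is resolved from this header, per the handler above
    json={
        "model": "gpt-4o-mini",
        "messages": [{"role": "user", "content": "hello"}],
    },
)
print(resp.json())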
letta/server/rest_api/routers/v1/agents.py CHANGED
@@ -13,13 +13,12 @@ from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
  from letta.log import get_logger
  from letta.orm.errors import NoResultFound
  from letta.schemas.agent import AgentState, CreateAgent, UpdateAgent
- from letta.schemas.block import Block, BlockUpdate, CreateBlock  # , BlockLabelUpdate, BlockLimitUpdate
+ from letta.schemas.block import Block, BlockUpdate
  from letta.schemas.job import JobStatus, JobUpdate, LettaRequestConfig
- from letta.schemas.letta_message import LettaMessageUnion
+ from letta.schemas.letta_message import LettaMessageUnion, LettaMessageUpdateUnion
  from letta.schemas.letta_request import LettaRequest, LettaStreamingRequest
  from letta.schemas.letta_response import LettaResponse
  from letta.schemas.memory import ContextWindowOverview, CreateArchivalMemory, Memory
- from letta.schemas.message import Message, MessageUpdate
  from letta.schemas.passage import Passage, PassageUpdate
  from letta.schemas.run import Run
  from letta.schemas.source import Source
@@ -119,6 +118,7 @@ async def upload_agent_serialized(
          True,
          description="If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.",
      ),
+     project_id: Optional[str] = Query(None, description="The project ID to associate the uploaded agent with."),
  ):
      """
      Upload a serialized agent JSON file and recreate the agent in the system.
@@ -129,7 +129,11 @@ async def upload_agent_serialized(
      serialized_data = await file.read()
      agent_json = json.loads(serialized_data)
      new_agent = server.agent_manager.deserialize(
-         serialized_agent=agent_json, actor=actor, append_copy_suffix=append_copy_suffix, override_existing_tools=override_existing_tools
+         serialized_agent=agent_json,
+         actor=actor,
+         append_copy_suffix=append_copy_suffix,
+         override_existing_tools=override_existing_tools,
+         project_id=project_id,
      )
      return new_agent
 
@@ -526,20 +530,20 @@ def list_messages(
      )
 
 
- @router.patch("/{agent_id}/messages/{message_id}", response_model=Message, operation_id="modify_message")
+ @router.patch("/{agent_id}/messages/{message_id}", response_model=LettaMessageUpdateUnion, operation_id="modify_message")
  def modify_message(
      agent_id: str,
      message_id: str,
-     request: MessageUpdate = Body(...),
+     request: LettaMessageUpdateUnion = Body(...),
      server: "SyncServer" = Depends(get_letta_server),
      actor_id: Optional[str] = Header(None, alias="user_id"),  # Extract user_id from header, default to None if not present
  ):
      """
      Update the details of a message associated with an agent.
      """
-     # TODO: Get rid of agent_id here, it's not really relevant
+     # TODO: support modifying tool calls/returns
      actor = server.user_manager.get_user_or_default(user_id=actor_id)
-     return server.message_manager.update_message_by_id(message_id=message_id, message_update=request, actor=actor)
+     return server.message_manager.update_message_by_letta_message(message_id=message_id, letta_message_update=request, actor=actor)
 
 
  @router.post(
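
The PATCH body is now one of the LettaMessageUpdateUnion payloads defined in letta_message.py above, discriminated by message_type, rather than a raw MessageUpdate. A hedged request sketch; the host, route prefix, and IDs are assumptions for illustration only.

# Hedged sketch; the route prefix and IDs are assumptions, not taken from the package.
import requests

agent_id, message_id = "agent-123", "message-456"  # hypothetical
resp = requests.patch(
    f"http://localhost:8283/v1/agents/{agent_id}/messages/{message_id}",  # prefix assumed
    headers={"user_id": "user-456"},
    json={
        "message_type": "user_message",  # selects UpdateUserMessage from the union
        "content": "corrected message text",
    },
)
print(resp.json())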