letta-nightly 0.6.37.dev20250311104150__py3-none-any.whl → 0.6.39.dev20250313104142__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (58)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +83 -23
  3. letta/agents/low_latency_agent.py +3 -2
  4. letta/client/client.py +1 -50
  5. letta/constants.py +4 -1
  6. letta/functions/function_sets/base.py +1 -1
  7. letta/functions/function_sets/multi_agent.py +9 -8
  8. letta/functions/helpers.py +47 -6
  9. letta/functions/schema_generator.py +47 -0
  10. letta/helpers/mcp_helpers.py +108 -0
  11. letta/llm_api/cohere.py +1 -1
  12. letta/llm_api/google_ai_client.py +332 -0
  13. letta/llm_api/google_vertex_client.py +214 -0
  14. letta/llm_api/helpers.py +1 -2
  15. letta/llm_api/llm_api_tools.py +0 -1
  16. letta/llm_api/llm_client.py +48 -0
  17. letta/llm_api/llm_client_base.py +129 -0
  18. letta/local_llm/utils.py +30 -20
  19. letta/log.py +1 -1
  20. letta/memory.py +1 -1
  21. letta/orm/__init__.py +1 -0
  22. letta/orm/block.py +8 -0
  23. letta/orm/enums.py +2 -0
  24. letta/orm/identities_blocks.py +13 -0
  25. letta/orm/identity.py +9 -0
  26. letta/orm/sqlalchemy_base.py +4 -4
  27. letta/orm/step.py +1 -0
  28. letta/schemas/block.py +4 -48
  29. letta/schemas/identity.py +3 -0
  30. letta/schemas/letta_message.py +26 -0
  31. letta/schemas/message.py +69 -63
  32. letta/schemas/step.py +1 -0
  33. letta/schemas/tool.py +39 -2
  34. letta/serialize_schemas/agent.py +8 -1
  35. letta/server/rest_api/app.py +15 -0
  36. letta/server/rest_api/chat_completions_interface.py +2 -0
  37. letta/server/rest_api/interface.py +46 -13
  38. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +2 -7
  39. letta/server/rest_api/routers/v1/agents.py +14 -10
  40. letta/server/rest_api/routers/v1/blocks.py +5 -1
  41. letta/server/rest_api/routers/v1/steps.py +2 -0
  42. letta/server/rest_api/routers/v1/tools.py +71 -1
  43. letta/server/rest_api/routers/v1/voice.py +3 -6
  44. letta/server/server.py +102 -5
  45. letta/services/agent_manager.py +58 -3
  46. letta/services/block_manager.py +10 -1
  47. letta/services/helpers/agent_manager_helper.py +12 -1
  48. letta/services/identity_manager.py +61 -15
  49. letta/services/message_manager.py +40 -0
  50. letta/services/step_manager.py +8 -1
  51. letta/services/summarizer/summarizer.py +1 -1
  52. letta/services/tool_manager.py +6 -0
  53. letta/settings.py +11 -12
  54. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/METADATA +20 -18
  55. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/RECORD +58 -52
  56. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/LICENSE +0 -0
  57. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/WHEEL +0 -0
  58. {letta_nightly-0.6.37.dev20250311104150.dist-info → letta_nightly-0.6.39.dev20250313104142.dist-info}/entry_points.txt +0 -0
letta/schemas/message.py CHANGED
@@ -74,7 +74,7 @@ class MessageUpdate(BaseModel):
     """Request to update a message"""
 
     role: Optional[MessageRole] = Field(None, description="The role of the participant.")
-    content: Optional[Union[str, List[MessageContentUnion]]] = Field(..., description="The content of the message.")
+    content: Optional[Union[str, List[MessageContentUnion]]] = Field(None, description="The content of the message.")
     # NOTE: probably doesn't make sense to allow remapping user_id or agent_id (vs creating a new message)
     # user_id: Optional[str] = Field(None, description="The unique identifier of the user.")
     # agent_id: Optional[str] = Field(None, description="The unique identifier of the agent.")
@@ -158,19 +158,6 @@ class Message(BaseMessage):
         del data["content"]
         return data
 
-    @property
-    def text(self) -> Optional[str]:
-        """
-        Retrieve the first text content's text.
-
-        Returns:
-            str: The text content, or None if no text content exists
-        """
-        if not self.content:
-            return None
-        text_content = [content.text for content in self.content if content.type == MessageContentType.text]
-        return text_content[0] if text_content else None
-
     def to_json(self):
         json_message = vars(self)
         if json_message["tool_calls"] is not None:
@@ -227,17 +214,21 @@ class Message(BaseMessage):
         assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
     ) -> List[LettaMessage]:
         """Convert message object (in DB format) to the style used by the original Letta API"""
+        if self.content and len(self.content) == 1 and self.content[0].type == MessageContentType.text:
+            text_content = self.content[0].text
+        else:
+            text_content = None
 
         messages = []
 
         if self.role == MessageRole.assistant:
-            if self.text is not None:
+            if text_content is not None:
                 # This is type InnerThoughts
                 messages.append(
                     ReasoningMessage(
                         id=self.id,
                         date=self.created_at,
-                        reasoning=self.text,
+                        reasoning=text_content,
                     )
                 )
             if self.tool_calls is not None:
@@ -281,9 +272,9 @@ class Message(BaseMessage):
             #     "message": response_string,
             #     "time": formatted_time,
             # }
-            assert self.text is not None, self
+            assert text_content is not None, self
             try:
-                function_return = json.loads(self.text)
+                function_return = json.loads(text_content)
                 status = function_return["status"]
                 if status == "OK":
                     status_enum = "success"
@@ -292,7 +283,7 @@ class Message(BaseMessage):
                 else:
                     raise ValueError(f"Invalid status: {status}")
             except json.JSONDecodeError:
-                raise ValueError(f"Failed to decode function return: {self.text}")
+                raise ValueError(f"Failed to decode function return: {text_content}")
             assert self.tool_call_id is not None
             messages.append(
                 # TODO make sure this is what the API returns
@@ -300,7 +291,7 @@ class Message(BaseMessage):
                 ToolReturnMessage(
                     id=self.id,
                     date=self.created_at,
-                    tool_return=self.text,
+                    tool_return=text_content,
                     status=self.tool_returns[0].status if self.tool_returns else status_enum,
                     tool_call_id=self.tool_call_id,
                     stdout=self.tool_returns[0].stdout if self.tool_returns else None,
@@ -309,23 +300,23 @@ class Message(BaseMessage):
             )
         elif self.role == MessageRole.user:
             # This is type UserMessage
-            assert self.text is not None, self
-            message_str = unpack_message(self.text)
+            assert text_content is not None, self
+            message_str = unpack_message(text_content)
             messages.append(
                 UserMessage(
                     id=self.id,
                     date=self.created_at,
-                    content=message_str or self.text,
+                    content=message_str or text_content,
                 )
             )
         elif self.role == MessageRole.system:
             # This is type SystemMessage
-            assert self.text is not None, self
+            assert text_content is not None, self
             messages.append(
                 SystemMessage(
                     id=self.id,
                     date=self.created_at,
-                    content=self.text,
+                    content=text_content,
                 )
             )
         else:
@@ -494,11 +485,15 @@ class Message(BaseMessage):
         """Go from Message class to ChatCompletion message object"""
 
         # TODO change to pydantic casting, eg `return SystemMessageModel(self)`
+        if self.content and len(self.content) == 1 and self.content[0].type == MessageContentType.text:
+            text_content = self.content[0].text
+        else:
+            text_content = None
 
         if self.role == "system":
             assert all([v is not None for v in [self.role]]), vars(self)
             openai_message = {
-                "content": self.text,
+                "content": text_content,
                 "role": self.role,
             }
             # Optional field, do not include if null
@@ -506,9 +501,9 @@ class Message(BaseMessage):
                 openai_message["name"] = self.name
 
         elif self.role == "user":
-            assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            assert all([v is not None for v in [text_content, self.role]]), vars(self)
             openai_message = {
-                "content": self.text,
+                "content": text_content,
                 "role": self.role,
             }
             # Optional field, do not include if null
@@ -516,9 +511,9 @@ class Message(BaseMessage):
                 openai_message["name"] = self.name
 
         elif self.role == "assistant":
-            assert self.tool_calls is not None or self.text is not None
+            assert self.tool_calls is not None or text_content is not None
             openai_message = {
-                "content": None if put_inner_thoughts_in_kwargs else self.text,
+                "content": None if put_inner_thoughts_in_kwargs else text_content,
                 "role": self.role,
             }
             # Optional fields, do not include if null
@@ -530,7 +525,7 @@ class Message(BaseMessage):
                 openai_message["tool_calls"] = [
                     add_inner_thoughts_to_tool_call(
                         tool_call,
-                        inner_thoughts=self.text,
+                        inner_thoughts=text_content,
                         inner_thoughts_key=INNER_THOUGHTS_KWARG,
                     ).model_dump()
                     for tool_call in self.tool_calls
@@ -544,7 +539,7 @@ class Message(BaseMessage):
         elif self.role == "tool":
             assert all([v is not None for v in [self.role, self.tool_call_id]]), vars(self)
             openai_message = {
-                "content": self.text,
+                "content": text_content,
                 "role": self.role,
                 "tool_call_id": self.tool_call_id[:max_tool_id_length] if max_tool_id_length else self.tool_call_id,
             }
@@ -565,6 +560,10 @@ class Message(BaseMessage):
         Args:
            inner_thoughts_xml_tag (str): The XML tag to wrap around inner thoughts
        """
+        if self.content and len(self.content) == 1 and self.content[0].type == MessageContentType.text:
+            text_content = self.content[0].text
+        else:
+            text_content = None
 
        def add_xml_tag(string: str, xml_tag: Optional[str]):
            # NOTE: Anthropic docs recommends using <thinking> tag when using CoT + tool use
@@ -573,34 +572,34 @@ class Message(BaseMessage):
        if self.role == "system":
            # NOTE: this is not for system instructions, but instead system "events"
 
-            assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            assert all([v is not None for v in [text_content, self.role]]), vars(self)
            # Two options here, we would use system.package_system_message,
            # or use a more Anthropic-specific packaging ie xml tags
-            user_system_event = add_xml_tag(string=f"SYSTEM ALERT: {self.text}", xml_tag="event")
+            user_system_event = add_xml_tag(string=f"SYSTEM ALERT: {text_content}", xml_tag="event")
            anthropic_message = {
                "content": user_system_event,
                "role": "user",
            }
 
        elif self.role == "user":
-            assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            assert all([v is not None for v in [text_content, self.role]]), vars(self)
            anthropic_message = {
-                "content": self.text,
+                "content": text_content,
                "role": self.role,
            }
 
        elif self.role == "assistant":
-            assert self.tool_calls is not None or self.text is not None
+            assert self.tool_calls is not None or text_content is not None
            anthropic_message = {
                "role": self.role,
            }
            content = []
            # COT / reasoning / thinking
-            if self.text is not None and not put_inner_thoughts_in_kwargs:
+            if text_content is not None and not put_inner_thoughts_in_kwargs:
                content.append(
                    {
                        "type": "text",
-                        "text": add_xml_tag(string=self.text, xml_tag=inner_thoughts_xml_tag),
+                        "text": add_xml_tag(string=text_content, xml_tag=inner_thoughts_xml_tag),
                    }
                )
            # Tool calling
@@ -610,7 +609,7 @@ class Message(BaseMessage):
                if put_inner_thoughts_in_kwargs:
                    tool_call_input = add_inner_thoughts_to_tool_call(
                        tool_call,
-                        inner_thoughts=self.text,
+                        inner_thoughts=text_content,
                        inner_thoughts_key=INNER_THOUGHTS_KWARG,
                    ).model_dump()
                else:
@@ -639,7 +638,7 @@ class Message(BaseMessage):
                    {
                        "type": "tool_result",
                        "tool_use_id": self.tool_call_id,
-                        "content": self.text,
+                        "content": text_content,
                    }
                ],
            }
@@ -656,6 +655,10 @@ class Message(BaseMessage):
        # type Content: https://ai.google.dev/api/rest/v1/Content / https://ai.google.dev/api/rest/v1beta/Content
        # parts[]: Part
        # role: str ('user' or 'model')
+        if self.content and len(self.content) == 1 and self.content[0].type == MessageContentType.text:
+            text_content = self.content[0].text
+        else:
+            text_content = None
 
        if self.role != "tool" and self.name is not None:
            warnings.warn(f"Using Google AI with non-null 'name' field ({self.name}) not yet supported.")
@@ -665,18 +668,18 @@ class Message(BaseMessage):
            # https://www.reddit.com/r/Bard/comments/1b90i8o/does_gemini_have_a_system_prompt_option_while/
            google_ai_message = {
                "role": "user", # NOTE: no 'system'
-                "parts": [{"text": self.text}],
+                "parts": [{"text": text_content}],
            }
 
        elif self.role == "user":
-            assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            assert all([v is not None for v in [text_content, self.role]]), vars(self)
            google_ai_message = {
                "role": "user",
-                "parts": [{"text": self.text}],
+                "parts": [{"text": text_content}],
            }
 
        elif self.role == "assistant":
-            assert self.tool_calls is not None or self.text is not None
+            assert self.tool_calls is not None or text_content is not None
            google_ai_message = {
                "role": "model", # NOTE: different
            }
@@ -684,10 +687,10 @@ class Message(BaseMessage):
            # NOTE: Google AI API doesn't allow non-null content + function call
            # To get around this, just two a two part message, inner thoughts first then
            parts = []
-            if not put_inner_thoughts_in_kwargs and self.text is not None:
+            if not put_inner_thoughts_in_kwargs and text_content is not None:
                # NOTE: ideally we do multi-part for CoT / inner thoughts + function call, but Google AI API doesn't allow it
                raise NotImplementedError
-                parts.append({"text": self.text})
+                parts.append({"text": text_content})
 
            if self.tool_calls is not None:
                # NOTE: implied support for multiple calls
@@ -701,10 +704,10 @@ class Message(BaseMessage):
                        raise UserWarning(f"Failed to parse JSON function args: {function_args}")
                        function_args = {"args": function_args}
 
-                    if put_inner_thoughts_in_kwargs and self.text is not None:
+                    if put_inner_thoughts_in_kwargs and text_content is not None:
                        assert "inner_thoughts" not in function_args, function_args
                        assert len(self.tool_calls) == 1
-                        function_args[INNER_THOUGHTS_KWARG] = self.text
+                        function_args[INNER_THOUGHTS_KWARG] = text_content
 
                    parts.append(
                        {
@@ -715,8 +718,8 @@ class Message(BaseMessage):
                        }
                    )
            else:
-                assert self.text is not None
-                parts.append({"text": self.text})
+                assert text_content is not None
+                parts.append({"text": text_content})
            google_ai_message["parts"] = parts
 
        elif self.role == "tool":
@@ -731,9 +734,9 @@ class Message(BaseMessage):
 
            # NOTE: Google AI API wants the function response as JSON only, no string
            try:
-                function_response = json.loads(self.text)
+                function_response = json.loads(text_content)
            except:
-                function_response = {"function_response": self.text}
+                function_response = {"function_response": text_content}
 
            google_ai_message = {
                "role": "function",
@@ -778,7 +781,10 @@ class Message(BaseMessage):
 
        # TODO: update this prompt style once guidance from Cohere on
        # embedded function calls in multi-turn conversation become more clear
-
+        if self.content and len(self.content) == 1 and self.content[0].type == MessageContentType.text:
+            text_content = self.content[0].text
+        else:
+            text_content = None
        if self.role == "system":
            """
            The chat_history parameter should not be used for SYSTEM messages in most cases.
@@ -787,26 +793,26 @@ class Message(BaseMessage):
            raise UserWarning(f"role 'system' messages should go in 'preamble' field for Cohere API")
 
        elif self.role == "user":
-            assert all([v is not None for v in [self.text, self.role]]), vars(self)
+            assert all([v is not None for v in [text_content, self.role]]), vars(self)
            cohere_message = [
                {
                    "role": "USER",
-                    "message": self.text,
+                    "message": text_content,
                }
            ]
 
        elif self.role == "assistant":
            # NOTE: we may break this into two message - an inner thought and a function call
            # Optionally, we could just make this a function call with the inner thought inside
-            assert self.tool_calls is not None or self.text is not None
+            assert self.tool_calls is not None or text_content is not None
 
-            if self.text and self.tool_calls:
+            if text_content and self.tool_calls:
                if inner_thoughts_as_kwarg:
                    raise NotImplementedError
                cohere_message = [
                    {
                        "role": "CHATBOT",
-                        "message": self.text,
+                        "message": text_content,
                    },
                ]
                for tc in self.tool_calls:
@@ -820,7 +826,7 @@ class Message(BaseMessage):
                            "message": f"{function_call_prefix} {function_call_text}",
                        }
                    )
-            elif not self.text and self.tool_calls:
+            elif not text_content and self.tool_calls:
                cohere_message = []
                for tc in self.tool_calls:
                    # TODO better way to pack?
@@ -831,11 +837,11 @@ class Message(BaseMessage):
                            "message": f"{function_call_prefix} {function_call_text}",
                        }
                    )
-            elif self.text and not self.tool_calls:
+            elif text_content and not self.tool_calls:
                cohere_message = [
                    {
                        "role": "CHATBOT",
-                        "message": self.text,
+                        "message": text_content,
                    }
                ]
            else:
@@ -843,7 +849,7 @@ class Message(BaseMessage):
 
        elif self.role == "tool":
            assert all([v is not None for v in [self.role, self.tool_call_id]]), vars(self)
-            function_response_text = self.text
+            function_response_text = text_content
            cohere_message = [
                {
                    "role": function_response_role,
letta/schemas/step.py CHANGED
@@ -18,6 +18,7 @@ class Step(StepBase):
     job_id: Optional[str] = Field(
         None, description="The unique identifier of the job that this step belongs to. Only included for async calls."
     )
+    agent_id: Optional[str] = Field(None, description="The ID of the agent that performed the step.")
     provider_name: Optional[str] = Field(None, description="The name of the provider used for this step.")
     model: Optional[str] = Field(None, description="The name of the model used for this step.")
     model_endpoint: Optional[str] = Field(None, description="The model endpoint url used for this step.")
letta/schemas/tool.py CHANGED
@@ -7,11 +7,22 @@ from letta.constants import (
     FUNCTION_RETURN_CHAR_LIMIT,
     LETTA_CORE_TOOL_MODULE_NAME,
     LETTA_MULTI_AGENT_TOOL_MODULE_NAME,
+    MCP_TOOL_TAG_NAME_PREFIX,
 )
 from letta.functions.ast_parsers import get_function_name_and_description
 from letta.functions.functions import derive_openai_json_schema, get_json_schema_from_module
-from letta.functions.helpers import generate_composio_tool_wrapper, generate_langchain_tool_wrapper, generate_model_from_args_json_schema
-from letta.functions.schema_generator import generate_schema_from_args_schema_v2, generate_tool_schema_for_composio
+from letta.functions.helpers import (
+    generate_composio_tool_wrapper,
+    generate_langchain_tool_wrapper,
+    generate_mcp_tool_wrapper,
+    generate_model_from_args_json_schema,
+)
+from letta.functions.schema_generator import (
+    generate_schema_from_args_schema_v2,
+    generate_tool_schema_for_composio,
+    generate_tool_schema_for_mcp,
+)
+from letta.helpers.mcp_helpers import MCPTool
 from letta.log import get_logger
 from letta.orm.enums import ToolType
 from letta.schemas.letta_base import LettaBase
@@ -121,6 +132,32 @@ class ToolCreate(LettaBase):
     args_json_schema: Optional[Dict] = Field(None, description="The args JSON schema of the function.")
     return_char_limit: int = Field(FUNCTION_RETURN_CHAR_LIMIT, description="The maximum number of characters in the response.")
 
+    # TODO should we put the HTTP / API fetch inside from_mcp?
+    # async def from_mcp(cls, mcp_server: str, mcp_tool_name: str) -> "ToolCreate":
+
+    @classmethod
+    def from_mcp(cls, mcp_server_name: str, mcp_tool: MCPTool) -> "ToolCreate":
+
+        # Get the MCP tool from the MCP server
+        # NVM
+
+        # Pass the MCP tool to the schema generator
+        json_schema = generate_tool_schema_for_mcp(mcp_tool=mcp_tool)
+
+        # Return a ToolCreate instance
+        description = mcp_tool.description
+        source_type = "python"
+        tags = [f"{MCP_TOOL_TAG_NAME_PREFIX}:{mcp_server_name}"]
+        wrapper_func_name, wrapper_function_str = generate_mcp_tool_wrapper(mcp_tool.name)
+
+        return cls(
+            description=description,
+            source_type=source_type,
+            tags=tags,
+            source_code=wrapper_function_str,
+            json_schema=json_schema,
+        )
+
     @classmethod
     def from_composio(cls, action_name: str) -> "ToolCreate":
         """
letta/serialize_schemas/agent.py CHANGED
@@ -70,4 +70,11 @@ class SerializedAgentSchema(BaseSchema):
     class Meta(BaseSchema.Meta):
         model = Agent
         # TODO: Serialize these as well...
-        exclude = BaseSchema.Meta.exclude + ("sources", "source_passages", "agent_passages")
+        exclude = BaseSchema.Meta.exclude + (
+            "project_id",
+            "template_id",
+            "base_template_id",
+            "sources",
+            "source_passages",
+            "agent_passages",
+        )
letta/server/rest_api/app.py CHANGED
@@ -136,6 +136,21 @@ def create_application() -> "FastAPI":
         debug=debug_mode,  # if True, the stack trace will be printed in the response
     )
 
+    @app.on_event("shutdown")
+    def shutdown_mcp_clients():
+        global server
+        import threading
+
+        def cleanup_clients():
+            if hasattr(server, "mcp_clients"):
+                for client in server.mcp_clients.values():
+                    client.cleanup()
+                server.mcp_clients.clear()
+
+        t = threading.Thread(target=cleanup_clients)
+        t.start()
+        t.join()
+
     @app.exception_handler(Exception)
     async def generic_error_handler(request: Request, exc: Exception):
         # Log the actual error for debugging
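The shutdown hook drives cleanup through a short-lived helper thread and joins it immediately, so the `on_event("shutdown")` callback still blocks until every MCP client is torn down. A minimal standalone sketch of the same pattern, assuming each client exposes a blocking `cleanup()` method as the diff implies:

```python
# Standalone sketch of the shutdown pattern above; FakeMCPClient is a stand-in
# for whatever letta's MCP client exposes (only a blocking cleanup() is assumed).
import threading


class FakeMCPClient:
    def cleanup(self) -> None:
        print("closing transport")


mcp_clients = {"docs-server": FakeMCPClient()}


def cleanup_clients() -> None:
    for client in mcp_clients.values():
        client.cleanup()
    mcp_clients.clear()


# Running cleanup on a fresh thread keeps any thread-local or event-loop state
# the clients hold away from the caller's context; join() makes shutdown wait.
t = threading.Thread(target=cleanup_clients)
t.start()
t.join()
```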
letta/server/rest_api/chat_completions_interface.py CHANGED
@@ -267,3 +267,5 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
     """Clears internal buffers for function call name/args."""
     self.current_function_name = ""
     self.current_function_arguments = []
+    self.current_json_parse_result = {}
+    self._found_message_tool_kwarg = False
letta/server/rest_api/interface.py CHANGED
@@ -24,6 +24,7 @@
 )
 from letta.schemas.message import Message
 from letta.schemas.openai.chat_completion_response import ChatCompletionChunkResponse
+from letta.server.rest_api.optimistic_json_parser import OptimisticJSONParser
 from letta.streaming_interface import AgentChunkStreamingInterface
 from letta.streaming_utils import FunctionArgumentsStreamHandler, JSONInnerThoughtsExtractor
 
@@ -282,6 +283,11 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # turn function argument to send_message into a normal text stream
         self.streaming_chat_completion_json_reader = FunctionArgumentsStreamHandler(json_key=assistant_message_tool_kwarg)
 
+        # @matt's changes here, adopting new optimistic json parser
+        self.current_function_arguments = []
+        self.optimistic_json_parser = OptimisticJSONParser()
+        self.current_json_parse_result = {}
+
         # Store metadata passed from server
         self.metadata = {}
 
@@ -374,6 +380,8 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def stream_start(self):
         """Initialize streaming by activating the generator and clearing any old chunks."""
         self.streaming_chat_completion_mode_function_name = None
+        self.current_function_arguments = []
+        self.current_json_parse_result = {}
 
         if not self._active:
             self._active = True
@@ -383,6 +391,8 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
     def stream_end(self):
         """Clean up the stream by deactivating and clearing chunks."""
         self.streaming_chat_completion_mode_function_name = None
+        self.current_function_arguments = []
+        self.current_json_parse_result = {}
 
         # if not self.streaming_chat_completion_mode and not self.nonstreaming_legacy_mode:
         #     self._push_to_buffer(self.multi_step_gen_indicator)
@@ -568,20 +578,27 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     self.streaming_chat_completion_json_reader.reset()
                     # early exit to turn into content mode
                     return None
+            if tool_call.function.arguments:
+                self.current_function_arguments.append(tool_call.function.arguments)
 
             # if we're in the middle of parsing a send_message, we'll keep processing the JSON chunks
             if tool_call.function.arguments and self.streaming_chat_completion_mode_function_name == self.assistant_message_tool_name:
                 # Strip out any extras tokens
-                cleaned_func_args = self.streaming_chat_completion_json_reader.process_json_chunk(tool_call.function.arguments)
                 # In the case that we just have the prefix of something, no message yet, then we should early exit to move to the next chunk
-                if cleaned_func_args is None:
-                    return None
+                combined_args = "".join(self.current_function_arguments)
+                parsed_args = self.optimistic_json_parser.parse(combined_args)
+
+                if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
+                    self.assistant_message_tool_kwarg
+                ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
+                    new_content = parsed_args.get(self.assistant_message_tool_kwarg)
+                    prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
+                    # TODO: Assumes consistent state and that prev_content is subset of new_content
+                    diff = new_content.replace(prev_content, "", 1)
+                    self.current_json_parse_result = parsed_args
+                    processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff)
                 else:
-                    processed_chunk = AssistantMessage(
-                        id=message_id,
-                        date=message_date,
-                        content=cleaned_func_args,
-                    )
+                    return None
 
             # otherwise we just do a regular passthrough of a ToolCallDelta via a ToolCallMessage
             else:
@@ -637,6 +654,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             #     updates_inner_thoughts = ""
             # else:  # OpenAI
             #     updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
+            self.current_function_arguments.append(tool_call.function.arguments)
             updates_main_json, updates_inner_thoughts = self.function_args_reader.process_fragment(tool_call.function.arguments)
 
             # If we have inner thoughts, we should output them as a chunk
@@ -731,6 +749,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     if self.function_args_buffer:
                         # In this case, we should release the buffer + new data at once
                         combined_chunk = self.function_args_buffer + updates_main_json
+
                         processed_chunk = AssistantMessage(
                             id=message_id,
                             date=message_date,
@@ -745,11 +764,24 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
 
                     else:
                         # If there's no buffer to clear, just output a new chunk with new data
-                        processed_chunk = AssistantMessage(
-                            id=message_id,
-                            date=message_date,
-                            content=updates_main_json,
-                        )
+                        # TODO: THIS IS HORRIBLE
+                        # TODO: WE USE THE OLD JSON PARSER EARLIER (WHICH DOES NOTHING) AND NOW THE NEW JSON PARSER
+                        # TODO: THIS IS TOTALLY WRONG AND BAD, BUT SAVING FOR A LARGER REWRITE IN THE NEAR FUTURE
+                        combined_args = "".join(self.current_function_arguments)
+                        parsed_args = self.optimistic_json_parser.parse(combined_args)
+
+                        if parsed_args.get(self.assistant_message_tool_kwarg) and parsed_args.get(
+                            self.assistant_message_tool_kwarg
+                        ) != self.current_json_parse_result.get(self.assistant_message_tool_kwarg):
+                            new_content = parsed_args.get(self.assistant_message_tool_kwarg)
+                            prev_content = self.current_json_parse_result.get(self.assistant_message_tool_kwarg, "")
+                            # TODO: Assumes consistent state and that prev_content is subset of new_content
+                            diff = new_content.replace(prev_content, "", 1)
+                            self.current_json_parse_result = parsed_args
+                            processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff)
+                        else:
+                            return None
+
                     # Store the ID of the tool call so allow skipping the corresponding response
                     if self.function_id_buffer:
                         self.prev_assistant_message_id = self.function_id_buffer
@@ -1018,6 +1050,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 message_date=message_date,
                 expect_reasoning_content=expect_reasoning_content,
             )
+
            if processed_chunk is None:
                return
 
letta/server/rest_api/routers/openai/chat_completions/chat_completions.py CHANGED
@@ -24,7 +24,7 @@ logger = get_logger(__name__)
 
 
 @router.post(
-    "/chat/completions",
+    "/{agent_id}/chat/completions",
     response_model=None,
     operation_id="create_chat_completions",
     responses={
@@ -37,6 +37,7 @@ logger = get_logger(__name__)
     },
 )
 async def create_chat_completions(
+    agent_id: str,
     completion_request: CompletionCreateParams = Body(...),
     server: "SyncServer" = Depends(get_letta_server),
     user_id: Optional[str] = Header(None, alias="user_id"),
@@ -51,12 +52,6 @@ async def create_chat_completions(
 
     actor = server.user_manager.get_user_or_default(user_id=user_id)
 
-    agent_id = str(completion_request.get("user", None))
-    if agent_id is None:
-        error_msg = "Must pass agent_id in the 'user' field"
-        logger.error(error_msg)
-        raise HTTPException(status_code=400, detail=error_msg)
-
     letta_agent = server.load_agent(agent_id=agent_id, actor=actor)
     llm_config = letta_agent.agent_state.llm_config
     if llm_config.model_endpoint_type != "openai" or "inference.memgpt.ai" in llm_config.model_endpoint:
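With the agent ID moved into the path, an OpenAI-compatible client can target the agent via `base_url` instead of overloading the `user` field. A hedged sketch; the host, port, and route prefix below are assumptions about a local Letta server, not confirmed by this diff:

```python
# Illustrative call against the new path-parameterized route. The base_url
# (host, port, and "/openai/v1" prefix) is an assumption; only the
# "/{agent_id}/chat/completions" suffix comes from the diff above.
from openai import OpenAI

agent_id = "agent-00000000-0000-0000-0000-000000000000"  # placeholder ID

client = OpenAI(
    base_url=f"http://localhost:8283/openai/v1/{agent_id}",  # prefix assumed
    api_key="unused-locally",
)

resp = client.chat.completions.create(
    model="gpt-4o-mini",  # the server routes to the agent's configured model
    messages=[{"role": "user", "content": "Hello!"}],
    stream=True,
)
for chunk in resp:
    print(chunk.choices[0].delta.content or "", end="")
```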