letta-nightly 0.6.48.dev20250406104033__py3-none-any.whl → 0.6.49.dev20250408030511__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of letta-nightly might be problematic.

Files changed (87)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +47 -12
  3. letta/agents/base_agent.py +7 -4
  4. letta/agents/helpers.py +52 -0
  5. letta/agents/letta_agent.py +105 -42
  6. letta/agents/voice_agent.py +2 -2
  7. letta/constants.py +13 -1
  8. letta/errors.py +10 -3
  9. letta/functions/function_sets/base.py +65 -0
  10. letta/functions/interface.py +2 -2
  11. letta/functions/mcp_client/base_client.py +18 -1
  12. letta/{dynamic_multi_agent.py → groups/dynamic_multi_agent.py} +3 -0
  13. letta/groups/helpers.py +113 -0
  14. letta/{round_robin_multi_agent.py → groups/round_robin_multi_agent.py} +2 -0
  15. letta/groups/sleeptime_multi_agent.py +259 -0
  16. letta/{supervisor_multi_agent.py → groups/supervisor_multi_agent.py} +1 -0
  17. letta/helpers/converters.py +109 -7
  18. letta/helpers/message_helper.py +1 -0
  19. letta/helpers/tool_rule_solver.py +40 -23
  20. letta/interface.py +12 -5
  21. letta/interfaces/anthropic_streaming_interface.py +329 -0
  22. letta/llm_api/anthropic.py +12 -1
  23. letta/llm_api/anthropic_client.py +65 -14
  24. letta/llm_api/azure_openai.py +2 -2
  25. letta/llm_api/google_ai_client.py +13 -2
  26. letta/llm_api/google_constants.py +3 -0
  27. letta/llm_api/google_vertex_client.py +2 -2
  28. letta/llm_api/llm_api_tools.py +1 -1
  29. letta/llm_api/llm_client.py +7 -0
  30. letta/llm_api/llm_client_base.py +2 -7
  31. letta/llm_api/openai.py +7 -1
  32. letta/llm_api/openai_client.py +250 -0
  33. letta/orm/__init__.py +4 -0
  34. letta/orm/agent.py +6 -0
  35. letta/orm/block.py +32 -2
  36. letta/orm/block_history.py +46 -0
  37. letta/orm/custom_columns.py +60 -0
  38. letta/orm/enums.py +7 -0
  39. letta/orm/group.py +6 -0
  40. letta/orm/groups_blocks.py +13 -0
  41. letta/orm/llm_batch_items.py +55 -0
  42. letta/orm/llm_batch_job.py +48 -0
  43. letta/orm/message.py +7 -1
  44. letta/orm/organization.py +2 -0
  45. letta/orm/sqlalchemy_base.py +18 -15
  46. letta/prompts/system/memgpt_sleeptime_chat.txt +52 -0
  47. letta/prompts/system/sleeptime.txt +26 -0
  48. letta/schemas/agent.py +13 -1
  49. letta/schemas/enums.py +17 -2
  50. letta/schemas/group.py +14 -1
  51. letta/schemas/letta_message.py +5 -3
  52. letta/schemas/llm_batch_job.py +53 -0
  53. letta/schemas/llm_config.py +14 -4
  54. letta/schemas/message.py +44 -0
  55. letta/schemas/tool.py +3 -0
  56. letta/schemas/usage.py +1 -0
  57. letta/server/db.py +2 -0
  58. letta/server/rest_api/app.py +1 -1
  59. letta/server/rest_api/chat_completions_interface.py +8 -3
  60. letta/server/rest_api/interface.py +36 -7
  61. letta/server/rest_api/routers/v1/agents.py +53 -39
  62. letta/server/rest_api/routers/v1/runs.py +14 -2
  63. letta/server/rest_api/utils.py +15 -4
  64. letta/server/server.py +120 -71
  65. letta/services/agent_manager.py +70 -6
  66. letta/services/block_manager.py +190 -2
  67. letta/services/group_manager.py +68 -0
  68. letta/services/helpers/agent_manager_helper.py +6 -4
  69. letta/services/llm_batch_manager.py +139 -0
  70. letta/services/message_manager.py +17 -31
  71. letta/services/tool_executor/tool_execution_sandbox.py +1 -3
  72. letta/services/tool_executor/tool_executor.py +9 -20
  73. letta/services/tool_manager.py +14 -3
  74. letta/services/tool_sandbox/__init__.py +0 -0
  75. letta/services/tool_sandbox/base.py +188 -0
  76. letta/services/tool_sandbox/e2b_sandbox.py +116 -0
  77. letta/services/tool_sandbox/local_sandbox.py +221 -0
  78. letta/sleeptime_agent.py +61 -0
  79. letta/streaming_interface.py +20 -10
  80. letta/utils.py +4 -0
  81. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/METADATA +2 -2
  82. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/RECORD +85 -69
  83. letta/offline_memory_agent.py +0 -173
  84. letta/services/tool_executor/async_tool_execution_sandbox.py +0 -397
  85. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/LICENSE +0 -0
  86. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/WHEEL +0 -0
  87. {letta_nightly-0.6.48.dev20250406104033.dist-info → letta_nightly-0.6.49.dev20250408030511.dist-info}/entry_points.txt +0 -0
letta/schemas/message.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import copy
 import json
+import uuid
 import warnings
 from collections import OrderedDict
 from datetime import datetime, timezone
@@ -78,6 +79,7 @@ class MessageCreate(BaseModel):
         json_schema_extra=get_letta_message_content_union_str_json_schema(),
     )
     name: Optional[str] = Field(None, description="The name of the participant.")
+    otid: Optional[str] = Field(None, description="The offline threading id associated with this message")
 
     def model_dump(self, to_orm: bool = False, **kwargs) -> Dict[str, Any]:
         data = super().model_dump(**kwargs)
@@ -168,12 +170,17 @@ class Message(BaseMessage):
         json_message["created_at"] = self.created_at.isoformat()
         return json_message
 
+    @staticmethod
+    def generate_otid():
+        return str(uuid.uuid4())
+
     @staticmethod
     def to_letta_messages_from_list(
         messages: List[Message],
         use_assistant_message: bool = True,
         assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
         assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
+        reverse: bool = True,
     ) -> List[LettaMessage]:
         if use_assistant_message:
             message_ids_to_remove = []
@@ -203,6 +210,7 @@ class Message(BaseMessage):
                 use_assistant_message=use_assistant_message,
                 assistant_message_tool_name=assistant_message_tool_name,
                 assistant_message_tool_kwarg=assistant_message_tool_kwarg,
+                reverse=reverse,
             )
         ]
 
@@ -211,6 +219,7 @@ class Message(BaseMessage):
         use_assistant_message: bool = False,
         assistant_message_tool_name: str = DEFAULT_MESSAGE_TOOL,
         assistant_message_tool_kwarg: str = DEFAULT_MESSAGE_TOOL_KWARG,
+        reverse: bool = True,
     ) -> List[LettaMessage]:
         """Convert message object (in DB format) to the style used by the original Letta API"""
         messages = []
@@ -221,18 +230,21 @@ class Message(BaseMessage):
             if self.content:
                 # Check for ReACT-style COT inside of TextContent
                 if len(self.content) == 1 and isinstance(self.content[0], TextContent):
+                    otid = Message.generate_otid_from_id(self.id, len(messages))
                     messages.append(
                         ReasoningMessage(
                             id=self.id,
                             date=self.created_at,
                             reasoning=self.content[0].text,
                             name=self.name,
+                            otid=otid,
                         )
                     )
                 # Otherwise, we may have a list of multiple types
                 else:
                     # TODO we can probably collapse these two cases into a single loop
                     for content_part in self.content:
+                        otid = Message.generate_otid_from_id(self.id, len(messages))
                         if isinstance(content_part, TextContent):
                             # COT
                             messages.append(
@@ -241,6 +253,7 @@ class Message(BaseMessage):
                                     date=self.created_at,
                                     reasoning=content_part.text,
                                     name=self.name,
+                                    otid=otid,
                                 )
                             )
                         elif isinstance(content_part, ReasoningContent):
@@ -253,6 +266,7 @@ class Message(BaseMessage):
                                     source="reasoner_model",  # TODO do we want to tag like this?
                                     signature=content_part.signature,
                                     name=self.name,
+                                    otid=otid,
                                 )
                             )
                         elif isinstance(content_part, RedactedReasoningContent):
@@ -264,6 +278,7 @@ class Message(BaseMessage):
                                     state="redacted",
                                     hidden_reasoning=content_part.data,
                                     name=self.name,
+                                    otid=otid,
                                 )
                             )
                         else:
@@ -272,6 +287,7 @@ class Message(BaseMessage):
             if self.tool_calls is not None:
                 # This is type FunctionCall
                 for tool_call in self.tool_calls:
+                    otid = Message.generate_otid_from_id(self.id, len(messages))
                     # If we're supporting using assistant message,
                     # then we want to treat certain function calls as a special case
                     if use_assistant_message and tool_call.function.name == assistant_message_tool_name:
@@ -287,6 +303,7 @@ class Message(BaseMessage):
                                 date=self.created_at,
                                 content=message_string,
                                 name=self.name,
+                                otid=otid,
                             )
                         )
                     else:
@@ -300,6 +317,7 @@ class Message(BaseMessage):
                                     tool_call_id=tool_call.id,
                                 ),
                                 name=self.name,
+                                otid=otid,
                             )
                         )
         elif self.role == MessageRole.tool:
@@ -341,6 +359,7 @@ class Message(BaseMessage):
                     stdout=self.tool_returns[0].stdout if self.tool_returns else None,
                     stderr=self.tool_returns[0].stderr if self.tool_returns else None,
                     name=self.name,
+                    otid=self.id.replace("message-", ""),
                 )
             )
         elif self.role == MessageRole.user:
@@ -357,6 +376,7 @@ class Message(BaseMessage):
                     date=self.created_at,
                     content=message_str or text_content,
                     name=self.name,
+                    otid=self.otid,
                 )
             )
         elif self.role == MessageRole.system:
@@ -372,11 +392,15 @@ class Message(BaseMessage):
                     date=self.created_at,
                     content=text_content,
                     name=self.name,
+                    otid=self.otid,
                 )
             )
         else:
             raise ValueError(self.role)
 
+        if reverse:
+            messages.reverse()
+
         return messages
 
     @staticmethod
@@ -670,6 +694,9 @@ class Message(BaseMessage):
 
         def add_xml_tag(string: str, xml_tag: Optional[str]):
             # NOTE: Anthropic docs recommends using <thinking> tag when using CoT + tool use
+            if f"<{xml_tag}>" in string and f"</{xml_tag}>" in string:
+                # don't nest if tags already exist
+                return string
             return f"<{xml_tag}>{string}</{xml_tag}" if xml_tag else string
 
         if self.role == "system":
@@ -988,6 +1015,23 @@ class Message(BaseMessage):
 
         return cohere_message
 
+    @staticmethod
+    def generate_otid_from_id(message_id: str, index: int) -> str:
+        """
+        Convert message id to bits and change the list bit to the index
+        """
+        if not 0 <= index < 128:
+            raise ValueError("Index must be between 0 and 127")
+
+        message_uuid = message_id.replace("message-", "")
+        uuid_int = int(message_uuid.replace("-", ""), 16)
+
+        # Clear last 7 bits and set them to index; supports up to 128 unique indices
+        uuid_int = (uuid_int & ~0x7F) | (index & 0x7F)
+
+        hex_str = f"{uuid_int:032x}"
+        return f"{hex_str[:8]}-{hex_str[8:12]}-{hex_str[12:16]}-{hex_str[16:20]}-{hex_str[20:]}"
+
 
 class ToolReturn(BaseModel):
     status: Literal["success", "error"] = Field(..., description="The status of the tool call")
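A note on the new otid helpers above: generate_otid() simply mints a random UUID for incoming MessageCreate payloads, while generate_otid_from_id() deterministically derives one otid per LettaMessage emitted from a stored Message by overwriting the low 7 bits of the message UUID with the message's position in the output list (hence the 0-127 limit). The standalone sketch below reproduces that derivation; the message id is invented purely for illustration.

import uuid

def generate_otid_from_id(message_id: str, index: int) -> str:
    # Mirrors the helper in the hunk above: keep the UUID, overwrite its low 7 bits with `index`
    if not 0 <= index < 128:
        raise ValueError("Index must be between 0 and 127")
    uuid_int = int(message_id.replace("message-", "").replace("-", ""), 16)
    uuid_int = (uuid_int & ~0x7F) | (index & 0x7F)
    hex_str = f"{uuid_int:032x}"
    return f"{hex_str[:8]}-{hex_str[8:12]}-{hex_str[12:16]}-{hex_str[16:20]}-{hex_str[20:]}"

message_id = "message-" + str(uuid.uuid4())  # hypothetical id, for illustration only
print(generate_otid_from_id(message_id, 0))
print(generate_otid_from_id(message_id, 1))  # same UUID except for the low 7 bits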
letta/schemas/tool.py CHANGED
@@ -104,6 +104,9 @@ class Tool(BaseTool):
         elif self.tool_type in {ToolType.LETTA_MULTI_AGENT_CORE}:
             # If it's letta multi-agent tool, we also generate the json_schema on the fly here
             self.json_schema = get_json_schema_from_module(module_name=LETTA_MULTI_AGENT_TOOL_MODULE_NAME, function_name=self.name)
+        elif self.tool_type in {ToolType.LETTA_SLEEPTIME_CORE}:
+            # If it's letta sleeptime core tool, we generate the json_schema on the fly here
+            self.json_schema = get_json_schema_from_module(module_name=LETTA_CORE_TOOL_MODULE_NAME, function_name=self.name)
 
         # At this point, we need to validate that at least json_schema is populated
         if not self.json_schema:
letta/schemas/usage.py CHANGED
@@ -23,3 +23,4 @@ class LettaUsageStatistics(BaseModel):
     step_count: int = Field(0, description="The number of steps taken by the agent.")
     # TODO: Optional for now. This field makes everyone's lives easier
     steps_messages: Optional[List[List[Message]]] = Field(None, description="The messages generated per step")
+    run_ids: Optional[List[str]] = Field(None, description="The background task run IDs associated with the agent interaction")
letta/server/db.py CHANGED
@@ -59,11 +59,13 @@ if settings.letta_pg_uri_no_default:
     # create engine
     engine = create_engine(
         settings.letta_pg_uri,
+        # f"{settings.letta_pg_uri}?options=-c%20client_encoding=UTF8",
         pool_size=settings.pg_pool_size,
         max_overflow=settings.pg_max_overflow,
         pool_timeout=settings.pg_pool_timeout,
         pool_recycle=settings.pg_pool_recycle,
         echo=settings.pg_echo,
+        # connect_args={"client_encoding": "utf8"},
     )
 else:
     # TODO: don't rely on config storage
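The two added lines are commented out, which suggests an attempt (left disabled) to force UTF-8 client encoding on the Postgres connection. If it were enabled, the usual SQLAlchemy/psycopg2 form would look roughly like the sketch below; the URI and pool sizes are placeholders, not the project's settings.

from sqlalchemy import create_engine

engine = create_engine(
    "postgresql+psycopg2://user:pass@localhost:5432/letta",  # placeholder URI
    pool_size=10,
    max_overflow=5,
    connect_args={"client_encoding": "utf8"},  # ask libpq/psycopg2 for a UTF-8 session
)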
letta/server/rest_api/app.py CHANGED
@@ -139,7 +139,7 @@ def create_application() -> "FastAPI":
 
     @app.on_event("startup")
     async def configure_executor():
-        print(f"Configured event loop executor with {settings.event_loop_threadpool_max_workers} workers.")
+        print(f"INFO: Configured event loop executor with {settings.event_loop_threadpool_max_workers} workers.")
         loop = asyncio.get_running_loop()
         executor = concurrent.futures.ThreadPoolExecutor(max_workers=settings.event_loop_threadpool_max_workers)
         loop.set_default_executor(executor)
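For context, this startup hook installs an explicitly sized ThreadPoolExecutor as the event loop's default executor, so every run_in_executor(None, ...) call in the server shares one pool. A minimal self-contained version of the same pattern, with the worker count hard-coded in place of settings.event_loop_threadpool_max_workers, might look like this:

import asyncio
import concurrent.futures

async def main():
    loop = asyncio.get_running_loop()
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=8)  # stand-in for the settings value
    loop.set_default_executor(executor)
    # Blocking work submitted with executor=None now runs on the sized pool
    result = await loop.run_in_executor(None, sum, range(10))
    print(result)

asyncio.run(main())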
letta/server/rest_api/chat_completions_interface.py CHANGED
@@ -155,7 +155,12 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         return
 
     def process_chunk(
-        self, chunk: ChatCompletionChunkResponse, message_id: str, message_date: datetime, expect_reasoning_content: bool = False
+        self,
+        chunk: ChatCompletionChunkResponse,
+        message_id: str,
+        message_date: datetime,
+        expect_reasoning_content: bool = False,
+        message_index: int = 0,
     ) -> None:
         """
         Called externally with a ChatCompletionChunkResponse. Transforms
@@ -172,7 +177,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         """
         return
 
-    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None) -> None:
         """
         Handle LLM reasoning or internal monologue. Example usage: if you want
         to capture chain-of-thought for debugging in a non-streaming scenario.
@@ -186,7 +191,7 @@ class ChatCompletionsStreamingInterface(AgentChunkStreamingInterface):
         """
         return
 
-    def function_message(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+    def function_message(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None) -> None:
         """
         Handle function-related log messages, typically of the form:
         It's a no-op by default.
letta/server/rest_api/interface.py CHANGED
@@ -165,7 +165,7 @@ class QueuingInterface(AgentInterface):
             print(vars(msg_obj))
             print(msg_obj.created_at.isoformat())
 
-    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None) -> None:
+    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None) -> None:
         """Handle the agent's internal monologue"""
         assert msg_obj is not None, "QueuingInterface requires msg_obj references for metadata"
         if self.debug:
@@ -209,7 +209,9 @@ class QueuingInterface(AgentInterface):
 
         self._queue_push(message_api=new_message, message_obj=msg_obj)
 
-    def function_message(self, msg: str, msg_obj: Optional[Message] = None, include_ran_messages: bool = False) -> None:
+    def function_message(
+        self, msg: str, msg_obj: Optional[Message] = None, include_ran_messages: bool = False, chunk_index: Optional[int] = None
+    ) -> None:
         """Handle the agent calling a function"""
         # TODO handle 'function' messages that indicate the start of a function call
         assert msg_obj is not None, "QueuingInterface requires msg_obj references for metadata"
@@ -466,6 +468,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # and `content` needs to be handled outside the interface
         expect_reasoning_content: bool = False,
         name: Optional[str] = None,
+        message_index: int = 0,
     ) -> Optional[Union[ReasoningMessage, ToolCallMessage, AssistantMessage]]:
         """
         Example data from non-streaming response looks like:
@@ -478,6 +481,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         """
         choice = chunk.choices[0]
         message_delta = choice.delta
+        otid = Message.generate_otid_from_id(message_id, message_index)
 
         if (
             message_delta.content is None
@@ -499,6 +503,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 signature=message_delta.reasoning_content_signature,
                 source="reasoner_model" if message_delta.reasoning_content_signature else "non_reasoner_model",
                 name=name,
+                otid=otid,
             )
         elif expect_reasoning_content and message_delta.redacted_reasoning_content is not None:
             processed_chunk = HiddenReasoningMessage(
@@ -507,6 +512,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 hidden_reasoning=message_delta.redacted_reasoning_content,
                 state="redacted",
                 name=name,
+                otid=otid,
             )
         elif expect_reasoning_content and message_delta.content is not None:
             # "ignore" content if we expect reasoning content
@@ -534,6 +540,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         tool_call_id=None,
                     ),
                     name=name,
+                    otid=otid,
                 )
 
             except json.JSONDecodeError as e:
@@ -564,6 +571,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 date=message_date,
                 reasoning=message_delta.content,
                 name=name,
+                otid=otid,
             )
 
         # tool calls
@@ -612,7 +620,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     # TODO: Assumes consistent state and that prev_content is subset of new_content
                     diff = new_content.replace(prev_content, "", 1)
                     self.current_json_parse_result = parsed_args
-                    processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff, name=name)
+                    processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff, name=name, otid=otid)
                 else:
                     return None
 
@@ -645,6 +653,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         tool_call_id=tool_call_delta.get("id"),
                     ),
                     name=name,
+                    otid=otid,
                 )
 
             elif self.inner_thoughts_in_kwargs and tool_call.function:
@@ -681,6 +690,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=message_date,
                         reasoning=updates_inner_thoughts,
                         name=name,
+                        otid=otid,
                     )
                     # Additionally inner thoughts may stream back with a chunk of main JSON
                     # In that case, since we can only return a chunk at a time, we should buffer it
@@ -717,6 +727,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                 tool_call_id=self.function_id_buffer,
                             ),
                             name=name,
+                            otid=otid,
                         )
 
                         # Record what the last function name we flushed was
@@ -774,6 +785,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                     date=message_date,
                                     content=combined_chunk,
                                     name=name,
+                                    otid=otid,
                                 )
                                 # Store the ID of the tool call so allow skipping the corresponding response
                                 if self.function_id_buffer:
@@ -798,7 +810,9 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                     # TODO: Assumes consistent state and that prev_content is subset of new_content
                                     diff = new_content.replace(prev_content, "", 1)
                                     self.current_json_parse_result = parsed_args
-                                    processed_chunk = AssistantMessage(id=message_id, date=message_date, content=diff, name=name)
+                                    processed_chunk = AssistantMessage(
+                                        id=message_id, date=message_date, content=diff, name=name, otid=otid
+                                    )
                                 else:
                                     return None
 
@@ -823,6 +837,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                     tool_call_id=self.function_id_buffer,
                                 ),
                                 name=name,
+                                otid=otid,
                             )
                             # clear buffer
                             self.function_args_buffer = None
@@ -838,6 +853,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                                     tool_call_id=self.function_id_buffer,
                                 ),
                                 name=name,
+                                otid=otid,
                             )
                             self.function_id_buffer = None
 
@@ -967,6 +983,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         tool_call_id=tool_call_delta.get("id"),
                     ),
                     name=name,
+                    otid=otid,
                 )
 
             elif choice.finish_reason is not None:
@@ -1048,6 +1065,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         message_date: datetime,
         expect_reasoning_content: bool = False,
         name: Optional[str] = None,
+        message_index: int = 0,
     ):
         """Process a streaming chunk from an OpenAI-compatible server.
 
@@ -1074,18 +1092,20 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
             message_date=message_date,
             expect_reasoning_content=expect_reasoning_content,
             name=name,
+            message_index=message_index,
         )
-
         if processed_chunk is None:
             return
 
         self._push_to_buffer(processed_chunk)
 
+        return processed_chunk.message_type
+
     def user_message(self, msg: str, msg_obj: Optional[Message] = None):
         """Letta receives a user message"""
         return
 
-    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None):
+    def internal_monologue(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta generates some internal monologue"""
         if not self.streaming_mode:
 
@@ -1102,6 +1122,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                 date=msg_obj.created_at,
                 reasoning=msg,
                 name=msg_obj.name,
+                otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
             )
 
             self._push_to_buffer(processed_chunk)
@@ -1113,6 +1134,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=msg_obj.created_at,
                         reasoning=content.text,
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                 elif isinstance(content, ReasoningContent):
                     processed_chunk = ReasoningMessage(
@@ -1122,6 +1144,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         reasoning=content.reasoning,
                         signature=content.signature,
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                 elif isinstance(content, RedactedReasoningContent):
                     processed_chunk = HiddenReasoningMessage(
@@ -1130,6 +1153,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         state="redacted",
                         hidden_reasoning=content.data,
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
 
             self._push_to_buffer(processed_chunk)
@@ -1142,7 +1166,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
         # NOTE: this is a no-op, we handle this special case in function_message instead
         return
 
-    def function_message(self, msg: str, msg_obj: Optional[Message] = None):
+    def function_message(self, msg: str, msg_obj: Optional[Message] = None, chunk_index: Optional[int] = None):
         """Letta calls a function"""
 
         # TODO handle 'function' messages that indicate the start of a function call
@@ -1191,6 +1215,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=msg_obj.created_at,
                         content=func_args["message"],
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                     self._push_to_buffer(processed_chunk)
                 except Exception as e:
@@ -1214,6 +1239,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                         date=msg_obj.created_at,
                         content=func_args[self.assistant_message_tool_kwarg],
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
                     # Store the ID of the tool call so allow skipping the corresponding response
                     self.prev_assistant_message_id = function_call.id
@@ -1227,6 +1253,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                             tool_call_id=function_call.id,
                         ),
                         name=msg_obj.name,
+                        otid=Message.generate_otid_from_id(msg_obj.id, chunk_index) if chunk_index is not None else None,
                     )
 
                 # processed_chunk = {
@@ -1267,6 +1294,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     stdout=msg_obj.tool_returns[0].stdout if msg_obj.tool_returns else None,
                     stderr=msg_obj.tool_returns[0].stderr if msg_obj.tool_returns else None,
                     name=msg_obj.name,
+                    otid=Message.generate_otid_from_id(msg_obj.id, chunk_index),
                 )
 
             elif msg.startswith("Error: "):
@@ -1282,6 +1310,7 @@ class StreamingServerInterface(AgentChunkStreamingInterface):
                     stdout=msg_obj.tool_returns[0].stdout if msg_obj.tool_returns else None,
                     stderr=msg_obj.tool_returns[0].stderr if msg_obj.tool_returns else None,
                     name=msg_obj.name,
+                    otid=Message.generate_otid_from_id(msg_obj.id, chunk_index),
                 )
 
            else:
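The common thread in these interface changes is that every streamed chunk now carries an otid derived from the message id plus a chunk_index/message_index. Assuming the index stays fixed while one logical message streams, which is how the plumbing above reads, a client can group partial chunks by otid. A toy illustration with invented payloads:

from collections import defaultdict

chunks = [
    {"otid": "0a1b2c3d-0000-0000-0000-000000000000", "content": "Hel"},
    {"otid": "0a1b2c3d-0000-0000-0000-000000000000", "content": "lo!"},
    {"otid": "0a1b2c3d-0000-0000-0000-000000000001", "content": "send_message(...)"},
]

assembled = defaultdict(str)
for chunk in chunks:
    assembled[chunk["otid"]] += chunk["content"]

print(dict(assembled))  # two logical messages, keyed by their stable otid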
letta/server/rest_api/routers/v1/agents.py CHANGED
@@ -8,6 +8,7 @@ from fastapi.responses import JSONResponse
 from marshmallow import ValidationError
 from pydantic import Field
 from sqlalchemy.exc import IntegrityError, OperationalError
+from starlette.responses import StreamingResponse
 
 from letta.agents.letta_agent import LettaAgent
 from letta.constants import DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
@@ -30,7 +31,6 @@ from letta.schemas.user import User
 from letta.serialize_schemas.pydantic_agent_schema import AgentSchema
 from letta.server.rest_api.utils import get_letta_server
 from letta.server.server import SyncServer
-from letta.settings import settings
 
 # These can be forward refs, but because Fastapi needs them at runtime the must be imported normally
 
@@ -130,6 +130,10 @@ async def import_agent_serialized(
         description="If set to True, existing tools can get their source code overwritten by the uploaded tool definitions. Note that Letta core tools can never be updated externally.",
     ),
     project_id: Optional[str] = Query(None, description="The project ID to associate the uploaded agent with."),
+    strip_messages: bool = Query(
+        False,
+        description="If set to True, strips all messages from the agent before importing.",
+    ),
 ):
     """
     Import a serialized agent file and recreate the agent in the system.
@@ -149,6 +153,7 @@ async def import_agent_serialized(
             append_copy_suffix=append_copy_suffix,
             override_existing_tools=override_existing_tools,
             project_id=project_id,
+            strip_messages=strip_messages,
         )
         return new_agent
 
@@ -585,8 +590,10 @@ async def send_message(
     This endpoint accepts a message from a user and processes it through the agent.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
-    if settings.use_experimental:
-        logger.warning("USING EXPERIMENTAL!")
+    # TODO: This is redundant, remove soon
+    agent = server.agent_manager.get_agent_by_id(agent_id, actor)
+
+    if agent.llm_config.model_endpoint_type == "anthropic" and not agent.enable_sleeptime and not agent.multi_agent_group:
         experimental_agent = LettaAgent(
             agent_id=agent_id,
             message_manager=server.message_manager,
@@ -639,17 +646,38 @@ async def send_message_streaming(
     It will stream the steps of the response always, and stream the tokens if 'stream_tokens' is set to True.
     """
     actor = server.user_manager.get_user_or_default(user_id=actor_id)
-    result = await server.send_message_to_agent(
-        agent_id=agent_id,
-        actor=actor,
-        messages=request.messages,
-        stream_steps=True,
-        stream_tokens=request.stream_tokens,
-        # Support for AssistantMessage
-        use_assistant_message=request.use_assistant_message,
-        assistant_message_tool_name=request.assistant_message_tool_name,
-        assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
-    )
+    # TODO: This is redundant, remove soon
+    agent = server.agent_manager.get_agent_by_id(agent_id, actor)
+
+    if agent.llm_config.model_endpoint_type == "anthropic" and not agent.enable_sleeptime and not agent.multi_agent_group:
+        experimental_agent = LettaAgent(
+            agent_id=agent_id,
+            message_manager=server.message_manager,
+            agent_manager=server.agent_manager,
+            block_manager=server.block_manager,
+            passage_manager=server.passage_manager,
+            actor=actor,
+        )
+
+        messages = request.messages
+        content = messages[0].content[0].text if messages and not isinstance(messages[0].content, str) else messages[0].content
+        result = StreamingResponse(
+            experimental_agent.step_stream(UserMessage(content=content), max_steps=10, use_assistant_message=request.use_assistant_message),
+            media_type="text/event-stream",
+        )
+    else:
+        result = await server.send_message_to_agent(
+            agent_id=agent_id,
+            actor=actor,
+            messages=request.messages,
+            stream_steps=True,
+            stream_tokens=request.stream_tokens,
+            # Support for AssistantMessage
+            use_assistant_message=request.use_assistant_message,
+            assistant_message_tool_name=request.assistant_message_tool_name,
+            assistant_message_tool_kwarg=request.assistant_message_tool_kwarg,
+        )
+
     return result
 
 
@@ -665,31 +693,17 @@ async def process_message_background(
 ) -> None:
     """Background task to process the message and update job status."""
     try:
-        # TODO(matt) we should probably make this stream_steps and log each step as it progresses, so the job update GET can see the total steps so far + partial usage?
-        if settings.use_experimental:
-            logger.warning("USING EXPERIMENTAL!")
-            experimental_agent = LettaAgent(
-                agent_id=agent_id,
-                message_manager=server.message_manager,
-                agent_manager=server.agent_manager,
-                block_manager=server.block_manager,
-                passage_manager=server.passage_manager,
-                actor=actor,
-            )
-            content = messages[0].content[0].text if messages and not isinstance(messages[0].content, str) else messages[0].content
-            result = await experimental_agent.step(UserMessage(content=content), max_steps=10)
-        else:
-            result = await server.send_message_to_agent(
-                agent_id=agent_id,
-                actor=actor,
-                messages=messages,
-                stream_steps=False,  # NOTE(matt)
-                stream_tokens=False,
-                use_assistant_message=use_assistant_message,
-                assistant_message_tool_name=assistant_message_tool_name,
-                assistant_message_tool_kwarg=assistant_message_tool_kwarg,
-                metadata={"job_id": job_id},  # Pass job_id through metadata
-            )
+        result = await server.send_message_to_agent(
+            agent_id=agent_id,
+            actor=actor,
+            messages=messages,
+            stream_steps=False,  # NOTE(matt)
+            stream_tokens=False,
+            use_assistant_message=use_assistant_message,
+            assistant_message_tool_name=assistant_message_tool_name,
+            assistant_message_tool_kwarg=assistant_message_tool_kwarg,
+            metadata={"job_id": job_id},  # Pass job_id through metadata
+        )
 
         # Update job status to completed
         job_update = JobUpdate(
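One structural change worth calling out above: for Anthropic-backed agents without sleeptime or a multi-agent group, send_message_streaming now bypasses server.send_message_to_agent and wraps experimental_agent.step_stream(...) directly in Starlette's StreamingResponse with an SSE media type. A minimal FastAPI sketch of that wrapping pattern follows; the generator body is invented, and only the response wiring mirrors the diff.

import asyncio

from fastapi import FastAPI
from starlette.responses import StreamingResponse

app = FastAPI()

async def fake_step_stream():
    # Stand-in for experimental_agent.step_stream(...)
    for event in ["data: reasoning chunk\n\n", "data: assistant chunk\n\n"]:
        yield event
        await asyncio.sleep(0)

@app.get("/demo/stream")
async def demo_stream():
    return StreamingResponse(fake_step_stream(), media_type="text/event-stream")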