letta-nightly 0.7.5.dev20250428110034__py3-none-any.whl → 0.7.6.dev20250429062643__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. letta/__init__.py +1 -1
  2. letta/agents/base_agent.py +1 -1
  3. letta/agents/ephemeral_memory_agent.py +353 -43
  4. letta/agents/voice_agent.py +196 -62
  5. letta/constants.py +2 -0
  6. letta/helpers/datetime_helpers.py +7 -0
  7. letta/interfaces/openai_chat_completions_streaming_interface.py +16 -12
  8. letta/llm_api/google_ai_client.py +4 -0
  9. letta/llm_api/llm_api_tools.py +5 -2
  10. letta/llm_api/openai.py +2 -1
  11. letta/llm_api/openai_client.py +3 -2
  12. letta/schemas/llm_config.py +5 -1
  13. letta/schemas/openai/chat_completion_request.py +1 -0
  14. letta/schemas/providers.py +4 -3
  15. letta/schemas/sandbox_config.py +4 -4
  16. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +4 -10
  17. letta/server/rest_api/routers/v1/voice.py +8 -18
  18. letta/server/rest_api/utils.py +26 -20
  19. letta/server/server.py +67 -26
  20. letta/services/helpers/agent_manager_helper.py +2 -2
  21. letta/services/helpers/tool_execution_helper.py +30 -3
  22. letta/services/summarizer/summarizer.py +121 -54
  23. letta/services/tool_executor/tool_execution_sandbox.py +13 -9
  24. letta/services/tool_sandbox/local_sandbox.py +4 -4
  25. letta/services/user_manager.py +5 -2
  26. letta/settings.py +4 -2
  27. letta/system.py +0 -1
  28. letta/tracing.py +1 -0
  29. {letta_nightly-0.7.5.dev20250428110034.dist-info → letta_nightly-0.7.6.dev20250429062643.dist-info}/METADATA +1 -1
  30. {letta_nightly-0.7.5.dev20250428110034.dist-info → letta_nightly-0.7.6.dev20250429062643.dist-info}/RECORD +33 -33
  31. {letta_nightly-0.7.5.dev20250428110034.dist-info → letta_nightly-0.7.6.dev20250429062643.dist-info}/LICENSE +0 -0
  32. {letta_nightly-0.7.5.dev20250428110034.dist-info → letta_nightly-0.7.6.dev20250429062643.dist-info}/WHEEL +0 -0
  33. {letta_nightly-0.7.5.dev20250428110034.dist-info → letta_nightly-0.7.6.dev20250429062643.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.7.5"
+__version__ = "0.7.6"
 
 # import clients
 from letta.client.client import LocalClient, RESTClient, create_client
letta/agents/base_agent.py CHANGED
@@ -63,4 +63,4 @@ class BaseAgent(ABC):
             else:
                 return ""
 
-        return [{"role": input_message.role, "content": get_content(input_message)} for input_message in input_messages]
+        return [{"role": input_message.role.value, "content": get_content(input_message)} for input_message in input_messages]
letta/agents/ephemeral_memory_agent.py CHANGED
@@ -1,24 +1,29 @@
-from typing import AsyncGenerator, Dict, List
+import json
+import xml.etree.ElementTree as ET
+from typing import AsyncGenerator, Dict, List, Tuple, Union
 
 import openai
 
 from letta.agents.base_agent import BaseAgent
-from letta.helpers.tool_execution_helper import enable_strict_mode
-from letta.orm.enums import ToolType
 from letta.schemas.agent import AgentState
-from letta.schemas.enums import MessageRole
+from letta.schemas.block import BlockUpdate
+from letta.schemas.enums import MessageStreamStatus
+from letta.schemas.letta_message import LegacyLettaMessage, LettaMessage
 from letta.schemas.letta_message_content import TextContent
-from letta.schemas.message import Message, MessageCreate
-from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
+from letta.schemas.letta_response import LettaResponse
+from letta.schemas.message import MessageCreate
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, SystemMessage, Tool, UserMessage
+from letta.schemas.usage import LettaUsageStatistics
 from letta.schemas.user import User
+from letta.server.rest_api.utils import convert_in_context_letta_messages_to_openai, create_input_messages
 from letta.services.agent_manager import AgentManager
+from letta.services.block_manager import BlockManager
 from letta.services.message_manager import MessageManager
 
 
 class EphemeralMemoryAgent(BaseAgent):
     """
     A stateless agent that helps with offline memory computations.
-
     """
 
     def __init__(
@@ -27,6 +32,9 @@ class EphemeralMemoryAgent(BaseAgent):
         openai_client: openai.AsyncClient,
         message_manager: MessageManager,
         agent_manager: AgentManager,
+        block_manager: BlockManager,
+        target_block_label: str,
+        message_transcripts: List[str],
         actor: User,
     ):
         super().__init__(
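
Constructing the agent now requires the three new arguments. A hypothetical wiring sketch; it assumes the parameters preceding `openai_client` (e.g. `agent_id`) are unchanged by this diff and that the manager objects already exist:

```python
import openai

from letta.agents.ephemeral_memory_agent import EphemeralMemoryAgent

def build_sleeptime_agent(agent_id, message_manager, agent_manager, block_manager, actor):
    """Hypothetical factory; keyword names mirror the new __init__ signature above."""
    return EphemeralMemoryAgent(
        agent_id=agent_id,                  # assumed unchanged from the elided parameters
        openai_client=openai.AsyncClient(),
        message_manager=message_manager,
        agent_manager=agent_manager,
        block_manager=block_manager,        # new: persists the rewritten block at the end of step()
        target_block_label="human",         # new: which memory block to maintain
        message_transcripts=[],             # new: raw lines that store_memory slices by index
        actor=actor,
    )
```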
@@ -37,48 +45,122 @@ class EphemeralMemoryAgent(BaseAgent):
             actor=actor,
         )
 
-    async def step(self, input_messages: List[MessageCreate]) -> List[Message]:
+        self.block_manager = block_manager
+        self.target_block_label = target_block_label
+        self.message_transcripts = message_transcripts
+
+    def update_message_transcript(self, message_transcripts: List[str]):
+        self.message_transcripts = message_transcripts
+
+    async def step(self, input_messages: List[MessageCreate], max_steps: int = 10) -> LettaResponse:
         """
-        Synchronous method that takes a user's input text and returns a summary from OpenAI.
-        Returns a list of ephemeral Message objects containing both the user text and the assistant summary.
+        Process the user's input message, allowing the model to call memory-related tools
+        until it decides to stop and provide a final response.
         """
         agent_state = self.agent_manager.get_agent_by_id(agent_id=self.agent_id, actor=self.actor)
+        in_context_messages = create_input_messages(input_messages=input_messages, agent_id=self.agent_id, actor=self.actor)
+        openai_messages = convert_in_context_letta_messages_to_openai(in_context_messages, exclude_system_messages=True)
 
-        openai_messages = self.pre_process_input_message(input_messages=input_messages)
-        request = self._build_openai_request(openai_messages, agent_state)
+        # 1. Store memories
+        request = self._build_openai_request(
+            openai_messages, agent_state, tools=self._build_store_memory_tool_schemas(), system=self._get_memory_store_system_prompt()
+        )
 
         chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
+        assistant_message = chat_completion.choices[0].message
 
-        return [
-            Message(
-                role=MessageRole.assistant,
-                content=[TextContent(text=chat_completion.choices[0].message.content.strip())],
-            )
-        ]
+        # Process tool calls
+        tool_call = assistant_message.tool_calls[0]
+        function_name = tool_call.function.name
+        function_args = json.loads(tool_call.function.arguments)
 
-    def pre_process_input_message(self, input_messages: List[MessageCreate]) -> List[Dict]:
-        input_message = input_messages[0]
-        input_prompt_augmented = f"""
-        You are a memory recall agent whose job is to comb through a large set of messages and write relevant memories in relation to a user query.
-        Your response will directly populate a "memory block" called "human" that describes the user, that will be used to answer more questions in the future.
-        You should err on the side of being more verbose, and also try to *predict* the trajectory of the conversation, and pull memories or messages you think will be relevant to where the conversation is going.
+        if function_name == "store_memory":
+            print("Called store_memory")
+            print(function_args)
+            for chunk_args in function_args.get("chunks"):
+                self.store_memory(agent_state=agent_state, **chunk_args)
+            result = "Successfully stored memories"
+        else:
+            raise ValueError("Error: Unknown tool function '{function_name}'")
 
-        Your response should include:
-        - A high level summary of the relevant events/timeline of the conversation relevant to the query
-        - Direct citations of quotes from the messages you used while creating the summary
+        openai_messages.append(
+            {
+                "role": "assistant",
+                "content": assistant_message.content,
+                "tool_calls": [
+                    {
+                        "id": tool_call.id,
+                        "type": "function",
+                        "function": {"name": function_name, "arguments": tool_call.function.arguments},
+                    }
+                ],
+            }
+        )
+        openai_messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": str(result)})
 
-        Here is a history of the messages so far:
+        # 2. Execute rethink block memory loop
+        human_block_content = self.agent_manager.get_block_with_label(
+            agent_id=self.agent_id, block_label=self.target_block_label, actor=self.actor
+        )
+        rethink_command = f"""
+        Here is the current memory block created earlier:
 
-        {self._format_messages_llm_friendly()}
+        ### CURRENT MEMORY
+        {human_block_content}
+        ### END CURRENT MEMORY
 
-        This is the query:
+        Please refine this block:
 
-        "{input_message.content}"
+        - Merge in any new facts and remove outdated or contradictory details.
+        - Organize related information together (e.g., preferences, background, ongoing goals).
+        - Add any light, supportable inferences that deepen understanding—but do not invent unsupported details.
 
-        Your response:
+        Use `rethink_memory(new_memory)` as many times as you need to iteratively improve the text. When it’s fully polished and complete, call `finish_rethinking_memory()`.
         """
+        rethink_command = UserMessage(content=rethink_command)
+        openai_messages.append(rethink_command.model_dump())
+
+        for _ in range(max_steps):
+            request = self._build_openai_request(
+                openai_messages, agent_state, tools=self._build_sleeptime_tools(), system=self._get_rethink_memory_system_prompt()
+            )
+            chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
+            assistant_message = chat_completion.choices[0].message
+
+            # Process tool calls
+            tool_call = assistant_message.tool_calls[0]
+            function_name = tool_call.function.name
+            function_args = json.loads(tool_call.function.arguments)
+
+            if function_name == "rethink_memory":
+                print("Called rethink_memory")
+                print(function_args)
+                result = self.rethink_memory(agent_state=agent_state, **function_args)
+            elif function_name == "finish_rethinking_memory":
+                print("Called finish_rethinking_memory")
+                break
+            else:
+                result = f"Error: Unknown tool function '{function_name}'"
+            openai_messages.append(
+                {
+                    "role": "assistant",
+                    "content": assistant_message.content,
+                    "tool_calls": [
+                        {
+                            "id": tool_call.id,
+                            "type": "function",
+                            "function": {"name": function_name, "arguments": tool_call.function.arguments},
+                        }
+                    ],
+                }
+            )
+            openai_messages.append({"role": "tool", "tool_call_id": tool_call.id, "content": str(result)})
+
+        # Actually save the memory:
+        target_block = agent_state.memory.get_block(self.target_block_label)
+        self.block_manager.update_block(block_id=target_block.id, block_update=BlockUpdate(value=target_block.value), actor=self.actor)
 
-        return [{"role": "user", "content": input_prompt_augmented}]
+        return LettaResponse(messages=[], usage=LettaUsageStatistics())
 
     def _format_messages_llm_friendly(self):
         messages = self.message_manager.list_messages_for_agent(agent_id=self.agent_id, actor=self.actor)
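
The reworked `step()` is now a two-phase tool loop: one forced `store_memory` call to archive evicted transcript chunks, then up to `max_steps` rounds of `rethink_memory` until the model calls `finish_rethinking_memory`. A minimal sketch of how a caller might drive it (the driver function and the `MessageCreate` field shapes are assumptions, not part of this diff):

```python
from letta.schemas.message import MessageCreate

async def run_offline_memory_pass(agent, transcript: list[str]):
    """Hypothetical driver for the two-phase step() above; not part of this diff."""
    agent.update_message_transcript(transcript)  # raw lines store_memory slices by index
    # Phase 1 (store_memory) and phase 2 (the rethink loop) both run inside step().
    response = await agent.step([MessageCreate(role="user", content="Update the memory block.")])
    return response  # LettaResponse(messages=[], usage=LettaUsageStatistics()) in this version
```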
@@ -86,12 +168,15 @@ class EphemeralMemoryAgent(BaseAgent):
         llm_friendly_messages = [f"{m.role}: {m.content[0].text}" for m in messages if m.content and isinstance(m.content[0], TextContent)]
         return "\n".join(llm_friendly_messages)
 
-    def _build_openai_request(self, openai_messages: List[Dict], agent_state: AgentState) -> ChatCompletionRequest:
+    def _build_openai_request(
+        self, openai_messages: List[Dict], agent_state: AgentState, tools: List[Tool], system: str
+    ) -> ChatCompletionRequest:
+        system_message = SystemMessage(role="system", content=system)
         openai_request = ChatCompletionRequest(
-            model=agent_state.llm_config.model,
-            messages=openai_messages,
-            # tools=self._build_tool_schemas(agent_state),
-            # tool_choice="auto",
+            model="gpt-4o",  # agent_state.llm_config.model, # TODO: Separate config for summarizer?
+            messages=[system_message] + openai_messages,
+            tools=tools,
+            tool_choice="required",
             user=self.actor.id,
             max_completion_tokens=agent_state.llm_config.max_tokens,
             temperature=agent_state.llm_config.temperature,
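
`tool_choice="required"` is what lets `step()` assume `assistant_message.tool_calls[0]` exists: the model must answer with a tool call rather than plain text. A standalone sketch of the same pattern against the OpenAI SDK directly (tool schema abbreviated; the message contents are illustrative only):

```python
from openai import OpenAI

client = OpenAI()
resp = client.chat.completions.create(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You maintain a memory block."},
        {"role": "user", "content": "Remember that I prefer short answers."},
    ],
    tools=[{
        "type": "function",
        "function": {
            "name": "rethink_memory",
            "parameters": {
                "type": "object",
                "properties": {"new_memory": {"type": "string"}},
                "required": ["new_memory"],
            },
        },
    }],
    tool_choice="required",  # the model must emit a tool call; a bare text reply is not allowed
)
assert resp.choices[0].message.tool_calls  # guaranteed non-empty under "required"
```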
@@ -99,14 +184,239 @@ class EphemeralMemoryAgent(BaseAgent):
         )
         return openai_request
 
-    def _build_tool_schemas(self, agent_state: AgentState) -> List[Tool]:
-        # Only include memory tools
-        tools = [t for t in agent_state.tools if t.tool_type in {ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}]
+    def _build_store_memory_tool_schemas(self) -> List[Tool]:
+        """
+        Build the schemas for the three memory-related tools.
+        """
+        tools = [
+            Tool(
+                type="function",
+                function={
+                    "name": "store_memory",
+                    "description": "Archive coherent chunks of dialogue that will be evicted, preserving raw lines and a brief contextual description.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "chunks": {
+                                "type": "array",
+                                "items": {
+                                    "type": "object",
+                                    "properties": {
+                                        "start_index": {"type": "integer", "description": "Index of first line in original history."},
+                                        "end_index": {"type": "integer", "description": "Index of last line in original history."},
+                                        "context": {
+                                            "type": "string",
+                                            "description": "A high-level description providing context for why this chunk matters.",
+                                        },
+                                    },
+                                    "required": ["start_index", "end_index", "context"],
+                                },
+                            }
+                        },
+                        "required": ["chunks"],
+                        "additionalProperties": False,
+                    },
+                },
+            ),
+        ]
+
+        return tools
+
+    def _build_sleeptime_tools(self) -> List[Tool]:
+        tools = [
+            Tool(
+                type="function",
+                function={
+                    "name": "rethink_memory",
+                    "description": (
+                        "Rewrite memory block for the main agent, new_memory should contain all current "
+                        "information from the block that is not outdated or inconsistent, integrating any "
+                        "new information, resulting in a new memory block that is organized, readable, and "
+                        "comprehensive."
+                    ),
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "new_memory": {
+                                "type": "string",
+                                "description": (
+                                    "The new memory with information integrated from the memory block. "
+                                    "If there is no new information, then this should be the same as the "
+                                    "content in the source block."
+                                ),
+                            },
+                        },
+                        "required": ["new_memory"],
+                        "additionalProperties": False,
+                    },
+                },
+            ),
+            Tool(
+                type="function",
+                function={
+                    "name": "finish_rethinking_memory",
+                    "description": ("This function is called when the agent is done rethinking the memory."),
+                    "parameters": {
+                        "type": "object",
+                        "properties": {},
+                        "required": [],
+                        "additionalProperties": False,
+                    },
+                },
+            ),
+        ]
+
+        return tools
 
-    return [Tool(type="function", function=enable_strict_mode(t.json_schema)) for t in tools]
+    def rethink_memory(self, new_memory: str, agent_state: AgentState) -> str:
+        if agent_state.memory.get_block(self.target_block_label) is None:
+            agent_state.memory.create_block(label=self.target_block_label, value=new_memory)
 
-    async def step_stream(self, input_messages: List[MessageCreate]) -> AsyncGenerator[str, None]:
+        agent_state.memory.update_block_value(label=self.target_block_label, value=new_memory)
+        return "Successfully updated memory"
+
+    def store_memory(self, start_index: int, end_index: int, context: str, agent_state: AgentState) -> str:
+        """
+        Store a memory.
+        """
+        try:
+            messages = self.message_transcripts[start_index : end_index + 1]
+            memory = self.serialize(messages, context)
+            self.agent_manager.passage_manager.insert_passage(
+                agent_state=agent_state,
+                agent_id=agent_state.id,
+                text=memory,
+                actor=self.actor,
+            )
+            self.agent_manager.rebuild_system_prompt(agent_id=agent_state.id, actor=self.actor, force=True)
+
+            return "Sucessfully stored memory"
+        except Exception as e:
+            return f"Failed to store memory given start_index {start_index} and end_index {end_index}: {e}"
+
+    def serialize(self, messages: List[str], context: str) -> str:
+        """
+        Produce an XML document like:
+
+        <memory>
+            <messages>
+                <message>…</message>
+                <message>…</message>
+
+            </messages>
+            <context>…</context>
+        </memory>
+        """
+        root = ET.Element("memory")
+
+        msgs_el = ET.SubElement(root, "messages")
+        for msg in messages:
+            m = ET.SubElement(msgs_el, "message")
+            m.text = msg
+
+        sum_el = ET.SubElement(root, "context")
+        sum_el.text = context
+
+        # ET.tostring will escape reserved chars for you
+        return ET.tostring(root, encoding="unicode")
+
+    def deserialize(self, xml_str: str) -> Tuple[List[str], str]:
+        """
+        Parse the XML back into (messages, context). Raises ValueError if tags are missing.
+        """
+        try:
+            root = ET.fromstring(xml_str)
+        except ET.ParseError as e:
+            raise ValueError(f"Invalid XML: {e}")
+
+        msgs_el = root.find("messages")
+        if msgs_el is None:
+            raise ValueError("Missing <messages> section")
+
+        messages = []
+        for m in msgs_el.findall("message"):
+            # .text may be None if empty, so coerce to empty string
+            messages.append(m.text or "")
+
+        sum_el = root.find("context")
+        if sum_el is None:
+            raise ValueError("Missing <context> section")
+        context = sum_el.text or ""
+
+        return messages, context
+
+    async def step_stream(
+        self, input_messages: List[MessageCreate], max_steps: int = 10
+    ) -> AsyncGenerator[Union[LettaMessage, LegacyLettaMessage, MessageStreamStatus], None]:
         """
         This agent is synchronous-only. If called in an async context, raise an error.
         """
         raise NotImplementedError("EphemeralMemoryAgent does not support async step.")
+
+    # TODO: Move these to independent text files
+    def _get_memory_store_system_prompt(self) -> str:
+        return """
+        You are a memory-recall assistant working asynchronously alongside a main chat agent that retains only a portion of the message history in its context window.
+
+        When given a full transcript with lines marked (Older) or (Newer), you should:
+        1. Segment the (Older) portion into coherent chunks by topic, instruction, or preference.
+        2. For each chunk, produce only:
+           - start_index: the first line’s index
+           - end_index: the last line’s index
+           - context: a blurb explaining why this chunk matters
+
+        Return exactly one JSON tool call to `store_memory`, consider this miniature example:
+
+        ---
+
+        (Older)
+        0. user: Okay. Got it. Keep your answers shorter, please.
+        1. assistant: Sure thing! I’ll keep it brief. What would you like to know?
+        2. user: I like basketball.
+        3. assistant: That's great! Do you have a favorite team or player?
+
+        (Newer)
+        4. user: Yeah. I like basketball.
+        5. assistant: Awesome! What do you enjoy most about basketball?
+
+        ---
+
+        Example output:
+
+        ```json
+        {
+            "name": "store_memory",
+            "arguments": {
+                "chunks": [
+                    {
+                        "start_index": 0,
+                        "end_index": 1,
+                        "context": "User explicitly asked the assistant to keep responses concise."
+                    },
+                    {
+                        "start_index": 2,
+                        "end_index": 3,
+                        "context": "User enjoys basketball and prompted follow-up about their favorite team or player."
+                    }
+                ]
+            }
+        }
+        ```
+        """
+
+    def _get_rethink_memory_system_prompt(self) -> str:
+        return """
+        SYSTEM
+        You are a Memory-Updater agent. Your job is to iteratively refine the given memory block until it’s concise, organized, and complete.
+
+        Instructions:
+        - Call `rethink_memory(new_memory: string)` as many times as you like. Each call should submit a fully revised version of the block so far.
+        - When you’re fully satisfied, call `finish_rethinking_memory()`.
+        - Don’t output anything else—only the JSON for these tool calls.
+
+        Goals:
+        - Merge in new facts and remove contradictions.
+        - Group related details (preferences, biography, goals).
+        - Draw light, supportable inferences without inventing facts.
+        - Preserve every critical piece of information.
+        """