letta-nightly 0.6.43.dev20250320104204__py3-none-any.whl → 0.6.43.dev20250322104133__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of letta-nightly might be problematic.
- letta/agent.py +2 -2
- letta/agents/ephemeral_memory_agent.py +114 -0
- letta/agents/{low_latency_agent.py → voice_agent.py} +133 -79
- letta/client/client.py +1 -1
- letta/embeddings.py +3 -14
- letta/functions/function_sets/multi_agent.py +46 -1
- letta/functions/helpers.py +10 -57
- letta/functions/mcp_client/base_client.py +7 -9
- letta/functions/mcp_client/exceptions.py +6 -0
- letta/helpers/tool_execution_helper.py +9 -7
- letta/llm_api/anthropic.py +1 -19
- letta/llm_api/aws_bedrock.py +2 -2
- letta/llm_api/azure_openai.py +22 -46
- letta/llm_api/llm_api_tools.py +15 -4
- letta/orm/sqlalchemy_base.py +106 -7
- letta/schemas/openai/chat_completion_request.py +20 -1
- letta/schemas/providers.py +251 -0
- letta/schemas/tool.py +4 -1
- letta/server/rest_api/app.py +1 -11
- letta/server/rest_api/optimistic_json_parser.py +5 -5
- letta/server/rest_api/routers/v1/tools.py +34 -2
- letta/server/rest_api/routers/v1/voice.py +5 -5
- letta/server/server.py +6 -0
- letta/services/agent_manager.py +1 -1
- letta/services/block_manager.py +8 -6
- letta/services/message_manager.py +65 -2
- letta/settings.py +3 -3
- {letta_nightly-0.6.43.dev20250320104204.dist-info → letta_nightly-0.6.43.dev20250322104133.dist-info}/METADATA +4 -4
- {letta_nightly-0.6.43.dev20250320104204.dist-info → letta_nightly-0.6.43.dev20250322104133.dist-info}/RECORD +32 -30
- {letta_nightly-0.6.43.dev20250320104204.dist-info → letta_nightly-0.6.43.dev20250322104133.dist-info}/LICENSE +0 -0
- {letta_nightly-0.6.43.dev20250320104204.dist-info → letta_nightly-0.6.43.dev20250322104133.dist-info}/WHEEL +0 -0
- {letta_nightly-0.6.43.dev20250320104204.dist-info → letta_nightly-0.6.43.dev20250322104133.dist-info}/entry_points.txt +0 -0
letta/agent.py
CHANGED
@@ -522,7 +522,7 @@ class Agent(BaseAgent):
                         openai_message_dict=response_message.model_dump(),
                     )
                 )  # extend conversation with assistant's reply
-                self.logger.
+                self.logger.debug(f"Function call message: {messages[-1]}")
 
                 nonnull_content = False
                 if response_message.content:
@@ -537,7 +537,7 @@ class Agent(BaseAgent):
                     response_message.function_call if response_message.function_call is not None else response_message.tool_calls[0].function
                 )
                 function_name = function_call.name
-                self.logger.
+                self.logger.debug(f"Request to call function {function_name} with tool_call_id: {tool_call_id}")
 
                 # Failure case 1: function name is wrong (not in agent_state.tools)
                 target_letta_tool = None
letta/agents/ephemeral_memory_agent.py
ADDED
@@ -0,0 +1,114 @@
+from typing import AsyncGenerator, Dict, List
+
+import openai
+
+from letta.agents.base_agent import BaseAgent
+from letta.helpers.tool_execution_helper import enable_strict_mode
+from letta.orm.enums import ToolType
+from letta.schemas.agent import AgentState
+from letta.schemas.enums import MessageRole
+from letta.schemas.letta_message import UserMessage
+from letta.schemas.letta_message_content import TextContent
+from letta.schemas.message import Message
+from letta.schemas.openai.chat_completion_request import ChatCompletionRequest, Tool
+from letta.schemas.user import User
+from letta.services.agent_manager import AgentManager
+from letta.services.message_manager import MessageManager
+
+
+class EphemeralMemoryAgent(BaseAgent):
+    """
+    A stateless agent that helps with offline memory computations.
+
+    """
+
+    def __init__(
+        self,
+        agent_id: str,
+        openai_client: openai.AsyncClient,
+        message_manager: MessageManager,
+        agent_manager: AgentManager,
+        actor: User,
+    ):
+        super().__init__(
+            agent_id=agent_id,
+            openai_client=openai_client,
+            message_manager=message_manager,
+            agent_manager=agent_manager,
+            actor=actor,
+        )
+
+    async def step(self, input_message: UserMessage) -> List[Message]:
+        """
+        Synchronous method that takes a user's input text and returns a summary from OpenAI.
+        Returns a list of ephemeral Message objects containing both the user text and the assistant summary.
+        """
+        agent_state = self.agent_manager.get_agent_by_id(agent_id=self.agent_id, actor=self.actor)
+
+        input_message = self.pre_process_input_message(input_message=input_message)
+        request = self._build_openai_request([input_message], agent_state)
+
+        chat_completion = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
+
+        return [
+            Message(
+                role=MessageRole.assistant,
+                content=[TextContent(text=chat_completion.choices[0].message.content.strip())],
+            )
+        ]
+
+    def pre_process_input_message(self, input_message: UserMessage) -> Dict:
+        input_prompt_augmented = f"""
+        You are a memory recall agent whose job is to comb through a large set of messages and write relevant memories in relation to a user query.
+        Your response will directly populate a "memory block" called "human" that describes the user, that will be used to answer more questions in the future.
+        You should err on the side of being more verbose, and also try to *predict* the trajectory of the conversation, and pull memories or messages you think will be relevant to where the conversation is going.
+
+        Your response should include:
+        - A high level summary of the relevant events/timeline of the conversation relevant to the query
+        - Direct citations of quotes from the messages you used while creating the summary
+
+        Here is a history of the messages so far:
+
+        {self._format_messages_llm_friendly()}
+
+        This is the query:
+
+        "{input_message.content}"
+
+        Your response:
+        """
+
+        input_message.content = input_prompt_augmented
+        # print(input_prompt_augmented)
+        return input_message.model_dump()
+
+    def _format_messages_llm_friendly(self):
+        messages = self.message_manager.list_messages_for_agent(agent_id=self.agent_id, actor=self.actor)
+
+        llm_friendly_messages = [f"{m.role}: {m.content[0].text}" for m in messages if m.content and isinstance(m.content[0], TextContent)]
+        return "\n".join(llm_friendly_messages)
+
+    def _build_openai_request(self, openai_messages: List[Dict], agent_state: AgentState) -> ChatCompletionRequest:
+        openai_request = ChatCompletionRequest(
+            model=agent_state.llm_config.model,
+            messages=openai_messages,
+            # tools=self._build_tool_schemas(agent_state),
+            # tool_choice="auto",
+            user=self.actor.id,
+            max_completion_tokens=agent_state.llm_config.max_tokens,
+            temperature=agent_state.llm_config.temperature,
+            stream=False,
+        )
+        return openai_request
+
+    def _build_tool_schemas(self, agent_state: AgentState) -> List[Tool]:
+        # Only include memory tools
+        tools = [t for t in agent_state.tools if t.tool_type in {ToolType.LETTA_CORE, ToolType.LETTA_MEMORY_CORE}]
+
+        return [Tool(type="function", function=enable_strict_mode(t.json_schema)) for t in tools]
+
+    async def step_stream(self, input_message: UserMessage) -> AsyncGenerator[str, None]:
+        """
+        This agent is synchronous-only. If called in an async context, raise an error.
+        """
+        raise NotImplementedError("EphemeralMemoryAgent does not support async step.")
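For orientation, here is a minimal sketch of how the new EphemeralMemoryAgent could be driven on its own. Only the constructor and `step()` signatures come from the diff above; the no-argument manager constructors, the `actor` object, and the query string are assumptions for illustration.

```python
# Hypothetical wiring based on the diff above; in letta-nightly the client,
# managers, and actor are normally supplied by the running server.
import asyncio

import openai

from letta.agents.ephemeral_memory_agent import EphemeralMemoryAgent
from letta.schemas.letta_message import UserMessage
from letta.services.agent_manager import AgentManager
from letta.services.message_manager import MessageManager


async def recall(agent_id: str, actor) -> str:
    memory_agent = EphemeralMemoryAgent(
        agent_id=agent_id,
        openai_client=openai.AsyncClient(),  # assumes OPENAI_API_KEY is set
        message_manager=MessageManager(),
        agent_manager=AgentManager(),
        actor=actor,
    )
    # step() augments the query with the agent's message history and returns
    # a single assistant Message whose text is the recall summary.
    results = await memory_agent.step(UserMessage(content="What does the user do for work?"))
    return results[0].content[0].text


# asyncio.run(recall("agent-...", actor=my_user))  # my_user: a letta User record
```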
letta/agents/{low_latency_agent.py → voice_agent.py}
RENAMED
@@ -5,7 +5,7 @@ from typing import Any, AsyncGenerator, Dict, List, Tuple
 import openai
 
 from letta.agents.base_agent import BaseAgent
-from letta.agents.
+from letta.agents.ephemeral_memory_agent import EphemeralMemoryAgent
 from letta.constants import NON_USER_MSG_PREFIX
 from letta.helpers.datetime_helpers import get_utc_time
 from letta.helpers.tool_execution_helper import (
@@ -42,13 +42,12 @@ from letta.services.helpers.agent_manager_helper import compile_system_message
 from letta.services.message_manager import MessageManager
 from letta.services.passage_manager import PassageManager
 from letta.services.summarizer.enums import SummarizationMode
-from letta.services.summarizer.summarizer import Summarizer
 from letta.utils import united_diff
 
 logger = get_logger(__name__)
 
 
-class
+class VoiceAgent(BaseAgent):
     """
     A function-calling loop for streaming OpenAI responses with tool execution.
     This agent:
@@ -65,9 +64,9 @@ class LowLatencyAgent(BaseAgent):
         agent_manager: AgentManager,
         block_manager: BlockManager,
         actor: User,
+        message_buffer_limit: int,
+        message_buffer_min: int,
         summarization_mode: SummarizationMode = SummarizationMode.STATIC_MESSAGE_BUFFER,
-        message_buffer_limit: int = 10,
-        message_buffer_min: int = 4,
     ):
         super().__init__(
             agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
@@ -79,75 +78,78 @@ class LowLatencyAgent(BaseAgent):
         self.passage_manager = PassageManager()  # TODO: pass this in
         # TODO: This is not guaranteed to exist!
         self.summary_block_label = "human"
-        self.summarizer = Summarizer(
-            mode=summarization_mode,
-            summarizer_agent=EphemeralAgent(
-                agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
-            ),
-            message_buffer_limit=message_buffer_limit,
-            message_buffer_min=message_buffer_min,
-        )
+        # self.summarizer = Summarizer(
+        #     mode=summarization_mode,
+        #     summarizer_agent=EphemeralAgent(
+        #         agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
+        #     ),
+        #     message_buffer_limit=message_buffer_limit,
+        #     message_buffer_min=message_buffer_min,
+        # )
         self.message_buffer_limit = message_buffer_limit
-        self.message_buffer_min = message_buffer_min
+        # self.message_buffer_min = message_buffer_min
+        self.offline_memory_agent = EphemeralMemoryAgent(
+            agent_id=agent_id, openai_client=openai_client, message_manager=message_manager, agent_manager=agent_manager, actor=actor
+        )
 
     async def step(self, input_message: UserMessage) -> List[Message]:
         raise NotImplementedError("LowLatencyAgent does not have a synchronous step implemented currently.")
 
     async def step_stream(self, input_message: UserMessage) -> AsyncGenerator[str, None]:
         """
-
-
+        Main streaming loop that yields partial tokens.
+        Whenever we detect a tool call, we yield from _handle_ai_response as well.
         """
-        input_message = self.pre_process_input_message(input_message
-        agent_state = self.agent_manager.get_agent_by_id(
+        input_message = self.pre_process_input_message(input_message)
+        agent_state = self.agent_manager.get_agent_by_id(self.agent_id, actor=self.actor)
         in_context_messages = self.message_manager.get_messages_by_ids(message_ids=agent_state.message_ids, actor=self.actor)
         letta_message_db_queue = [create_user_message(input_message=input_message, agent_id=agent_state.id, actor=self.actor)]
         in_memory_message_history = [input_message]
 
+        # TODO: Define max steps here
         while True:
-            #
-            in_context_messages = self._rebuild_memory(in_context_messages
-
-            # Convert Letta messages to OpenAI messages
+            # Rebuild memory each loop
+            in_context_messages = self._rebuild_memory(in_context_messages, agent_state)
             openai_messages = convert_letta_messages_to_openai(in_context_messages)
             openai_messages.extend(in_memory_message_history)
+
             request = self._build_openai_request(openai_messages, agent_state)
 
-            # Execute the request
             stream = await self.openai_client.chat.completions.create(**request.model_dump(exclude_unset=True))
             streaming_interface = OpenAIChatCompletionsStreamingInterface(stream_pre_execution_message=True)
 
-
-
+            # 1) Yield partial tokens from OpenAI
+            async for sse_chunk in streaming_interface.process(stream):
+                yield sse_chunk
 
-            #
-
-                streaming_interface,
+            # 2) Now handle the final AI response. This might yield more text (stalling, etc.)
+            should_continue = await self._handle_ai_response(
+                streaming_interface,
+                agent_state,
+                in_memory_message_history,
+                letta_message_db_queue,
             )
 
-            if not
+            if not should_continue:
                 break
 
-        # Rebuild context window
+        # Rebuild context window if desired
        await self._rebuild_context_window(in_context_messages, letta_message_db_queue, agent_state)
-
         yield "data: [DONE]\n\n"
 
     async def _handle_ai_response(
         self,
-        streaming_interface: OpenAIChatCompletionsStreamingInterface,
+        streaming_interface: "OpenAIChatCompletionsStreamingInterface",
         agent_state: AgentState,
         in_memory_message_history: List[Dict[str, Any]],
         letta_message_db_queue: List[Any],
     ) -> bool:
         """
-
-
-
-        Returns:
-            bool: True if execution should continue, False if the step loop should terminate.
+        Now that streaming is done, handle the final AI response.
+        This might yield additional SSE tokens if we do stalling.
+        At the end, set self._continue_execution accordingly.
         """
-        #
+        # 1. If we have any leftover content from partial stream, store it as an assistant message
         if streaming_interface.content_buffer:
             content = "".join(streaming_interface.content_buffer)
             in_memory_message_history.append({"role": "assistant", "content": content})
@@ -160,82 +162,92 @@ class LowLatencyAgent(BaseAgent):
             )
             letta_message_db_queue.extend(assistant_msgs)
 
-        #
+        # 2. If a tool call was requested, handle it
         if streaming_interface.tool_call_happened:
+            tool_call_name = streaming_interface.tool_call_name
+            tool_call_args_str = streaming_interface.tool_call_args_str or "{}"
             try:
-                tool_args = json.loads(
+                tool_args = json.loads(tool_call_args_str)
             except json.JSONDecodeError:
                 tool_args = {}
 
             tool_call_id = streaming_interface.tool_call_id or f"call_{uuid.uuid4().hex[:8]}"
-
             assistant_tool_call_msg = AssistantMessage(
                 content=None,
                 tool_calls=[
                     ToolCall(
                         id=tool_call_id,
                         function=ToolCallFunction(
-                            name=
-                            arguments=
+                            name=tool_call_name,
+                            arguments=tool_call_args_str,
                         ),
                     )
                 ],
             )
             in_memory_message_history.append(assistant_tool_call_msg.model_dump())
 
-            tool_result,
-                tool_name=
+            tool_result, success_flag = await self._execute_tool(
+                tool_name=tool_call_name,
                 tool_args=tool_args,
                 agent_state=agent_state,
             )
 
-
+            # 3. Provide function_call response back into the conversation
+            tool_message = ToolMessage(
+                content=json.dumps({"result": tool_result}),
+                tool_call_id=tool_call_id,
+            )
             in_memory_message_history.append(tool_message.model_dump())
 
+            # 4. Insert heartbeat message for follow-up
             heartbeat_user_message = UserMessage(
                 content=f"{NON_USER_MSG_PREFIX} Tool finished executing. Summarize the result for the user."
             )
             in_memory_message_history.append(heartbeat_user_message.model_dump())
 
+            # 5. Also store in DB
             tool_call_messages = create_tool_call_messages_from_openai_response(
                 agent_id=agent_state.id,
                 model=agent_state.llm_config.model,
-                function_name=
+                function_name=tool_call_name,
                 function_arguments=tool_args,
                 tool_call_id=tool_call_id,
-                function_call_success=
+                function_call_success=success_flag,
                 function_response=tool_result,
                 actor=self.actor,
                 add_heartbeat_request_system_message=True,
             )
             letta_message_db_queue.extend(tool_call_messages)
 
-            #
+            # Because we have new data, we want to continue the while-loop in `step_stream`
             return True
-
-
-
+        else:
+            # If we got here, there's no tool call. If finish_reason_stop => done
+            return not streaming_interface.finish_reason_stop
 
     async def _rebuild_context_window(
         self, in_context_messages: List[Message], letta_message_db_queue: List[Message], agent_state: AgentState
     ) -> None:
         new_letta_messages = self.message_manager.create_many_messages(letta_message_db_queue, actor=self.actor)
+        new_in_context_messages = in_context_messages + new_letta_messages
 
-
-
-
-        new_in_context_messages, summary_str, updated = await self.summarizer.summarize(
-            in_context_messages=in_context_messages, new_letta_messages=new_letta_messages, previous_summary=previous_summary
-        )
-
-        if updated:
-            self.block_manager.update_block(block_id=target_block.id, block_update=BlockUpdate(value=summary_str), actor=self.actor)
+        if len(new_in_context_messages) > self.message_buffer_limit:
+            cutoff = len(new_in_context_messages) - self.message_buffer_limit
+            new_in_context_messages = [new_in_context_messages[0]] + new_in_context_messages[cutoff:]
 
         self.agent_manager.set_in_context_messages(
             agent_id=self.agent_id, message_ids=[m.id for m in new_in_context_messages], actor=self.actor
         )
 
     def _rebuild_memory(self, in_context_messages: List[Message], agent_state: AgentState) -> List[Message]:
+        # Refresh memory
+        # TODO: This only happens for the summary block
+        # TODO: We want to extend this refresh to be general, and stick it in agent_manager
+        for i, b in enumerate(agent_state.memory.blocks):
+            if b.label == self.summary_block_label:
+                agent_state.memory.blocks[i] = self.block_manager.get_block_by_id(block_id=b.id, actor=self.actor)
+                break
+
         # TODO: This is a pretty brittle pattern established all over our code, need to get rid of this
         curr_system_message = in_context_messages[0]
         curr_memory_str = agent_state.memory.compile()
@@ -249,8 +261,8 @@ class LowLatencyAgent(BaseAgent):
 
         memory_edit_timestamp = get_utc_time()
 
-        num_messages = self.message_manager.size(actor=actor, agent_id=
-        num_archival_memories = self.passage_manager.size(actor=actor, agent_id=
+        num_messages = self.message_manager.size(actor=self.actor, agent_id=agent_state.id)
+        num_archival_memories = self.passage_manager.size(actor=self.actor, agent_id=agent_state.id)
 
         new_system_message_str = compile_system_message(
             system_prompt=agent_state.system,
@@ -296,8 +308,37 @@ class LowLatencyAgent(BaseAgent):
         else:
             tools = agent_state.tools
 
+        # Special tool state
+        recall_memory_utterance_description = (
+            "A lengthier message to be uttered while your memories of the current conversation are being re-contextualized."
+            "You should stall naturally and show the user you're thinking hard. The main thing is to not leave the user in silence."
+            "You MUST also include punctuation at the end of this message."
+        )
+        recall_memory_json = Tool(
+            type="function",
+            function=enable_strict_mode(
+                add_pre_execution_message(
+                    {
+                        "name": "recall_memory",
+                        "description": "Retrieve relevant information from memory based on a given query. Use when you don't remember the answer to a question.",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "query": {
+                                    "type": "string",
+                                    "description": "A description of what the model is trying to recall from memory.",
+                                }
+                            },
+                            "required": ["query"],
+                        },
+                    },
+                    description=recall_memory_utterance_description,
+                )
+            ),
+        )
+
         # TODO: Customize whether or not to have heartbeats, pre_exec_message, etc.
-        return [
+        return [recall_memory_json] + [
             Tool(type="function", function=enable_strict_mode(add_pre_execution_message(remove_request_heartbeat(t.json_schema))))
             for t in tools
         ]
@@ -306,19 +347,32 @@ class LowLatencyAgent(BaseAgent):
         """
         Executes a tool and returns (result, success_flag).
         """
-
-        if
-
+        # Special memory case
+        if tool_name == "recall_memory":
+            # TODO: Make this safe
+            await self._recall_memory(tool_args["query"], agent_state)
+            return f"Successfully recalled memory and populated {self.summary_block_label} block.", True
+        else:
+            target_tool = next((x for x in agent_state.tools if x.name == tool_name), None)
+            if not target_tool:
+                return f"Tool not found: {tool_name}", False
 
-
-
-
-
-
-
-
-
-
-
-
-
+            try:
+                tool_result, _ = execute_external_tool(
+                    agent_state=agent_state,
+                    function_name=tool_name,
+                    function_args=tool_args,
+                    target_letta_tool=target_tool,
+                    actor=self.actor,
+                    allow_agent_state_modifications=False,
+                )
+                return tool_result, True
+            except Exception as e:
+                return f"Failed to call tool. Error: {e}", False
+
+    async def _recall_memory(self, query, agent_state: AgentState) -> None:
+        results = await self.offline_memory_agent.step(UserMessage(content=query))
+        target_block = next(b for b in agent_state.memory.blocks if b.label == self.summary_block_label)
+        self.block_manager.update_block(
+            block_id=target_block.id, block_update=BlockUpdate(value=results[0].content[0].text), actor=self.actor
+        )
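The rename from LowLatencyAgent to VoiceAgent also swaps the Summarizer-based context management for a static message buffer: `_rebuild_context_window` now keeps the first (system) message plus the most recent `message_buffer_limit` entries. A standalone sketch of that cutoff policy, using generic names rather than letta types:

```python
from typing import List, TypeVar

T = TypeVar("T")


def truncate_buffer(messages: List[T], buffer_limit: int) -> List[T]:
    """Mirror of the cutoff added to _rebuild_context_window above: keep the
    first (system) message and the last `buffer_limit` entries."""
    if len(messages) <= buffer_limit:
        return messages
    cutoff = len(messages) - buffer_limit
    return [messages[0]] + messages[cutoff:]


# truncate_buffer(["sys", "m1", "m2", "m3", "m4"], buffer_limit=2)
# -> ["sys", "m3", "m4"]
```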
letta/client/client.py
CHANGED
@@ -2937,7 +2937,6 @@ class LocalClient(AbstractClient):
 
         Args:
             func (callable): The function to create a tool for.
-            name: (str): Name of the tool (must be unique per-user.)
             tags (Optional[List[str]], optional): Tags for the tool. Defaults to None.
             description (str, optional): The description.
             return_char_limit (int): The character limit for the tool's return value. Defaults to FUNCTION_RETURN_CHAR_LIMIT.
@@ -2950,6 +2949,7 @@ class LocalClient(AbstractClient):
         # parse source code/schema
         source_code = parse_source_code(func)
         source_type = "python"
+        name = func.__name__  # Initialize name using function's __name__
         if not tags:
             tags = []
 
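With this change `create_tool` no longer documents a caller-supplied `name`; the tool name is always taken from the function's `__name__`. A hedged sketch of the resulting behavior (the `LocalClient()` construction and the `tags` value are illustrative, not from the diff):

```python
import random

from letta.client.client import LocalClient


def roll_d20() -> int:
    """Roll a 20-sided die and return the result."""
    return random.randint(1, 20)


client = LocalClient()
tool = client.create_tool(roll_d20, tags=["dice"])
assert tool.name == "roll_d20"  # name is now derived from func.__name__
```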
letta/embeddings.py
CHANGED
@@ -235,7 +235,9 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
 
     if endpoint_type == "openai":
         return OpenAIEmbeddings(
-            api_key=model_settings.openai_api_key,
+            api_key=model_settings.openai_api_key,
+            model=config.embedding_model,
+            base_url=model_settings.openai_api_base,
         )
 
     elif endpoint_type == "azure":
@@ -246,19 +248,6 @@ def embedding_model(config: EmbeddingConfig, user_id: Optional[uuid.UUID] = None
                 model_settings.azure_api_version is not None,
             ]
         )
-        # from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
-
-        ## https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#embeddings
-        # model = "text-embedding-ada-002"
-        # deployment = credentials.azure_embedding_deployment if credentials.azure_embedding_deployment is not None else model
-        # return AzureOpenAIEmbedding(
-        #     model=model,
-        #     deployment_name=deployment,
-        #     api_key=credentials.azure_key,
-        #     azure_endpoint=credentials.azure_endpoint,
-        #     api_version=credentials.azure_version,
-        # )
-
         return AzureOpenAIEmbedding(
             api_endpoint=model_settings.azure_base_url,
             api_key=model_settings.azure_api_key,
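The OpenAI branch of `embedding_model` now forwards the configured embedding model and the `openai_api_base` setting, which matters when pointing letta at an OpenAI-compatible endpoint. A hedged sketch of exercising that path; the `EmbeddingConfig` field names other than `embedding_model` and the embedder's method name are assumptions, not taken from the diff:

```python
from letta.embeddings import embedding_model
from letta.schemas.embedding_config import EmbeddingConfig

# Assumed field names for illustration; only embedding_model and the
# openai_api_key / openai_api_base settings are visible in the diff above.
config = EmbeddingConfig(
    embedding_endpoint_type="openai",
    embedding_model="text-embedding-3-small",
    embedding_dim=1536,
    embedding_chunk_size=300,
)

embedder = embedding_model(config)  # returns OpenAIEmbeddings wired with model + base_url
vector = embedder.get_text_embedding("hello world")
```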
letta/functions/function_sets/multi_agent.py
CHANGED
@@ -9,6 +9,8 @@ from letta.functions.helpers import (
 )
 from letta.schemas.enums import MessageRole
 from letta.schemas.message import MessageCreate
+from letta.server.rest_api.utils import get_letta_server
+from letta.utils import log_telemetry
 
 if TYPE_CHECKING:
     from letta.agent import Agent
@@ -85,8 +87,51 @@ def send_message_to_agents_matching_tags(self: "Agent", message: str, match_all:
     response corresponds to a single agent. Agents that do not respond will not have an entry
     in the returned list.
     """
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async start",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+    server = get_letta_server()
+
+    augmented_message = (
+        f"[Incoming message from agent with ID '{self.agent_state.id}' - to reply to this message, "
+        f"make sure to use the 'send_message' at the end, and the system will notify the sender of your response] "
+        f"{message}"
+    )
 
-
+    # Retrieve up to 100 matching agents
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async listing agents start",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+    matching_agents = server.agent_manager.list_agents_matching_tags(actor=self.user, match_all=match_all, match_some=match_some)
+
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async listing agents finish",
+        message=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+
+    # Create a system message
+    messages = [MessageCreate(role=MessageRole.system, content=augmented_message, name=self.agent_state.name)]
+
+    result = asyncio.run(_send_message_to_agents_matching_tags_async(self, server, messages, matching_agents))
+    log_telemetry(
+        self.logger,
+        "_send_message_to_agents_matching_tags_async finish",
+        messages=message,
+        match_all=match_all,
+        match_some=match_some,
+    )
+    return result
 
 
 def send_message_to_all_agents_in_group(self: "Agent", message: str) -> List[str]:
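The rewritten `send_message_to_agents_matching_tags` resolves the running server with `get_letta_server()`, logs each phase via `log_telemetry`, and fans the message out to agents matched by tag. A hedged sketch of the tag semantics from the tool's point of view; the tag values and the calling agent are made up, and the parameter types are assumed to be lists of strings:

```python
# Hypothetical invocation from inside an agent's tool call; `agent` stands in
# for the bound `self: "Agent"` argument.
replies = send_message_to_agents_matching_tags(
    agent,
    message="Daily stand-up: please post your current status.",
    match_all=["team:support"],                      # every listed tag must be present
    match_some=["shift:morning", "shift:evening"],   # at least one of these must match
)

# Each entry corresponds to one matched agent that responded via send_message.
for reply in replies:
    print(reply)
```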