letta-nightly 0.6.16.dev20250128104041__py3-none-any.whl → 0.6.17.dev20250129174639__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (35)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +0 -3
  3. letta/client/client.py +5 -5
  4. letta/client/streaming.py +29 -20
  5. letta/constants.py +1 -1
  6. letta/functions/function_sets/multi_agent.py +55 -49
  7. letta/functions/functions.py +0 -1
  8. letta/functions/helpers.py +149 -9
  9. letta/llm_api/llm_api_tools.py +20 -12
  10. letta/llm_api/openai.py +15 -13
  11. letta/orm/agent.py +14 -2
  12. letta/orm/job.py +1 -1
  13. letta/orm/sqlalchemy_base.py +12 -4
  14. letta/schemas/job.py +17 -1
  15. letta/schemas/letta_request.py +2 -7
  16. letta/schemas/llm_config.py +9 -0
  17. letta/schemas/message.py +51 -22
  18. letta/schemas/openai/chat_completion_response.py +2 -2
  19. letta/schemas/run.py +1 -2
  20. letta/server/rest_api/app.py +5 -1
  21. letta/server/rest_api/chat_completions_interface.py +256 -0
  22. letta/server/rest_api/optimistic_json_parser.py +185 -0
  23. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  24. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +161 -0
  25. letta/server/rest_api/routers/v1/agents.py +22 -32
  26. letta/server/server.py +12 -12
  27. letta/services/job_manager.py +7 -12
  28. letta/services/tool_manager.py +17 -1
  29. letta/system.py +20 -0
  30. letta/utils.py +24 -1
  31. {letta_nightly-0.6.16.dev20250128104041.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/METADATA +4 -4
  32. {letta_nightly-0.6.16.dev20250128104041.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/RECORD +35 -31
  33. {letta_nightly-0.6.16.dev20250128104041.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/LICENSE +0 -0
  34. {letta_nightly-0.6.16.dev20250128104041.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/WHEEL +0 -0
  35. {letta_nightly-0.6.16.dev20250128104041.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.6.16"
+__version__ = "0.6.17"
 
 
 # import clients
letta/agent.py CHANGED
@@ -108,9 +108,6 @@ class Agent(BaseAgent):
             if not isinstance(rule, TerminalToolRule):
                 warnings.warn("Tool rules only work reliably for the latest OpenAI models that support structured outputs.")
                 break
-        # add default rule for having send_message be a terminal tool
-        if agent_state.tool_rules is None:
-            agent_state.tool_rules = []
 
         self.tool_rules_solver = ToolRulesSolver(tool_rules=agent_state.tool_rules)
 
letta/client/client.py CHANGED
@@ -280,7 +280,7 @@ class AbstractClient(object):
 
     def get_messages(
         self, agent_id: str, after: Optional[str] = None, before: Optional[str] = None, limit: Optional[int] = 1000
-    ) -> List[Message]:
+    ) -> List[LettaMessage]:
         raise NotImplementedError
 
     def list_model_configs(self) -> List[LLMConfig]:
@@ -812,7 +812,6 @@ class RESTClient(AbstractClient):
         Returns:
            memory (Memory): In-context memory of the agent
        """
-
        response = requests.get(f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/core-memory", headers=self.headers)
        if response.status_code != 200:
            raise ValueError(f"Failed to get in-context memory: {response.text}")
@@ -966,7 +965,7 @@ class RESTClient(AbstractClient):
 
     def get_messages(
         self, agent_id: str, before: Optional[str] = None, after: Optional[str] = None, limit: Optional[int] = 1000
-    ) -> List[Message]:
+    ) -> List[LettaMessage]:
         """
         Get messages from an agent with pagination.
 
@@ -984,7 +983,7 @@ class RESTClient(AbstractClient):
         response = requests.get(f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/messages", params=params, headers=self.headers)
         if response.status_code != 200:
             raise ValueError(f"Failed to get messages: {response.text}")
-        return [Message(**message) for message in response.json()]
+        return [LettaMessage(**message) for message in response.json()]
 
     def send_message(
         self,
@@ -3356,7 +3355,7 @@ class LocalClient(AbstractClient):
 
     def get_messages(
         self, agent_id: str, before: Optional[str] = None, after: Optional[str] = None, limit: Optional[int] = 1000
-    ) -> List[Message]:
+    ) -> List[LettaMessage]:
         """
         Get messages from an agent with pagination.
 
@@ -3378,6 +3377,7 @@ class LocalClient(AbstractClient):
             after=after,
             limit=limit,
             reverse=True,
+            return_message_object=False,
         )
 
     def list_blocks(self, label: Optional[str] = None, templates_only: Optional[bool] = True) -> List[Block]:
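
Note: `get_messages` now returns `LettaMessage` objects (the user-facing union of `AssistantMessage`, `ReasoningMessage`, etc.) instead of raw `Message` records, and `LocalClient` passes `return_message_object=False` through to match. A minimal usage sketch under that assumption; the client construction and field access below are illustrative, not taken from this diff:

```python
# Hypothetical caller after this change: items are LettaMessage variants,
# not raw Message rows, so dispatch on message_type instead of Message fields.
from letta.client.client import RESTClient  # import path assumed

client = RESTClient(base_url="http://localhost:8283")
for msg in client.get_messages(agent_id="agent-123", limit=10):
    # LettaMessage variants carry a message_type discriminator.
    print(getattr(msg, "message_type", type(msg).__name__))
```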
letta/client/streaming.py CHANGED
@@ -1,18 +1,22 @@
 import json
-from typing import Generator
+from typing import Generator, Union, get_args
 
 import httpx
 from httpx_sse import SSEError, connect_sse
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
 from letta.errors import LLMError
+from letta.log import get_logger
 from letta.schemas.enums import MessageStreamStatus
 from letta.schemas.letta_message import AssistantMessage, ReasoningMessage, ToolCallMessage, ToolReturnMessage
 from letta.schemas.letta_response import LettaStreamingResponse
 from letta.schemas.usage import LettaUsageStatistics
 
+logger = get_logger(__name__)
 
-def _sse_post(url: str, data: dict, headers: dict) -> Generator[LettaStreamingResponse, None, None]:
+
+def _sse_post(url: str, data: dict, headers: dict) -> Generator[Union[LettaStreamingResponse, ChatCompletionChunk], None, None]:
 
     with httpx.Client() as client:
         with connect_sse(client, method="POST", url=url, json=data, headers=headers) as event_source:
@@ -20,22 +24,26 @@ def _sse_post(url: str, data: dict, headers: dict) -> Generator[LettaStreamingRe
             # Inspect for errors before iterating (see https://github.com/florimondmanca/httpx-sse/pull/12)
             if not event_source.response.is_success:
                 # handle errors
-                from letta.utils import printd
+                pass
 
-                printd("Caught error before iterating SSE request:", vars(event_source.response))
-                printd(event_source.response.read())
+                logger.warning("Caught error before iterating SSE request:", vars(event_source.response))
+                logger.warning(event_source.response.read().decode("utf-8"))
 
                 try:
                     response_bytes = event_source.response.read()
                     response_dict = json.loads(response_bytes.decode("utf-8"))
-                    error_message = response_dict["error"]["message"]
                     # e.g.: This model's maximum context length is 8192 tokens. However, your messages resulted in 8198 tokens (7450 in the messages, 748 in the functions). Please reduce the length of the messages or functions.
-                    if OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in error_message:
-                        raise LLMError(error_message)
+                    if (
+                        "error" in response_dict
+                        and "message" in response_dict["error"]
+                        and OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in response_dict["error"]["message"]
+                    ):
+                        logger.error(response_dict["error"]["message"])
+                        raise LLMError(response_dict["error"]["message"])
                 except LLMError:
                     raise
                 except:
-                    print(f"Failed to parse SSE message, throwing SSE HTTP error up the stack")
+                    logger.error(f"Failed to parse SSE message, throwing SSE HTTP error up the stack")
                 event_source.response.raise_for_status()
 
             try:
@@ -58,33 +66,34 @@ def _sse_post(url: str, data: dict, headers: dict) -> Generator[LettaStreamingRe
                         yield ToolReturnMessage(**chunk_data)
                     elif "step_count" in chunk_data:
                         yield LettaUsageStatistics(**chunk_data)
+                    elif chunk_data.get("object") == get_args(ChatCompletionChunk.__annotations__["object"])[0]:
+                        yield ChatCompletionChunk(**chunk_data)  # Add your processing logic for chat chunks here
                     else:
                         raise ValueError(f"Unknown message type in chunk_data: {chunk_data}")
 
             except SSEError as e:
-                print("Caught an error while iterating the SSE stream:", str(e))
+                logger.error("Caught an error while iterating the SSE stream:", str(e))
                 if "application/json" in str(e):  # Check if the error is because of JSON response
                     # TODO figure out a better way to catch the error other than re-trying with a POST
                     response = client.post(url=url, json=data, headers=headers)  # Make the request again to get the JSON response
                     if response.headers["Content-Type"].startswith("application/json"):
                         error_details = response.json()  # Parse the JSON to get the error message
-                        print("Request:", vars(response.request))
-                        print("POST Error:", error_details)
-                        print("Original SSE Error:", str(e))
+                        logger.error("Request:", vars(response.request))
+                        logger.error("POST Error:", error_details)
+                        logger.error("Original SSE Error:", str(e))
                     else:
-                        print("Failed to retrieve JSON error message via retry.")
+                        logger.error("Failed to retrieve JSON error message via retry.")
                 else:
-                    print("SSEError not related to 'application/json' content type.")
+                    logger.error("SSEError not related to 'application/json' content type.")
 
                 # Optionally re-raise the exception if you need to propagate it
                 raise e
 
             except Exception as e:
                 if event_source.response.request is not None:
-                    print("HTTP Request:", vars(event_source.response.request))
+                    logger.error("HTTP Request:", vars(event_source.response.request))
                 if event_source.response is not None:
-                    print("HTTP Status:", event_source.response.status_code)
-                    print("HTTP Headers:", event_source.response.headers)
-                    # print("HTTP Body:", event_source.response.text)
-                    print("Exception message:", str(e))
+                    logger.error("HTTP Status:", event_source.response.status_code)
+                    logger.error("HTTP Headers:", event_source.response.headers)
+                    logger.error("Exception message:", str(e))
                 raise e
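
Note on the new chunk dispatch: `ChatCompletionChunk.object` is annotated as `Literal["chat.completion.chunk"]`, so `get_args(...)[0]` recovers that literal string without hard-coding it. A standalone sketch of the same trick; the `Chunk` model below is a stand-in for the real OpenAI type:

```python
from typing import Literal, get_args

from pydantic import BaseModel


class Chunk(BaseModel):  # stand-in for openai's ChatCompletionChunk
    object: Literal["chat.completion.chunk"]


# Pull the expected discriminator value out of the annotation itself.
expected = get_args(Chunk.__annotations__["object"])[0]
assert expected == "chat.completion.chunk"

chunk_data = {"object": "chat.completion.chunk"}
if chunk_data.get("object") == expected:
    print("dispatch as a chat completion chunk")
```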
letta/constants.py CHANGED
@@ -50,7 +50,7 @@ BASE_TOOLS = ["send_message", "conversation_search", "archival_memory_insert", "
 # Base memory tools CAN be edited, and are added by default by the server
 BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
 # Multi agent tools
-MULTI_AGENT_TOOLS = ["send_message_to_specific_agent", "send_message_to_agents_matching_all_tags"]
+MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_all_tags", "send_message_to_agent_async"]
 MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES = 3
 MULTI_AGENT_SEND_MESSAGE_TIMEOUT = 20 * 60
 
letta/functions/function_sets/multi_agent.py CHANGED
@@ -1,80 +1,86 @@
 import asyncio
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List
 
 from letta.constants import MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES, MULTI_AGENT_SEND_MESSAGE_TIMEOUT
-from letta.functions.helpers import async_send_message_with_retries
-from letta.orm.errors import NoResultFound
+from letta.functions.helpers import async_send_message_with_retries, execute_send_message_to_agent, fire_and_forget_send_to_agent
+from letta.schemas.enums import MessageRole
+from letta.schemas.message import MessageCreate
 from letta.server.rest_api.utils import get_letta_server
 
 if TYPE_CHECKING:
     from letta.agent import Agent
 
 
-def send_message_to_specific_agent(self: "Agent", message: str, other_agent_id: str) -> Optional[str]:
+def send_message_to_agent_and_wait_for_reply(self: "Agent", message: str, other_agent_id: str) -> str:
     """
-    Send a message to a specific Letta agent within the same organization.
+    Sends a message to a specific Letta agent within the same organization and waits for a response. The sender's identity is automatically included, so no explicit introduction is needed in the message. This function is designed for two-way communication where a reply is expected.
 
     Args:
-        message (str): The message to be sent to the target Letta agent.
-        other_agent_id (str): The identifier of the target Letta agent.
+        message (str): The content of the message to be sent to the target agent.
+        other_agent_id (str): The unique identifier of the target Letta agent.
 
     Returns:
-        Optional[str]: The response from the Letta agent. It's possible that the agent does not respond.
+        str: The response from the target agent.
     """
-    server = get_letta_server()
-
-    # Ensure the target agent is in the same org
-    try:
-        server.agent_manager.get_agent_by_id(agent_id=other_agent_id, actor=self.user)
-    except NoResultFound:
-        raise ValueError(
-            f"The passed-in agent_id {other_agent_id} either does not exist, "
-            f"or does not belong to the same org ({self.user.organization_id})."
-        )
-
-    # Async logic to send a message with retries and timeout
-    async def async_send_single_agent():
-        return await async_send_message_with_retries(
-            server=server,
-            sender_agent=self,
-            target_agent_id=other_agent_id,
-            message_text=message,
-            max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES,  # or your chosen constants
-            timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT,  # e.g., 1200 for 20 min
-            logging_prefix="[send_message_to_specific_agent]",
-        )
-
-    # Run in the current event loop or create one if needed
-    try:
-        return asyncio.run(async_send_single_agent())
-    except RuntimeError:
-        # e.g., in case there's already an active loop
-        loop = asyncio.get_event_loop()
-        if loop.is_running():
-            return loop.run_until_complete(async_send_single_agent())
-        else:
-            raise
+    messages = [MessageCreate(role=MessageRole.user, content=message, name=self.agent_state.name)]
+    return execute_send_message_to_agent(
+        sender_agent=self,
+        messages=messages,
+        other_agent_id=other_agent_id,
+        log_prefix="[send_message_to_agent_and_wait_for_reply]",
+    )
 
 
-def send_message_to_agents_matching_all_tags(self: "Agent", message: str, tags: List[str]) -> List[str]:
+def send_message_to_agent_async(self: "Agent", message: str, other_agent_id: str) -> str:
     """
-    Send a message to all agents in the same organization that match ALL of the given tags.
+    Sends a message to a specific Letta agent within the same organization. The sender's identity is automatically included, so no explicit introduction is required in the message. This function does not expect a response from the target agent, making it suitable for notifications or one-way communication.
 
-    Messages are sent in parallel for improved performance, with retries on flaky calls and timeouts for long-running requests.
-    This function does not use a cursor (pagination) and enforces a limit of 100 agents.
+    Args:
+        message (str): The content of the message to be sent to the target agent.
+        other_agent_id (str): The unique identifier of the target Letta agent.
+
+    Returns:
+        str: A confirmation message indicating the message was successfully sent.
+    """
+    message = (
+        f"[Incoming message from agent with ID '{self.agent_state.id}' - to reply to this message, "
+        f"make sure to use the 'send_message_to_agent_async' tool, or the agent will not receive your message] "
+        f"{message}"
+    )
+    messages = [MessageCreate(role=MessageRole.system, content=message, name=self.agent_state.name)]
+
+    # Do the actual fire-and-forget
+    fire_and_forget_send_to_agent(
+        sender_agent=self,
+        messages=messages,
+        other_agent_id=other_agent_id,
+        log_prefix="[send_message_to_agent_async]",
+        use_retries=False,  # or True if you want to use async_send_message_with_retries
+    )
+
+    # Immediately return to caller
+    return "Successfully sent message"
+
+
+def send_message_to_agents_matching_all_tags(self: "Agent", message: str, tags: List[str]) -> List[str]:
+    """
+    Sends a message to all agents within the same organization that match all of the specified tags. Messages are dispatched in parallel for improved performance, with retries to handle transient issues and timeouts to ensure responsiveness. This function enforces a limit of 100 agents and does not support pagination (cursor-based queries). Each agent must match all specified tags (`match_all_tags=True`) to be included.
 
     Args:
-        message (str): The message to be sent to each matching agent.
-        tags (List[str]): The list of tags that each agent must have (match_all_tags=True).
+        message (str): The content of the message to be sent to each matching agent.
+        tags (List[str]): A list of tags that an agent must possess to receive the message.
 
     Returns:
-        List[str]: A list of responses from the agents that match all tags.
-        Each response corresponds to one agent.
+        List[str]: A list of responses from the agents that matched all tags. Each
+        response corresponds to a single agent. Agents that do not respond will not
+        have an entry in the returned list.
     """
+
     server = get_letta_server()
 
     # Retrieve agents that match ALL specified tags
     matching_agents = server.agent_manager.list_agents(actor=self.user, tags=tags, match_all_tags=True, limit=100)
+    messages = [MessageCreate(role=MessageRole.user, content=message, name=self.agent_state.name)]
 
     async def send_messages_to_all_agents():
         tasks = [
@@ -82,7 +88,7 @@ def send_message_to_agents_matching_all_tags(self: "Agent", message: str, tags:
                 server=server,
                 sender_agent=self,
                 target_agent_id=agent_state.id,
-                message_text=message,
+                messages=messages,
                 max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES,
                 timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT,
                 logging_prefix="[send_message_to_agents_matching_all_tags]",
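
The renamed tools split cleanly along blocking behavior: `send_message_to_agent_and_wait_for_reply` blocks on a response (with retries and a timeout), while `send_message_to_agent_async` schedules delivery and returns a confirmation immediately. A hedged usage sketch; the `agent` instance and target ID below are placeholders:

```python
# Illustrative only: calling the two variants from tool code.
reply = send_message_to_agent_and_wait_for_reply(
    agent, message="Status report, please.", other_agent_id="agent-xyz"
)  # blocks until the target agent replies (or retries/timeout are exhausted)

ack = send_message_to_agent_async(
    agent, message="FYI: the nightly build finished.", other_agent_id="agent-xyz"
)  # returns "Successfully sent message" immediately; delivery runs in background
```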
letta/functions/functions.py CHANGED
@@ -122,7 +122,6 @@ def get_json_schema_from_module(module_name: str, function_name: str) -> dict:
         generated_schema = generate_schema(attr)
 
         return generated_schema
-
     except ModuleNotFoundError:
         raise ModuleNotFoundError(f"Module '{module_name}' not found.")
     except AttributeError:
letta/functions/helpers.py CHANGED
@@ -1,15 +1,25 @@
+import asyncio
 import json
-from typing import Any, Optional, Union
+import threading
+from random import uniform
+from typing import Any, List, Optional, Union
 
 import humps
 from composio.constants import DEFAULT_ENTITY_ID
 from pydantic import BaseModel
 
-from letta.constants import COMPOSIO_ENTITY_ENV_VAR_KEY, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
-from letta.schemas.enums import MessageRole
+from letta.constants import (
+    COMPOSIO_ENTITY_ENV_VAR_KEY,
+    DEFAULT_MESSAGE_TOOL,
+    DEFAULT_MESSAGE_TOOL_KWARG,
+    MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES,
+    MULTI_AGENT_SEND_MESSAGE_TIMEOUT,
+)
+from letta.orm.errors import NoResultFound
 from letta.schemas.letta_message import AssistantMessage, ReasoningMessage, ToolCallMessage
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.message import MessageCreate
+from letta.server.rest_api.utils import get_letta_server
 
 
 # TODO: This is kind of hacky, as this is used to search up the action later on composio's side
@@ -259,16 +269,63 @@ def parse_letta_response_for_assistant_message(
     return None
 
 
-import asyncio
-from random import uniform
-from typing import Optional
+def execute_send_message_to_agent(
+    sender_agent: "Agent",
+    messages: List[MessageCreate],
+    other_agent_id: str,
+    log_prefix: str,
+) -> Optional[str]:
+    """
+    Helper function to send messages to a specific Letta agent.
+
+    Args:
+        sender_agent ("Agent"): The sender agent object.
+        messages (List[MessageCreate]): The messages to send.
+        other_agent_id (str): The identifier of the target Letta agent.
+        log_prefix (str): Logging prefix for retries.
+
+    Returns:
+        Optional[str]: The response from the Letta agent if required by the caller.
+    """
+    server = get_letta_server()
+
+    # Ensure the target agent is in the same org
+    try:
+        server.agent_manager.get_agent_by_id(agent_id=other_agent_id, actor=sender_agent.user)
+    except NoResultFound:
+        raise ValueError(
+            f"The passed-in agent_id {other_agent_id} either does not exist, "
+            f"or does not belong to the same org ({sender_agent.user.organization_id})."
+        )
+
+    # Async logic to send a message with retries and timeout
+    async def async_send():
+        return await async_send_message_with_retries(
+            server=server,
+            sender_agent=sender_agent,
+            target_agent_id=other_agent_id,
+            messages=messages,
+            max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES,
+            timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT,
+            logging_prefix=log_prefix,
+        )
+
+    # Run in the current event loop or create one if needed
+    try:
+        return asyncio.run(async_send())
+    except RuntimeError:
+        loop = asyncio.get_event_loop()
+        if loop.is_running():
+            return loop.run_until_complete(async_send())
+        else:
+            raise
 
 
 async def async_send_message_with_retries(
     server,
     sender_agent: "Agent",
     target_agent_id: str,
-    message_text: str,
+    messages: List[MessageCreate],
     max_retries: int,
     timeout: int,
     logging_prefix: Optional[str] = None,
@@ -290,7 +347,6 @@ async def async_send_message_with_retries(
     logging_prefix = logging_prefix or "[async_send_message_with_retries]"
     for attempt in range(1, max_retries + 1):
         try:
-            messages = [MessageCreate(role=MessageRole.user, content=message_text, name=sender_agent.agent_state.name)]
             # Wrap in a timeout
             response = await asyncio.wait_for(
                 server.send_message_to_agent(
@@ -334,4 +390,88 @@ async def async_send_message_with_retries(
             await asyncio.sleep(backoff)
         else:
             sender_agent.logger.error(f"{logging_prefix} - Fatal error during agent to agent send_message: {error_msg}")
-            return error_msg
+            raise Exception(error_msg)
+
+
+def fire_and_forget_send_to_agent(
+    sender_agent: "Agent",
+    messages: List[MessageCreate],
+    other_agent_id: str,
+    log_prefix: str,
+    use_retries: bool = False,
+) -> None:
+    """
+    Fire-and-forget send of messages to a specific agent.
+    Returns immediately in the calling thread, never blocks.
+
+    Args:
+        sender_agent (Agent): The sender agent object.
+        messages (List[MessageCreate]): The messages to send.
+        other_agent_id (str): The ID of the target agent.
+        log_prefix (str): Prefix for logging.
+        use_retries (bool): If True, uses async_send_message_with_retries;
+            if False, calls server.send_message_to_agent directly.
+    """
+    server = get_letta_server()
+
+    # 1) Validate the target agent (raises ValueError if not in same org)
+    try:
+        server.agent_manager.get_agent_by_id(agent_id=other_agent_id, actor=sender_agent.user)
+    except NoResultFound:
+        raise ValueError(
+            f"The passed-in agent_id {other_agent_id} either does not exist, "
+            f"or does not belong to the same org ({sender_agent.user.organization_id})."
+        )
+
+    # 2) Define the async coroutine to run
+    async def background_task():
+        try:
+            if use_retries:
+                result = await async_send_message_with_retries(
+                    server=server,
+                    sender_agent=sender_agent,
+                    target_agent_id=other_agent_id,
+                    messages=messages,
+                    max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES,
+                    timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT,
+                    logging_prefix=log_prefix,
+                )
+                sender_agent.logger.info(f"{log_prefix} fire-and-forget success with retries: {result}")
+            else:
+                # Direct call to server.send_message_to_agent, no retry logic
+                await server.send_message_to_agent(
+                    agent_id=other_agent_id,
+                    actor=sender_agent.user,
+                    messages=messages,
+                    stream_steps=False,
+                    stream_tokens=False,
+                    use_assistant_message=True,
+                    assistant_message_tool_name=DEFAULT_MESSAGE_TOOL,
+                    assistant_message_tool_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
+                )
+                sender_agent.logger.info(f"{log_prefix} fire-and-forget success (no retries).")
+        except Exception as e:
+            sender_agent.logger.error(f"{log_prefix} fire-and-forget send failed: {e}")
+
+    # 3) Helper to run the coroutine in a brand-new event loop in a separate thread
+    def run_in_background_thread(coro):
+        def runner():
+            loop = asyncio.new_event_loop()
+            try:
+                asyncio.set_event_loop(loop)
+                loop.run_until_complete(coro)
+            finally:
+                loop.close()
+
+        thread = threading.Thread(target=runner, daemon=True)
+        thread.start()
+
+    # 4) Try to schedule the coroutine in an existing loop, else spawn a thread
+    try:
+        loop = asyncio.get_running_loop()
+        # If we get here, a loop is running; schedule the coroutine in background
+        loop.create_task(background_task())
+    except RuntimeError:
+        # Means no event loop is running in this thread
+        run_in_background_thread(background_task())
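
Both new helpers wrestle with the same problem: driving a coroutine from synchronous tool code whether or not an event loop is already running in the calling thread (note that `run_until_complete` on an already-running loop raises `RuntimeError`, which is why `fire_and_forget_send_to_agent` falls back to a fresh loop on a worker thread). A minimal standalone sketch of that pattern, with illustrative names only:

```python
import asyncio
import threading


def run_coro_from_sync(coro):
    """Run `coro` to completion from sync code, loop or no loop."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: asyncio.run is safe.
        return asyncio.run(coro)

    # A loop is already running (e.g. inside a server handler): block on a
    # fresh loop in a worker thread instead of re-entering the running loop.
    result = {}

    def runner():
        result["value"] = asyncio.run(coro)

    t = threading.Thread(target=runner, daemon=True)
    t.start()
    t.join()
    return result["value"]
```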
letta/llm_api/llm_api_tools.py CHANGED
@@ -29,6 +29,7 @@ from letta.schemas.openai.chat_completion_request import ChatCompletionRequest,
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.settings import ModelSettings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
+from letta.utils import run_async_task
 
 LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
 
@@ -156,21 +157,25 @@ def create(
             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
                 stream_interface, AgentRefreshStreamingInterface
             ), type(stream_interface)
-            response = openai_chat_completions_process_stream(
-                url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
-                api_key=model_settings.openai_api_key,
-                chat_completion_request=data,
-                stream_interface=stream_interface,
+            response = run_async_task(
+                openai_chat_completions_process_stream(
+                    url=llm_config.model_endpoint,
+                    api_key=model_settings.openai_api_key,
+                    chat_completion_request=data,
+                    stream_interface=stream_interface,
+                )
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
             if isinstance(stream_interface, AgentChunkStreamingInterface):
                 stream_interface.stream_start()
             try:
-                response = openai_chat_completions_request(
-                    url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
-                    api_key=model_settings.openai_api_key,
-                    chat_completion_request=data,
+                response = run_async_task(
+                    openai_chat_completions_request(
+                        url=llm_config.model_endpoint,
+                        api_key=model_settings.openai_api_key,
+                        chat_completion_request=data,
+                    )
                 )
             finally:
                 if isinstance(stream_interface, AgentChunkStreamingInterface):
@@ -344,9 +349,12 @@ def create(
             stream_interface.stream_start()
         try:
             # groq uses the openai chat completions API, so this component should be reusable
-            response = openai_chat_completions_request(
-                api_key=model_settings.groq_api_key,
-                chat_completion_request=data,
+            response = run_async_task(
+                openai_chat_completions_request(
+                    url=llm_config.model_endpoint,
+                    api_key=model_settings.groq_api_key,
+                    chat_completion_request=data,
+                )
            )
        finally:
            if isinstance(stream_interface, AgentChunkStreamingInterface):
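
The `run_async_task` helper imported from `letta.utils` is not shown in this excerpt (its body lands in the `letta/utils.py +24 -1` change listed above). A plausible minimal sketch, assuming it only bridges the now-async OpenAI request functions back into this synchronous `create()` path; the real implementation may differ:

```python
# Assumed shape of letta.utils.run_async_task, for illustration only.
import asyncio
from typing import Any, Coroutine, TypeVar

T = TypeVar("T")


def run_async_task(coro: Coroutine[Any, Any, T]) -> T:
    """Run an async coroutine to completion from synchronous code.

    Assumes the caller is not already inside a running event loop,
    which holds for the blocking create() paths above.
    """
    return asyncio.run(coro)
```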