letta-nightly 0.6.16.dev20250128104041__py3-none-any.whl → 0.6.17.dev20250129174639__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of letta-nightly might be problematic.

Files changed (35)
  1. letta/__init__.py +1 -1
  2. letta/agent.py +0 -3
  3. letta/client/client.py +5 -5
  4. letta/client/streaming.py +29 -20
  5. letta/constants.py +1 -1
  6. letta/functions/function_sets/multi_agent.py +55 -49
  7. letta/functions/functions.py +0 -1
  8. letta/functions/helpers.py +149 -9
  9. letta/llm_api/llm_api_tools.py +20 -12
  10. letta/llm_api/openai.py +15 -13
  11. letta/orm/agent.py +14 -2
  12. letta/orm/job.py +1 -1
  13. letta/orm/sqlalchemy_base.py +12 -4
  14. letta/schemas/job.py +17 -1
  15. letta/schemas/letta_request.py +2 -7
  16. letta/schemas/llm_config.py +9 -0
  17. letta/schemas/message.py +51 -22
  18. letta/schemas/openai/chat_completion_response.py +2 -2
  19. letta/schemas/run.py +1 -2
  20. letta/server/rest_api/app.py +5 -1
  21. letta/server/rest_api/chat_completions_interface.py +256 -0
  22. letta/server/rest_api/optimistic_json_parser.py +185 -0
  23. letta/server/rest_api/routers/openai/chat_completions/__init__.py +0 -0
  24. letta/server/rest_api/routers/openai/chat_completions/chat_completions.py +161 -0
  25. letta/server/rest_api/routers/v1/agents.py +22 -32
  26. letta/server/server.py +12 -12
  27. letta/services/job_manager.py +7 -12
  28. letta/services/tool_manager.py +17 -1
  29. letta/system.py +20 -0
  30. letta/utils.py +24 -1
  31. {letta_nightly-0.6.16.dev20250128104041.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/METADATA +4 -4
  32. {letta_nightly-0.6.16.dev20250128104041.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/RECORD +35 -31
  33. {letta_nightly-0.6.16.dev20250128104041.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/LICENSE +0 -0
  34. {letta_nightly-0.6.16.dev20250128104041.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/WHEEL +0 -0
  35. {letta_nightly-0.6.16.dev20250128104041.dist-info → letta_nightly-0.6.17.dev20250129174639.dist-info}/entry_points.txt +0 -0
letta/__init__.py CHANGED
@@ -1,4 +1,4 @@
-__version__ = "0.6.16"
+__version__ = "0.6.17"
 
 
 # import clients
letta/agent.py CHANGED
@@ -108,9 +108,6 @@ class Agent(BaseAgent):
             if not isinstance(rule, TerminalToolRule):
                 warnings.warn("Tool rules only work reliably for the latest OpenAI models that support structured outputs.")
                 break
-        # add default rule for having send_message be a terminal tool
-        if agent_state.tool_rules is None:
-            agent_state.tool_rules = []
 
         self.tool_rules_solver = ToolRulesSolver(tool_rules=agent_state.tool_rules)
 
letta/client/client.py CHANGED
@@ -280,7 +280,7 @@ class AbstractClient(object):
 
     def get_messages(
         self, agent_id: str, after: Optional[str] = None, before: Optional[str] = None, limit: Optional[int] = 1000
-    ) -> List[Message]:
+    ) -> List[LettaMessage]:
         raise NotImplementedError
 
     def list_model_configs(self) -> List[LLMConfig]:
@@ -812,7 +812,6 @@ class RESTClient(AbstractClient):
         Returns:
            memory (Memory): In-context memory of the agent
        """
-
        response = requests.get(f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/core-memory", headers=self.headers)
        if response.status_code != 200:
            raise ValueError(f"Failed to get in-context memory: {response.text}")
@@ -966,7 +965,7 @@ class RESTClient(AbstractClient):
 
     def get_messages(
         self, agent_id: str, before: Optional[str] = None, after: Optional[str] = None, limit: Optional[int] = 1000
-    ) -> List[Message]:
+    ) -> List[LettaMessage]:
         """
         Get messages from an agent with pagination.
 
@@ -984,7 +983,7 @@ class RESTClient(AbstractClient):
         response = requests.get(f"{self.base_url}/{self.api_prefix}/agents/{agent_id}/messages", params=params, headers=self.headers)
         if response.status_code != 200:
             raise ValueError(f"Failed to get messages: {response.text}")
-        return [Message(**message) for message in response.json()]
+        return [LettaMessage(**message) for message in response.json()]
 
     def send_message(
         self,
@@ -3356,7 +3355,7 @@ class LocalClient(AbstractClient):
 
     def get_messages(
         self, agent_id: str, before: Optional[str] = None, after: Optional[str] = None, limit: Optional[int] = 1000
-    ) -> List[Message]:
+    ) -> List[LettaMessage]:
         """
         Get messages from an agent with pagination.
 
@@ -3378,6 +3377,7 @@ class LocalClient(AbstractClient):
             after=after,
             limit=limit,
             reverse=True,
+            return_message_object=False,
         )
 
     def list_blocks(self, label: Optional[str] = None, templates_only: Optional[bool] = True) -> List[Block]:
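
Note: `get_messages` now returns `LettaMessage` objects (the user-facing union of `AssistantMessage`, `ReasoningMessage`, etc.) instead of raw `Message` records, and `LocalClient` passes `return_message_object=False` through to match. A minimal usage sketch under that assumption; the client construction and field access below are illustrative, not taken from this diff:

```python
# Hypothetical caller after this change: items are LettaMessage variants,
# not raw Message rows, so dispatch on message_type instead of Message fields.
from letta.client.client import RESTClient  # import path assumed

client = RESTClient(base_url="http://localhost:8283")
for msg in client.get_messages(agent_id="agent-123", limit=10):
    # LettaMessage variants carry a message_type discriminator.
    print(getattr(msg, "message_type", type(msg).__name__))
```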
letta/client/streaming.py CHANGED
@@ -1,18 +1,22 @@
 import json
-from typing import Generator
+from typing import Generator, Union, get_args
 
 import httpx
 from httpx_sse import SSEError, connect_sse
+from openai.types.chat.chat_completion_chunk import ChatCompletionChunk
 
 from letta.constants import OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING
 from letta.errors import LLMError
+from letta.log import get_logger
 from letta.schemas.enums import MessageStreamStatus
 from letta.schemas.letta_message import AssistantMessage, ReasoningMessage, ToolCallMessage, ToolReturnMessage
 from letta.schemas.letta_response import LettaStreamingResponse
 from letta.schemas.usage import LettaUsageStatistics
 
+logger = get_logger(__name__)
 
-def _sse_post(url: str, data: dict, headers: dict) -> Generator[LettaStreamingResponse, None, None]:
+
+def _sse_post(url: str, data: dict, headers: dict) -> Generator[Union[LettaStreamingResponse, ChatCompletionChunk], None, None]:
 
     with httpx.Client() as client:
         with connect_sse(client, method="POST", url=url, json=data, headers=headers) as event_source:
@@ -20,22 +24,26 @@ def _sse_post(url: str, data: dict, headers: dict) -> Generator[LettaStreamingRe
             # Inspect for errors before iterating (see https://github.com/florimondmanca/httpx-sse/pull/12)
             if not event_source.response.is_success:
                 # handle errors
-                from letta.utils import printd
+                pass
 
-                printd("Caught error before iterating SSE request:", vars(event_source.response))
-                printd(event_source.response.read())
+                logger.warning("Caught error before iterating SSE request:", vars(event_source.response))
+                logger.warning(event_source.response.read().decode("utf-8"))
 
                 try:
                     response_bytes = event_source.response.read()
                     response_dict = json.loads(response_bytes.decode("utf-8"))
-                    error_message = response_dict["error"]["message"]
                     # e.g.: This model's maximum context length is 8192 tokens. However, your messages resulted in 8198 tokens (7450 in the messages, 748 in the functions). Please reduce the length of the messages or functions.
-                    if OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in error_message:
-                        raise LLMError(error_message)
+                    if (
+                        "error" in response_dict
+                        and "message" in response_dict["error"]
+                        and OPENAI_CONTEXT_WINDOW_ERROR_SUBSTRING in response_dict["error"]["message"]
+                    ):
+                        logger.error(response_dict["error"]["message"])
+                        raise LLMError(response_dict["error"]["message"])
                 except LLMError:
                     raise
                 except:
-                    print(f"Failed to parse SSE message, throwing SSE HTTP error up the stack")
+                    logger.error(f"Failed to parse SSE message, throwing SSE HTTP error up the stack")
                 event_source.response.raise_for_status()
 
             try:
@@ -58,33 +66,34 @@ def _sse_post(url: str, data: dict, headers: dict) -> Generator[LettaStreamingRe
                         yield ToolReturnMessage(**chunk_data)
                     elif "step_count" in chunk_data:
                         yield LettaUsageStatistics(**chunk_data)
+                    elif chunk_data.get("object") == get_args(ChatCompletionChunk.__annotations__["object"])[0]:
+                        yield ChatCompletionChunk(**chunk_data)  # Add your processing logic for chat chunks here
                     else:
                         raise ValueError(f"Unknown message type in chunk_data: {chunk_data}")
 
             except SSEError as e:
-                print("Caught an error while iterating the SSE stream:", str(e))
+                logger.error("Caught an error while iterating the SSE stream:", str(e))
                 if "application/json" in str(e):  # Check if the error is because of JSON response
                     # TODO figure out a better way to catch the error other than re-trying with a POST
                     response = client.post(url=url, json=data, headers=headers)  # Make the request again to get the JSON response
                     if response.headers["Content-Type"].startswith("application/json"):
                         error_details = response.json()  # Parse the JSON to get the error message
-                        print("Request:", vars(response.request))
-                        print("POST Error:", error_details)
-                        print("Original SSE Error:", str(e))
+                        logger.error("Request:", vars(response.request))
+                        logger.error("POST Error:", error_details)
+                        logger.error("Original SSE Error:", str(e))
                     else:
-                        print("Failed to retrieve JSON error message via retry.")
+                        logger.error("Failed to retrieve JSON error message via retry.")
                 else:
-                    print("SSEError not related to 'application/json' content type.")
+                    logger.error("SSEError not related to 'application/json' content type.")
 
                 # Optionally re-raise the exception if you need to propagate it
                 raise e
 
             except Exception as e:
                 if event_source.response.request is not None:
-                    print("HTTP Request:", vars(event_source.response.request))
+                    logger.error("HTTP Request:", vars(event_source.response.request))
                 if event_source.response is not None:
-                    print("HTTP Status:", event_source.response.status_code)
-                    print("HTTP Headers:", event_source.response.headers)
-                    # print("HTTP Body:", event_source.response.text)
-                    print("Exception message:", str(e))
+                    logger.error("HTTP Status:", event_source.response.status_code)
+                    logger.error("HTTP Headers:", event_source.response.headers)
+                    logger.error("Exception message:", str(e))
                 raise e
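
Note on the new chunk dispatch: `ChatCompletionChunk.object` is annotated as `Literal["chat.completion.chunk"]`, so `get_args(...)[0]` recovers that literal string without hard-coding it. A standalone sketch of the same trick; the `Chunk` model below is a stand-in for the real OpenAI type:

```python
from typing import Literal, get_args

from pydantic import BaseModel


class Chunk(BaseModel):  # stand-in for openai's ChatCompletionChunk
    object: Literal["chat.completion.chunk"]


# Pull the expected discriminator value out of the annotation itself.
expected = get_args(Chunk.__annotations__["object"])[0]
assert expected == "chat.completion.chunk"

chunk_data = {"object": "chat.completion.chunk"}
if chunk_data.get("object") == expected:
    print("dispatch as a chat completion chunk")
```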
letta/constants.py CHANGED
@@ -50,7 +50,7 @@ BASE_TOOLS = ["send_message", "conversation_search", "archival_memory_insert", "
 # Base memory tools CAN be edited, and are added by default by the server
 BASE_MEMORY_TOOLS = ["core_memory_append", "core_memory_replace"]
 # Multi agent tools
-MULTI_AGENT_TOOLS = ["send_message_to_specific_agent", "send_message_to_agents_matching_all_tags"]
+MULTI_AGENT_TOOLS = ["send_message_to_agent_and_wait_for_reply", "send_message_to_agents_matching_all_tags", "send_message_to_agent_async"]
 MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES = 3
 MULTI_AGENT_SEND_MESSAGE_TIMEOUT = 20 * 60
 
letta/functions/function_sets/multi_agent.py CHANGED
@@ -1,80 +1,86 @@
 import asyncio
-from typing import TYPE_CHECKING, List, Optional
+from typing import TYPE_CHECKING, List
 
 from letta.constants import MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES, MULTI_AGENT_SEND_MESSAGE_TIMEOUT
-from letta.functions.helpers import async_send_message_with_retries
-from letta.orm.errors import NoResultFound
+from letta.functions.helpers import async_send_message_with_retries, execute_send_message_to_agent, fire_and_forget_send_to_agent
+from letta.schemas.enums import MessageRole
+from letta.schemas.message import MessageCreate
 from letta.server.rest_api.utils import get_letta_server
 
 if TYPE_CHECKING:
     from letta.agent import Agent
 
 
-def send_message_to_specific_agent(self: "Agent", message: str, other_agent_id: str) -> Optional[str]:
+def send_message_to_agent_and_wait_for_reply(self: "Agent", message: str, other_agent_id: str) -> str:
     """
-    Send a message to a specific Letta agent within the same organization.
+    Sends a message to a specific Letta agent within the same organization and waits for a response. The sender's identity is automatically included, so no explicit introduction is needed in the message. This function is designed for two-way communication where a reply is expected.
 
     Args:
-        message (str): The message to be sent to the target Letta agent.
-        other_agent_id (str): The identifier of the target Letta agent.
+        message (str): The content of the message to be sent to the target agent.
+        other_agent_id (str): The unique identifier of the target Letta agent.
 
     Returns:
-        Optional[str]: The response from the Letta agent. It's possible that the agent does not respond.
+        str: The response from the target agent.
     """
-    server = get_letta_server()
-
-    # Ensure the target agent is in the same org
-    try:
-        server.agent_manager.get_agent_by_id(agent_id=other_agent_id, actor=self.user)
-    except NoResultFound:
-        raise ValueError(
-            f"The passed-in agent_id {other_agent_id} either does not exist, "
-            f"or does not belong to the same org ({self.user.organization_id})."
-        )
-
-    # Async logic to send a message with retries and timeout
-    async def async_send_single_agent():
-        return await async_send_message_with_retries(
-            server=server,
-            sender_agent=self,
-            target_agent_id=other_agent_id,
-            message_text=message,
-            max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES,  # or your chosen constants
-            timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT,  # e.g., 1200 for 20 min
-            logging_prefix="[send_message_to_specific_agent]",
-        )
-
-    # Run in the current event loop or create one if needed
-    try:
-        return asyncio.run(async_send_single_agent())
-    except RuntimeError:
-        # e.g., in case there's already an active loop
-        loop = asyncio.get_event_loop()
-        if loop.is_running():
-            return loop.run_until_complete(async_send_single_agent())
-        else:
-            raise
+    messages = [MessageCreate(role=MessageRole.user, content=message, name=self.agent_state.name)]
+    return execute_send_message_to_agent(
+        sender_agent=self,
+        messages=messages,
+        other_agent_id=other_agent_id,
+        log_prefix="[send_message_to_agent_and_wait_for_reply]",
+    )
 
 
-def send_message_to_agents_matching_all_tags(self: "Agent", message: str, tags: List[str]) -> List[str]:
+def send_message_to_agent_async(self: "Agent", message: str, other_agent_id: str) -> str:
     """
-    Send a message to all agents in the same organization that match ALL of the given tags.
+    Sends a message to a specific Letta agent within the same organization. The sender's identity is automatically included, so no explicit introduction is required in the message. This function does not expect a response from the target agent, making it suitable for notifications or one-way communication.
 
-    Messages are sent in parallel for improved performance, with retries on flaky calls and timeouts for long-running requests.
-    This function does not use a cursor (pagination) and enforces a limit of 100 agents.
+    Args:
+        message (str): The content of the message to be sent to the target agent.
+        other_agent_id (str): The unique identifier of the target Letta agent.
+
+    Returns:
+        str: A confirmation message indicating the message was successfully sent.
+    """
+    message = (
+        f"[Incoming message from agent with ID '{self.agent_state.id}' - to reply to this message, "
+        f"make sure to use the 'send_message_to_agent_async' tool, or the agent will not receive your message] "
+        f"{message}"
+    )
+    messages = [MessageCreate(role=MessageRole.system, content=message, name=self.agent_state.name)]
+
+    # Do the actual fire-and-forget
+    fire_and_forget_send_to_agent(
+        sender_agent=self,
+        messages=messages,
+        other_agent_id=other_agent_id,
+        log_prefix="[send_message_to_agent_async]",
+        use_retries=False,  # or True if you want to use async_send_message_with_retries
+    )
+
+    # Immediately return to caller
+    return "Successfully sent message"
+
+
+def send_message_to_agents_matching_all_tags(self: "Agent", message: str, tags: List[str]) -> List[str]:
+    """
+    Sends a message to all agents within the same organization that match all of the specified tags. Messages are dispatched in parallel for improved performance, with retries to handle transient issues and timeouts to ensure responsiveness. This function enforces a limit of 100 agents and does not support pagination (cursor-based queries). Each agent must match all specified tags (`match_all_tags=True`) to be included.
 
     Args:
-        message (str): The message to be sent to each matching agent.
-        tags (List[str]): The list of tags that each agent must have (match_all_tags=True).
+        message (str): The content of the message to be sent to each matching agent.
+        tags (List[str]): A list of tags that an agent must possess to receive the message.
 
     Returns:
-        List[str]: A list of responses from the agents that match all tags.
-        Each response corresponds to one agent.
+        List[str]: A list of responses from the agents that matched all tags. Each
+        response corresponds to a single agent. Agents that do not respond will not
+        have an entry in the returned list.
     """
+
     server = get_letta_server()
 
     # Retrieve agents that match ALL specified tags
     matching_agents = server.agent_manager.list_agents(actor=self.user, tags=tags, match_all_tags=True, limit=100)
+    messages = [MessageCreate(role=MessageRole.user, content=message, name=self.agent_state.name)]
 
     async def send_messages_to_all_agents():
         tasks = [
@@ -82,7 +88,7 @@ def send_message_to_agents_matching_all_tags(self: "Agent", message: str, tags:
                 server=server,
                 sender_agent=self,
                 target_agent_id=agent_state.id,
-                message_text=message,
+                messages=messages,
                 max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES,
                 timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT,
                 logging_prefix="[send_message_to_agents_matching_all_tags]",
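
The renamed tools split cleanly along blocking behavior: `send_message_to_agent_and_wait_for_reply` blocks on a response (with retries and a timeout), while `send_message_to_agent_async` schedules delivery and returns a confirmation immediately. A hedged usage sketch; the `agent` instance and target ID below are placeholders:

```python
# Illustrative only: calling the two variants from tool code.
reply = send_message_to_agent_and_wait_for_reply(
    agent, message="Status report, please.", other_agent_id="agent-xyz"
)  # blocks until the target agent replies (or retries/timeout are exhausted)

ack = send_message_to_agent_async(
    agent, message="FYI: the nightly build finished.", other_agent_id="agent-xyz"
)  # returns "Successfully sent message" immediately; delivery runs in background
```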
letta/functions/functions.py CHANGED
@@ -122,7 +122,6 @@ def get_json_schema_from_module(module_name: str, function_name: str) -> dict:
         generated_schema = generate_schema(attr)
 
         return generated_schema
-
     except ModuleNotFoundError:
         raise ModuleNotFoundError(f"Module '{module_name}' not found.")
     except AttributeError:
letta/functions/helpers.py CHANGED
@@ -1,15 +1,25 @@
+import asyncio
 import json
-from typing import Any, Optional, Union
+import threading
+from random import uniform
+from typing import Any, List, Optional, Union
 
 import humps
 from composio.constants import DEFAULT_ENTITY_ID
 from pydantic import BaseModel
 
-from letta.constants import COMPOSIO_ENTITY_ENV_VAR_KEY, DEFAULT_MESSAGE_TOOL, DEFAULT_MESSAGE_TOOL_KWARG
-from letta.schemas.enums import MessageRole
+from letta.constants import (
+    COMPOSIO_ENTITY_ENV_VAR_KEY,
+    DEFAULT_MESSAGE_TOOL,
+    DEFAULT_MESSAGE_TOOL_KWARG,
+    MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES,
+    MULTI_AGENT_SEND_MESSAGE_TIMEOUT,
+)
+from letta.orm.errors import NoResultFound
 from letta.schemas.letta_message import AssistantMessage, ReasoningMessage, ToolCallMessage
 from letta.schemas.letta_response import LettaResponse
 from letta.schemas.message import MessageCreate
+from letta.server.rest_api.utils import get_letta_server
 
 
 # TODO: This is kind of hacky, as this is used to search up the action later on composio's side
@@ -259,16 +269,63 @@ def parse_letta_response_for_assistant_message(
     return None
 
 
-import asyncio
-from random import uniform
-from typing import Optional
+def execute_send_message_to_agent(
+    sender_agent: "Agent",
+    messages: List[MessageCreate],
+    other_agent_id: str,
+    log_prefix: str,
+) -> Optional[str]:
+    """
+    Helper function to send messages to a specific Letta agent.
+
+    Args:
+        sender_agent ("Agent"): The sender agent object.
+        messages (List[MessageCreate]): The messages to send.
+        other_agent_id (str): The identifier of the target Letta agent.
+        log_prefix (str): Logging prefix for retries.
+
+    Returns:
+        Optional[str]: The response from the Letta agent if required by the caller.
+    """
+    server = get_letta_server()
+
+    # Ensure the target agent is in the same org
+    try:
+        server.agent_manager.get_agent_by_id(agent_id=other_agent_id, actor=sender_agent.user)
+    except NoResultFound:
+        raise ValueError(
+            f"The passed-in agent_id {other_agent_id} either does not exist, "
+            f"or does not belong to the same org ({sender_agent.user.organization_id})."
+        )
+
+    # Async logic to send a message with retries and timeout
+    async def async_send():
+        return await async_send_message_with_retries(
+            server=server,
+            sender_agent=sender_agent,
+            target_agent_id=other_agent_id,
+            messages=messages,
+            max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES,
+            timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT,
+            logging_prefix=log_prefix,
+        )
+
+    # Run in the current event loop or create one if needed
+    try:
+        return asyncio.run(async_send())
+    except RuntimeError:
+        loop = asyncio.get_event_loop()
+        if loop.is_running():
+            return loop.run_until_complete(async_send())
+        else:
+            raise
 
 
 async def async_send_message_with_retries(
     server,
     sender_agent: "Agent",
     target_agent_id: str,
-    message_text: str,
+    messages: List[MessageCreate],
     max_retries: int,
     timeout: int,
     logging_prefix: Optional[str] = None,
@@ -290,7 +347,6 @@ async def async_send_message_with_retries(
     logging_prefix = logging_prefix or "[async_send_message_with_retries]"
     for attempt in range(1, max_retries + 1):
         try:
-            messages = [MessageCreate(role=MessageRole.user, content=message_text, name=sender_agent.agent_state.name)]
             # Wrap in a timeout
             response = await asyncio.wait_for(
                 server.send_message_to_agent(
@@ -334,4 +390,88 @@ async def async_send_message_with_retries(
             await asyncio.sleep(backoff)
         else:
             sender_agent.logger.error(f"{logging_prefix} - Fatal error during agent to agent send_message: {error_msg}")
-            return error_msg
+            raise Exception(error_msg)
+
+
+def fire_and_forget_send_to_agent(
+    sender_agent: "Agent",
+    messages: List[MessageCreate],
+    other_agent_id: str,
+    log_prefix: str,
+    use_retries: bool = False,
+) -> None:
+    """
+    Fire-and-forget send of messages to a specific agent.
+    Returns immediately in the calling thread, never blocks.
+
+    Args:
+        sender_agent (Agent): The sender agent object.
+        messages (List[MessageCreate]): The messages to send.
+        other_agent_id (str): The ID of the target agent.
+        log_prefix (str): Prefix for logging.
+        use_retries (bool): If True, uses async_send_message_with_retries;
+            if False, calls server.send_message_to_agent directly.
+    """
+    server = get_letta_server()
+
+    # 1) Validate the target agent (raises ValueError if not in same org)
+    try:
+        server.agent_manager.get_agent_by_id(agent_id=other_agent_id, actor=sender_agent.user)
+    except NoResultFound:
+        raise ValueError(
+            f"The passed-in agent_id {other_agent_id} either does not exist, "
+            f"or does not belong to the same org ({sender_agent.user.organization_id})."
+        )
+
+    # 2) Define the async coroutine to run
+    async def background_task():
+        try:
+            if use_retries:
+                result = await async_send_message_with_retries(
+                    server=server,
+                    sender_agent=sender_agent,
+                    target_agent_id=other_agent_id,
+                    messages=messages,
+                    max_retries=MULTI_AGENT_SEND_MESSAGE_MAX_RETRIES,
+                    timeout=MULTI_AGENT_SEND_MESSAGE_TIMEOUT,
+                    logging_prefix=log_prefix,
+                )
+                sender_agent.logger.info(f"{log_prefix} fire-and-forget success with retries: {result}")
+            else:
+                # Direct call to server.send_message_to_agent, no retry logic
+                await server.send_message_to_agent(
+                    agent_id=other_agent_id,
+                    actor=sender_agent.user,
+                    messages=messages,
+                    stream_steps=False,
+                    stream_tokens=False,
+                    use_assistant_message=True,
+                    assistant_message_tool_name=DEFAULT_MESSAGE_TOOL,
+                    assistant_message_tool_kwarg=DEFAULT_MESSAGE_TOOL_KWARG,
+                )
+                sender_agent.logger.info(f"{log_prefix} fire-and-forget success (no retries).")
+        except Exception as e:
+            sender_agent.logger.error(f"{log_prefix} fire-and-forget send failed: {e}")
+
+    # 3) Helper to run the coroutine in a brand-new event loop in a separate thread
+    def run_in_background_thread(coro):
+        def runner():
+            loop = asyncio.new_event_loop()
+            try:
+                asyncio.set_event_loop(loop)
+                loop.run_until_complete(coro)
+            finally:
+                loop.close()
+
+        thread = threading.Thread(target=runner, daemon=True)
+        thread.start()
+
+    # 4) Try to schedule the coroutine in an existing loop, else spawn a thread
+    try:
+        loop = asyncio.get_running_loop()
+        # If we get here, a loop is running; schedule the coroutine in background
+        loop.create_task(background_task())
+    except RuntimeError:
+        # Means no event loop is running in this thread
+        run_in_background_thread(background_task())
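
Both new helpers wrestle with the same problem: driving a coroutine from synchronous tool code whether or not an event loop is already running in the calling thread (note that `run_until_complete` on an already-running loop raises `RuntimeError`, which is why `fire_and_forget_send_to_agent` falls back to a fresh loop on a worker thread). A minimal standalone sketch of that pattern, with illustrative names only:

```python
import asyncio
import threading


def run_coro_from_sync(coro):
    """Run `coro` to completion from sync code, loop or no loop."""
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop in this thread: asyncio.run is safe.
        return asyncio.run(coro)

    # A loop is already running (e.g. inside a server handler): block on a
    # fresh loop in a worker thread instead of re-entering the running loop.
    result = {}

    def runner():
        result["value"] = asyncio.run(coro)

    t = threading.Thread(target=runner, daemon=True)
    t.start()
    t.join()
    return result["value"]
```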
letta/llm_api/llm_api_tools.py CHANGED
@@ -29,6 +29,7 @@ from letta.schemas.openai.chat_completion_request import ChatCompletionRequest,
 from letta.schemas.openai.chat_completion_response import ChatCompletionResponse
 from letta.settings import ModelSettings
 from letta.streaming_interface import AgentChunkStreamingInterface, AgentRefreshStreamingInterface
+from letta.utils import run_async_task
 
 LLM_API_PROVIDER_OPTIONS = ["openai", "azure", "anthropic", "google_ai", "cohere", "local", "groq"]
 
@@ -156,21 +157,25 @@ def create(
             assert isinstance(stream_interface, AgentChunkStreamingInterface) or isinstance(
                 stream_interface, AgentRefreshStreamingInterface
             ), type(stream_interface)
-            response = openai_chat_completions_process_stream(
-                url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
-                api_key=model_settings.openai_api_key,
-                chat_completion_request=data,
-                stream_interface=stream_interface,
+            response = run_async_task(
+                openai_chat_completions_process_stream(
+                    url=llm_config.model_endpoint,
+                    api_key=model_settings.openai_api_key,
+                    chat_completion_request=data,
+                    stream_interface=stream_interface,
+                )
             )
         else:  # Client did not request token streaming (expect a blocking backend response)
             data.stream = False
             if isinstance(stream_interface, AgentChunkStreamingInterface):
                 stream_interface.stream_start()
             try:
-                response = openai_chat_completions_request(
-                    url=llm_config.model_endpoint,  # https://api.openai.com/v1 -> https://api.openai.com/v1/chat/completions
-                    api_key=model_settings.openai_api_key,
-                    chat_completion_request=data,
+                response = run_async_task(
+                    openai_chat_completions_request(
+                        url=llm_config.model_endpoint,
+                        api_key=model_settings.openai_api_key,
+                        chat_completion_request=data,
+                    )
                 )
             finally:
                 if isinstance(stream_interface, AgentChunkStreamingInterface):
@@ -344,9 +349,12 @@ def create(
             stream_interface.stream_start()
         try:
             # groq uses the openai chat completions API, so this component should be reusable
-            response = openai_chat_completions_request(
-                api_key=model_settings.groq_api_key,
-                chat_completion_request=data,
+            response = run_async_task(
+                openai_chat_completions_request(
+                    url=llm_config.model_endpoint,
+                    api_key=model_settings.groq_api_key,
+                    chat_completion_request=data,
+                )
            )
        finally:
            if isinstance(stream_interface, AgentChunkStreamingInterface):
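
The `run_async_task` helper imported from `letta.utils` is not shown in this excerpt (its body lands in the `letta/utils.py +24 -1` change listed above). A plausible minimal sketch, assuming it only bridges the now-async OpenAI request functions back into this synchronous `create()` path; the real implementation may differ:

```python
# Assumed shape of letta.utils.run_async_task, for illustration only.
import asyncio
from typing import Any, Coroutine, TypeVar

T = TypeVar("T")


def run_async_task(coro: Coroutine[Any, Any, T]) -> T:
    """Run an async coroutine to completion from synchronous code.

    Assumes the caller is not already inside a running event loop,
    which holds for the blocking create() paths above.
    """
    return asyncio.run(coro)
```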