mail-swarms 1.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137) hide show
  1. mail/__init__.py +35 -0
  2. mail/api.py +1964 -0
  3. mail/cli.py +432 -0
  4. mail/client.py +1657 -0
  5. mail/config/__init__.py +8 -0
  6. mail/config/client.py +87 -0
  7. mail/config/server.py +165 -0
  8. mail/core/__init__.py +72 -0
  9. mail/core/actions.py +69 -0
  10. mail/core/agents.py +73 -0
  11. mail/core/message.py +366 -0
  12. mail/core/runtime.py +3537 -0
  13. mail/core/tasks.py +311 -0
  14. mail/core/tools.py +1206 -0
  15. mail/db/__init__.py +0 -0
  16. mail/db/init.py +182 -0
  17. mail/db/types.py +65 -0
  18. mail/db/utils.py +523 -0
  19. mail/examples/__init__.py +27 -0
  20. mail/examples/analyst_dummy/__init__.py +15 -0
  21. mail/examples/analyst_dummy/agent.py +136 -0
  22. mail/examples/analyst_dummy/prompts.py +44 -0
  23. mail/examples/consultant_dummy/__init__.py +15 -0
  24. mail/examples/consultant_dummy/agent.py +136 -0
  25. mail/examples/consultant_dummy/prompts.py +42 -0
  26. mail/examples/data_analysis/__init__.py +40 -0
  27. mail/examples/data_analysis/analyst/__init__.py +9 -0
  28. mail/examples/data_analysis/analyst/agent.py +67 -0
  29. mail/examples/data_analysis/analyst/prompts.py +53 -0
  30. mail/examples/data_analysis/processor/__init__.py +13 -0
  31. mail/examples/data_analysis/processor/actions.py +293 -0
  32. mail/examples/data_analysis/processor/agent.py +67 -0
  33. mail/examples/data_analysis/processor/prompts.py +48 -0
  34. mail/examples/data_analysis/reporter/__init__.py +10 -0
  35. mail/examples/data_analysis/reporter/actions.py +187 -0
  36. mail/examples/data_analysis/reporter/agent.py +67 -0
  37. mail/examples/data_analysis/reporter/prompts.py +49 -0
  38. mail/examples/data_analysis/statistics/__init__.py +18 -0
  39. mail/examples/data_analysis/statistics/actions.py +343 -0
  40. mail/examples/data_analysis/statistics/agent.py +67 -0
  41. mail/examples/data_analysis/statistics/prompts.py +60 -0
  42. mail/examples/mafia/__init__.py +0 -0
  43. mail/examples/mafia/game.py +1537 -0
  44. mail/examples/mafia/narrator_tools.py +396 -0
  45. mail/examples/mafia/personas.py +240 -0
  46. mail/examples/mafia/prompts.py +489 -0
  47. mail/examples/mafia/roles.py +147 -0
  48. mail/examples/mafia/spec.md +350 -0
  49. mail/examples/math_dummy/__init__.py +23 -0
  50. mail/examples/math_dummy/actions.py +252 -0
  51. mail/examples/math_dummy/agent.py +136 -0
  52. mail/examples/math_dummy/prompts.py +46 -0
  53. mail/examples/math_dummy/types.py +5 -0
  54. mail/examples/research/__init__.py +39 -0
  55. mail/examples/research/researcher/__init__.py +9 -0
  56. mail/examples/research/researcher/agent.py +67 -0
  57. mail/examples/research/researcher/prompts.py +54 -0
  58. mail/examples/research/searcher/__init__.py +10 -0
  59. mail/examples/research/searcher/actions.py +324 -0
  60. mail/examples/research/searcher/agent.py +67 -0
  61. mail/examples/research/searcher/prompts.py +53 -0
  62. mail/examples/research/summarizer/__init__.py +18 -0
  63. mail/examples/research/summarizer/actions.py +255 -0
  64. mail/examples/research/summarizer/agent.py +67 -0
  65. mail/examples/research/summarizer/prompts.py +55 -0
  66. mail/examples/research/verifier/__init__.py +10 -0
  67. mail/examples/research/verifier/actions.py +337 -0
  68. mail/examples/research/verifier/agent.py +67 -0
  69. mail/examples/research/verifier/prompts.py +52 -0
  70. mail/examples/supervisor/__init__.py +11 -0
  71. mail/examples/supervisor/agent.py +4 -0
  72. mail/examples/supervisor/prompts.py +93 -0
  73. mail/examples/support/__init__.py +33 -0
  74. mail/examples/support/classifier/__init__.py +10 -0
  75. mail/examples/support/classifier/actions.py +307 -0
  76. mail/examples/support/classifier/agent.py +68 -0
  77. mail/examples/support/classifier/prompts.py +56 -0
  78. mail/examples/support/coordinator/__init__.py +9 -0
  79. mail/examples/support/coordinator/agent.py +67 -0
  80. mail/examples/support/coordinator/prompts.py +48 -0
  81. mail/examples/support/faq/__init__.py +10 -0
  82. mail/examples/support/faq/actions.py +182 -0
  83. mail/examples/support/faq/agent.py +67 -0
  84. mail/examples/support/faq/prompts.py +42 -0
  85. mail/examples/support/sentiment/__init__.py +15 -0
  86. mail/examples/support/sentiment/actions.py +341 -0
  87. mail/examples/support/sentiment/agent.py +67 -0
  88. mail/examples/support/sentiment/prompts.py +54 -0
  89. mail/examples/weather_dummy/__init__.py +23 -0
  90. mail/examples/weather_dummy/actions.py +75 -0
  91. mail/examples/weather_dummy/agent.py +136 -0
  92. mail/examples/weather_dummy/prompts.py +35 -0
  93. mail/examples/weather_dummy/types.py +5 -0
  94. mail/factories/__init__.py +27 -0
  95. mail/factories/action.py +223 -0
  96. mail/factories/base.py +1531 -0
  97. mail/factories/supervisor.py +241 -0
  98. mail/net/__init__.py +7 -0
  99. mail/net/registry.py +712 -0
  100. mail/net/router.py +728 -0
  101. mail/net/server_utils.py +114 -0
  102. mail/net/types.py +247 -0
  103. mail/server.py +1605 -0
  104. mail/stdlib/__init__.py +0 -0
  105. mail/stdlib/anthropic/__init__.py +0 -0
  106. mail/stdlib/fs/__init__.py +15 -0
  107. mail/stdlib/fs/actions.py +209 -0
  108. mail/stdlib/http/__init__.py +19 -0
  109. mail/stdlib/http/actions.py +333 -0
  110. mail/stdlib/interswarm/__init__.py +11 -0
  111. mail/stdlib/interswarm/actions.py +208 -0
  112. mail/stdlib/mcp/__init__.py +19 -0
  113. mail/stdlib/mcp/actions.py +294 -0
  114. mail/stdlib/openai/__init__.py +13 -0
  115. mail/stdlib/openai/agents.py +451 -0
  116. mail/summarizer.py +234 -0
  117. mail/swarms_json/__init__.py +27 -0
  118. mail/swarms_json/types.py +87 -0
  119. mail/swarms_json/utils.py +255 -0
  120. mail/url_scheme.py +51 -0
  121. mail/utils/__init__.py +53 -0
  122. mail/utils/auth.py +194 -0
  123. mail/utils/context.py +17 -0
  124. mail/utils/logger.py +73 -0
  125. mail/utils/openai.py +212 -0
  126. mail/utils/parsing.py +89 -0
  127. mail/utils/serialize.py +292 -0
  128. mail/utils/store.py +49 -0
  129. mail/utils/string_builder.py +119 -0
  130. mail/utils/version.py +20 -0
  131. mail_swarms-1.3.2.dist-info/METADATA +237 -0
  132. mail_swarms-1.3.2.dist-info/RECORD +137 -0
  133. mail_swarms-1.3.2.dist-info/WHEEL +4 -0
  134. mail_swarms-1.3.2.dist-info/entry_points.txt +2 -0
  135. mail_swarms-1.3.2.dist-info/licenses/LICENSE +202 -0
  136. mail_swarms-1.3.2.dist-info/licenses/NOTICE +10 -0
  137. mail_swarms-1.3.2.dist-info/licenses/THIRD_PARTY_NOTICES.md +12334 -0
mail/core/runtime.py ADDED
@@ -0,0 +1,3537 @@
1
+ # SPDX-License-Identifier: Apache-2.0
2
+ # Copyright (c) 2025 Addison Kline, Ryan Heaton
3
+
4
+ import asyncio
5
+ import copy
6
+ import datetime
7
+ import logging
8
+ import traceback
9
+ import uuid
10
+ from asyncio import PriorityQueue, Task
11
+ from collections import defaultdict
12
+ from collections.abc import AsyncGenerator
13
+ from typing import Any, Literal
14
+
15
+ import langsmith as ls
16
+ import rich
17
+ import tiktoken
18
+ import ujson
19
+ from litellm import aresponses
20
+ from sse_starlette import ServerSentEvent
21
+
22
+ from mail.db.utils import (
23
+ create_agent_history,
24
+ create_task,
25
+ create_task_event,
26
+ create_task_response,
27
+ load_agent_histories,
28
+ load_task_events,
29
+ load_task_responses,
30
+ load_tasks,
31
+ update_task,
32
+ )
33
+ from mail.net import InterswarmRouter, SwarmRegistry
34
+ from mail.utils.context import get_model_ctx_len
35
+ from mail.utils.serialize import _REDACT_KEYS, _format_event_sections, _serialize_event
36
+ from mail.utils.string_builder import build_mail_help_string
37
+
38
+ from .actions import (
39
+ ActionCore,
40
+ ActionOverrideFunction,
41
+ )
42
+ from .agents import (
43
+ AgentCore,
44
+ )
45
+ from .message import (
46
+ MAIL_ALL_LOCAL_AGENTS,
47
+ MAILAddress,
48
+ MAILBroadcast,
49
+ MAILInterswarmMessage,
50
+ MAILMessage,
51
+ MAILRequest,
52
+ MAILResponse,
53
+ build_interswarm_mail_xml,
54
+ build_mail_xml,
55
+ create_agent_address,
56
+ create_system_address,
57
+ parse_agent_address,
58
+ )
59
+ from .tasks import MAILTask
60
+ from .tools import (
61
+ AgentToolCall,
62
+ convert_call_to_mail_message,
63
+ convert_manual_step_call_to_mail_message,
64
+ normalize_breakpoint_tool_call,
65
+ )
66
+
67
# Module-level logger for the MAIL runtime.
logger = logging.getLogger("mail.runtime")

# Key template for MAILRuntime.agent_histories: one history list is kept
# per (task, agent) pair.
AGENT_HISTORY_KEY = "{task_id}::{agent_name}"
# Sentinel used to distinguish "argument not supplied" from an explicit None.
_UNSET = object()
71
+
72
+
73
class _SSEPayload(dict):
    """Dict subclass whose ``str()`` form is its JSON encoding.

    Useful wherever a mapping must be rendered as a JSON string implicitly
    (e.g. when handed to consumers that call ``str()`` on the payload).
    """

    def __str__(self) -> str:
        # Serialize the mapping itself with ujson, matching the module's
        # JSON library choice.
        encoded: str = ujson.dumps(self)
        return encoded
76
+
77
+
78
+ class MAILRuntime:
79
+ """
80
+ Runtime for an individual MAIL swarm instance.
81
+ Handles the local message queue and provides an action executor for tools.
82
+ """
83
+
84
    def __init__(
        self,
        agents: dict[str, AgentCore],
        actions: dict[str, ActionCore],
        user_id: str,
        user_role: Literal["admin", "agent", "user"],
        swarm_name: str,
        entrypoint: str,
        swarm_registry: SwarmRegistry | None = None,
        enable_interswarm: bool = False,
        breakpoint_tools: list[str] | None = None,
        exclude_tools: list[str] | None = None,
        enable_db_agent_histories: bool = False,
    ):
        """
        Initialize a MAIL runtime for one swarm instance.

        Args:
            agents: Agent cores available in this swarm, keyed by agent name.
            actions: Action cores available to agents, keyed by action name.
            user_id: Identifier of the user this runtime instance serves.
            user_role: Role of the caller ("admin", "agent", or "user").
            swarm_name: Name of the local swarm.
            entrypoint: Name of the agent that receives user/system entry messages.
            swarm_registry: Registry of known swarms; required for interswarm routing.
            enable_interswarm: If True (and a registry is given), create an
                InterswarmRouter and register the local message handler.
            breakpoint_tools: Tool names that pause the task when called.
            exclude_tools: Tool names to withhold from agents.
            enable_db_agent_histories: If True, histories/tasks are persisted
                to and loaded from the database.
        """
        # Use a priority queue with a deterministic tiebreaker to avoid comparing dicts
        # Structure: (priority, seq, message)
        self.message_queue: PriorityQueue[tuple[int, int, MAILMessage]] = (
            PriorityQueue()
        )
        # Monotonic sequence number used as the priority-queue tiebreaker.
        self._message_seq: int = 0
        # Completed task responses, tagged with their task id.
        self.response_queue: asyncio.Queue[tuple[str, MAILMessage]] = asyncio.Queue()
        self.agents = agents
        self.actions = actions
        # Agent histories in an LLM-friendly format
        # (keyed by AGENT_HISTORY_KEY, i.e. "task_id::agent_name").
        self.agent_histories: dict[str, list[dict[str, Any]]] = defaultdict(list)
        self.enable_db_agent_histories = enable_db_agent_histories
        # MAIL tasks in swarm memory
        self.mail_tasks: dict[str, MAILTask] = {}
        # asyncio tasks that are currently active
        self.active_tasks: set[Task[Any]] = set()
        self.shutdown_event = asyncio.Event()
        self.is_running = False
        # Futures resolved when a submitted request receives its response.
        self.pending_requests: dict[str, asyncio.Future[MAILMessage]] = {}
        self.user_id = user_id
        self.user_role = user_role
        # Step counters and optional per-task max_steps overrides.
        self._steps_by_task: dict[str, int] = defaultdict(int)
        self._max_steps_by_task: dict[str, int | None] = {}
        # Per-task event notifier for streaming to avoid busy-waiting
        self._events_available_by_task: dict[str, asyncio.Event] = defaultdict(
            asyncio.Event
        )
        # Interswarm messaging support
        self.swarm_name = swarm_name
        self.enable_interswarm = enable_interswarm
        self.swarm_registry = swarm_registry
        self.interswarm_router: InterswarmRouter | None = None
        self.entrypoint = entrypoint
        if enable_interswarm and swarm_registry:
            self.interswarm_router = InterswarmRouter(swarm_registry, swarm_name)
            # Register local message handler
            self.interswarm_router.register_message_handler(
                "local_message_handler", self._handle_local_message
            )
        self.breakpoint_tools = list(breakpoint_tools or [])
        self._is_continuous = False
        self._is_manual = False
        # Message buffer for manual mode
        self.manual_message_buffer: dict[str, list[MAILMessage]] = defaultdict(list)
        self.manual_return_events: dict[str, asyncio.Event] = defaultdict(asyncio.Event)
        # NOTE(review): defaultdict(None) has no default factory, so this behaves
        # exactly like a plain dict (missing keys raise KeyError) — confirm intent.
        self.manual_return_messages: dict[str, MAILMessage | None] = defaultdict(None)
        self.exclude_tools = list(exclude_tools or [])
        # Final response message per completed task.
        self.response_messages: dict[str, MAILMessage] = {}
        # Bookkeeping for resuming from breakpoint tool calls.
        self.last_breakpoint_caller: dict[str, str] = {}
        self.last_breakpoint_tool_calls: dict[str, list[AgentToolCall]] = {}
        self.this_owner = f"{self.user_role}:{self.user_id}@{self.swarm_name}"
        # Track outstanding requests per task per agent for await_message
        # Structure: task_id -> sender_agent_name -> count of outstanding requests
        self.outstanding_requests: dict[str, dict[str, int]] = defaultdict(
            lambda: defaultdict(int)
        )
154
+
155
+ def _log_prelude(self) -> str:
156
+ """
157
+ Build the string that will be prepended to all log messages.
158
+ """
159
+ return f"[[yellow]{self.user_role}[/yellow]:{self.user_id}@[green]{self.swarm_name}[/green]]"
160
+
161
+ def _reset_step_counter(self, task_id: str) -> None:
162
+ """
163
+ Reset the step counter for a task (used to enforce per-task max_steps).
164
+ """
165
+ self._steps_by_task[task_id] = 0
166
+
167
+ def _set_task_max_steps(self, task_id: str, max_steps: int | None) -> None:
168
+ """
169
+ Record a per-task max_steps override (None disables the limit).
170
+ """
171
+ self._max_steps_by_task[task_id] = max_steps
172
+
173
+ def _normalize_max_steps(self, max_steps: Any) -> int | None:
174
+ """
175
+ Normalize a max_steps override into an int or None.
176
+ """
177
+ if max_steps is None:
178
+ return None
179
+ if isinstance(max_steps, int):
180
+ return max_steps
181
+ try:
182
+ return int(max_steps)
183
+ except (TypeError, ValueError) as exc:
184
+ raise ValueError("max_steps must be an int or null") from exc
185
+
186
+ def _clear_task_step_state(self, task_id: str) -> None:
187
+ """
188
+ Clear step counters and overrides when a task completes.
189
+ """
190
+ self._steps_by_task.pop(task_id, None)
191
+ self._max_steps_by_task.pop(task_id, None)
192
+
193
+ async def start_interswarm(self) -> None:
194
+ """
195
+ Start interswarm messaging capabilities.
196
+ """
197
+ if self.enable_interswarm and self.interswarm_router:
198
+ await self.interswarm_router.start()
199
+ logger.info(f"{self._log_prelude()} started interswarm messaging")
200
+
201
+ async def stop_interswarm(self) -> None:
202
+ """
203
+ Stop interswarm messaging capabilities.
204
+ """
205
+ if self.interswarm_router:
206
+ await self.interswarm_router.stop()
207
+ logger.info(f"{self._log_prelude()} stopped interswarm messaging")
208
+
209
    async def load_agent_histories_from_db(self) -> None:
        """
        Load existing agent histories from the database.
        Only called when enable_db_agent_histories is True.

        Loaded histories are merged into ``self.agent_histories`` (persisted
        entries are prepended to any in-memory history for the same key).
        Failures are logged as warnings, not raised — startup proceeds with
        whatever is already in memory. Task metadata is loaded afterwards.
        """
        # No-op unless DB persistence is turned on.
        if not self.enable_db_agent_histories:
            return

        try:
            histories = await load_agent_histories(
                swarm_name=self.swarm_name,
                caller_role=self.user_role,
                caller_id=self.user_id,
            )
            # Merge loaded histories into agent_histories
            for key, history_list in histories.items():
                if key in self.agent_histories:
                    # Prepend loaded history to any existing history
                    self.agent_histories[key] = history_list + self.agent_histories[key]
                else:
                    self.agent_histories[key] = history_list

            logger.info(
                f"{self._log_prelude()} loaded {len(histories)} agent histories from database"
            )
        except Exception as e:
            # Best-effort load: a DB failure must not prevent the runtime
            # from starting.
            logger.warning(
                f"{self._log_prelude()} failed to load agent histories from database: {e}"
            )

        # Also load tasks from DB
        await self.load_tasks_from_db()
241
+
242
    async def load_tasks_from_db(self) -> None:
        """
        Load existing tasks from the database.
        Only called when enable_db_agent_histories is True.

        Rebuilds ``self.mail_tasks`` from persisted task records (without
        overwriting tasks already in memory), replays each task's stored
        events onto it, and restores final response messages into
        ``self.response_messages``. DB failures are logged, not raised.
        """
        if not self.enable_db_agent_histories:
            return

        try:
            # Load task metadata
            task_records = await load_tasks(
                swarm_name=self.swarm_name,
                caller_role=self.user_role,
                caller_id=self.user_id,
            )

            for task_data in task_records:
                task_id = task_data["task_id"]
                if task_id in self.mail_tasks:
                    continue  # Don't overwrite existing tasks

                # Reconstruct task from DB data
                task = MAILTask.from_db_dict(task_data)
                self.mail_tasks[task_id] = task

                # Load events for this task
                events = await load_task_events(
                    task_id=task_id,
                    swarm_name=self.swarm_name,
                    caller_role=self.user_role,
                    caller_id=self.user_id,
                )
                for event_data in events:
                    task.add_event_from_db(event_data)

            # Load response messages
            responses = await load_task_responses(
                swarm_name=self.swarm_name,
                caller_role=self.user_role,
                caller_id=self.user_id,
            )
            # NOTE: this loop reuses the name `task_id` from the loop above;
            # harmless here since the earlier loop has finished.
            for task_id, response in responses.items():
                self.response_messages[task_id] = response  # type: ignore

            logger.info(
                f"{self._log_prelude()} loaded {len(task_records)} tasks and {len(responses)} responses from database"
            )
        except Exception as e:
            # Best-effort load; the runtime can operate without persisted tasks.
            logger.warning(
                f"{self._log_prelude()} failed to load tasks from database: {e}"
            )
293
+
294
+ async def is_interswarm_running(self) -> bool:
295
+ """
296
+ Check if interswarm messaging is running.
297
+ """
298
+ if self.interswarm_router:
299
+ return await self.interswarm_router.is_running()
300
+ return False
301
+
302
+ async def _handle_local_message(self, message: MAILInterswarmMessage) -> None:
303
+ """
304
+ Handle a message that should be processed locally.
305
+ """
306
+ await self._receive_interswarm_message(message)
307
+
308
    async def _notify_remote_task_complete(
        self,
        task_id: str,
        finish_message: str,
        caller: str,
    ) -> None:
        """
        Inform any participating remote swarms that the task has completed locally.

        Sends a ``::task_complete::`` MAILResponse to the entrypoint agent of
        every remote swarm recorded on the task. Per-swarm send failures are
        logged and skipped so one unreachable swarm does not block the rest.

        Args:
            task_id: The completed task.
            finish_message: Completion text used as the notification body.
            caller: Agent name used as the sender of the notifications.
        """
        # Interswarm must be enabled and a router available to send anything.
        if not self.enable_interswarm or not self.interswarm_router:
            return

        # Nothing to notify if the task is unknown or purely local.
        task_state = self.mail_tasks.get(task_id)
        if task_state is None or not task_state.remote_swarms:
            return

        sender_address = create_agent_address(caller)

        for remote_swarm in task_state.remote_swarms:
            # Address the remote swarm's entrypoint agent directly.
            recipient = create_agent_address(f"{self.entrypoint}@{remote_swarm}")
            try:
                message = MAILMessage(
                    id=str(uuid.uuid4()),
                    timestamp=datetime.datetime.now(datetime.UTC).isoformat(),
                    message=MAILResponse(
                        task_id=task_id,
                        request_id=str(uuid.uuid4()),
                        sender=sender_address,
                        recipient=recipient,
                        subject="::task_complete::",
                        body=finish_message,
                        sender_swarm=self.swarm_name,
                        recipient_swarm=remote_swarm,
                        routing_info={
                            "origin_swarm": self.swarm_name,
                            "remote_swarm": remote_swarm,
                        },
                    ),
                    msg_type="response",
                )
                await self._send_interswarm_message(message)
            except Exception as exc:
                # Best-effort fan-out: log and continue with the next swarm.
                logger.error(
                    f"{self._log_prelude()} failed to notify remote swarm '{remote_swarm}' of completion for task '{task_id}': '{exc}'"
                )

        # Don't immediately complete the pending request here
        # Let the local processing flow handle it naturally
        # The supervisor agent should process the response and generate
        # a final response that will complete the user's request
358
+
359
+ async def run_task(
360
+ self,
361
+ task_id: str | None = None,
362
+ action_override: ActionOverrideFunction | None = None,
363
+ resume_from: Literal["user_response", "breakpoint_tool_call"] | None = None,
364
+ max_steps: int | None = None,
365
+ **kwargs: Any,
366
+ ) -> MAILMessage:
367
+ """
368
+ Run the MAIL system until the specified task is complete or shutdown is requested.
369
+ This method can be called multiple times for different requests.
370
+ """
371
+ match resume_from:
372
+ case "user_response":
373
+ if task_id is None:
374
+ logger.error(
375
+ f"{self._log_prelude()} task_id is required when resuming from a user response"
376
+ )
377
+ return self._system_broadcast(
378
+ task_id="null",
379
+ subject="::runtime_error::",
380
+ body="""The parameter 'task_id' is required when resuming from a user response.
381
+ It is impossible to resume a task without `task_id` specified.""",
382
+ task_complete=True,
383
+ )
384
+ if task_id not in self.mail_tasks:
385
+ logger.error(f"{self._log_prelude()} task '{task_id}' not found")
386
+ return self._system_broadcast(
387
+ task_id=task_id,
388
+ subject="::runtime_error::",
389
+ body=f"The task '{task_id}' was not found.",
390
+ task_complete=True,
391
+ )
392
+
393
+ await self.mail_tasks[task_id].queue_load(self.message_queue)
394
+ self.mail_tasks[task_id].is_running = True
395
+ self.mail_tasks[task_id].completed = False
396
+
397
+ try:
398
+ result = await self._run_loop_for_task(task_id, action_override)
399
+ finally:
400
+ self.mail_tasks[task_id].is_running = False
401
+
402
+ case "breakpoint_tool_call":
403
+ if task_id is None:
404
+ logger.error(
405
+ f"{self._log_prelude()} task_id is required when resuming from a breakpoint tool call"
406
+ )
407
+ return self._system_broadcast(
408
+ task_id="null",
409
+ subject="::runtime_error::",
410
+ body="""The parameter 'task_id' is required when resuming from a breakpoint tool call.
411
+ It is impossible to resume a task without `task_id` specified.""",
412
+ task_complete=True,
413
+ )
414
+ if task_id not in self.mail_tasks:
415
+ logger.error(f"{self._log_prelude()} task '{task_id}' not found")
416
+ return self._system_broadcast(
417
+ task_id=task_id,
418
+ subject="::runtime_error::",
419
+ body=f"The task '{task_id}' was not found.",
420
+ task_complete=True,
421
+ )
422
+
423
+ REQUIRED_KWARGS = [
424
+ "breakpoint_tool_call_result",
425
+ ]
426
+ for kwarg in REQUIRED_KWARGS:
427
+ if kwarg not in kwargs:
428
+ logger.error(
429
+ f"{self._log_prelude()} required keyword argument '{kwarg}' not provided"
430
+ )
431
+ return self._system_broadcast(
432
+ task_id=task_id,
433
+ subject="Runtime Error",
434
+ body=f"""The keyword argument '{kwarg}' is required when resuming from a breakpoint tool call.
435
+ It is impossible to resume a task without `{kwarg}` specified.""",
436
+ task_complete=True,
437
+ )
438
+ if (
439
+ task_id not in self.last_breakpoint_caller
440
+ or self.last_breakpoint_caller[task_id] is None
441
+ ):
442
+ logger.error(
443
+ f"{self._log_prelude()} last breakpoint caller for task '{task_id}' is not set"
444
+ )
445
+ return self._system_broadcast(
446
+ task_id=task_id,
447
+ subject="::runtime_error::",
448
+ body="The last breakpoint caller is not set.",
449
+ task_complete=True,
450
+ )
451
+ breakpoint_tool_caller = self.last_breakpoint_caller[task_id]
452
+ breakpoint_tool_call_result = kwargs["breakpoint_tool_call_result"]
453
+
454
+ self.mail_tasks[task_id].completed = False
455
+ self.mail_tasks[task_id].is_running = True
456
+
457
+ try:
458
+ result = await self._resume_task_from_breakpoint_tool_call(
459
+ task_id,
460
+ breakpoint_tool_caller,
461
+ breakpoint_tool_call_result,
462
+ action_override=action_override,
463
+ )
464
+ finally:
465
+ self.mail_tasks[task_id].is_running = False
466
+
467
+ case None: # start a new task
468
+ if task_id is None:
469
+ task_id = str(uuid.uuid4())
470
+ await self._ensure_task_exists(task_id)
471
+
472
+ self.mail_tasks[task_id].is_running = True
473
+
474
+ try:
475
+ result = await self._run_loop_for_task(
476
+ task_id, action_override, max_steps
477
+ )
478
+ finally:
479
+ self.mail_tasks[task_id].is_running = False
480
+
481
+ return result
482
+
483
+ async def _run_loop_for_task(
484
+ self,
485
+ task_id: str,
486
+ action_override: ActionOverrideFunction | None = None,
487
+ max_steps: int | None = None,
488
+ ) -> MAILMessage:
489
+ """
490
+ Run the MAIL system for a specific task until the task is complete or shutdown is requested.
491
+ """
492
+ logger.debug(
493
+ f"{self._log_prelude()} _run_loop_for_task: starting for task_id={task_id}, "
494
+ f"queue size={self.message_queue.qsize()}"
495
+ )
496
+ steps = 0
497
+ while True:
498
+ try:
499
+ # Wait for either a message or shutdown signal
500
+ logger.debug(
501
+ f"{self._log_prelude()} _run_loop_for_task: waiting for message, "
502
+ f"queue size={self.message_queue.qsize()}"
503
+ )
504
+ get_message_task = asyncio.create_task(self.message_queue.get())
505
+ shutdown_task = asyncio.create_task(self.shutdown_event.wait())
506
+
507
+ done, pending = await asyncio.wait(
508
+ [get_message_task, shutdown_task],
509
+ return_when=asyncio.FIRST_COMPLETED,
510
+ )
511
+
512
+ # Cancel pending tasks
513
+ for task in pending:
514
+ task.cancel()
515
+ try:
516
+ await task
517
+ except asyncio.CancelledError:
518
+ pass
519
+
520
+ # Check if shutdown was requested
521
+ if shutdown_task in done:
522
+ logger.info(f"{self._log_prelude()} shutdown requested")
523
+ return self._system_broadcast(
524
+ task_id="null",
525
+ subject="::shutdown_requested::",
526
+ body="The shutdown was requested.",
527
+ task_complete=True,
528
+ )
529
+
530
+ # Process the message
531
+ message_tuple = get_message_task.result()
532
+ # message_tuple structure: (priority, seq, message)
533
+ message = message_tuple[2]
534
+ logger.debug(
535
+ f"{self._log_prelude()} _run_loop_for_task: got message from queue, "
536
+ f"priority={message_tuple[0]}, seq={message_tuple[1]}, "
537
+ f"remaining queue size={self.message_queue.qsize()}"
538
+ )
539
+ logger.info(
540
+ f"{self._log_prelude()} processing message with task ID '{message['message']['task_id']}': '{message['message']['subject']}'"
541
+ )
542
+ if message["msg_type"] == "broadcast_complete":
543
+ task_id_completed = message["message"].get("task_id")
544
+ if isinstance(task_id_completed, str):
545
+ self.response_messages[task_id_completed] = message
546
+ await self._ensure_task_exists(task_id_completed)
547
+ self.mail_tasks[task_id_completed].mark_complete()
548
+ await self.mail_tasks[task_id_completed].queue_stash(
549
+ self.message_queue
550
+ )
551
+ self._clear_task_step_state(task_id_completed)
552
+ # Mark this message as done before breaking
553
+ self.message_queue.task_done()
554
+ return message
555
+
556
+ if (
557
+ not message["message"]["subject"].startswith("::")
558
+ and not message["message"]["sender"]["address_type"] == "system"
559
+ ):
560
+ steps += 1
561
+ if max_steps is not None and steps > max_steps:
562
+ ev = self.get_events_by_task_id(task_id)
563
+ serialized_events = []
564
+ for event in ev:
565
+ serialized = _serialize_event(
566
+ event, exclude_keys=_REDACT_KEYS
567
+ )
568
+ if serialized is not None:
569
+ serialized_events.append(serialized)
570
+ event_sections = _format_event_sections(serialized_events)
571
+ message = self._system_response(
572
+ task_id=task_id,
573
+ subject="::maximum_steps_reached::",
574
+ body=f"The swarm has reached the maximum number of steps allowed. You must now call `task_complete` and provide a response to the best of your ability. Below is a transcript of the entire swarm conversation for context:\n\n{event_sections}",
575
+ recipient=create_agent_address(self.entrypoint),
576
+ )
577
+ logger.info(
578
+ f"{self._log_prelude()} maximum number of steps reached for task '{task_id}', sending system response"
579
+ )
580
+
581
+ await self._process_message(message, action_override)
582
+ # Note: task_done() is called by the schedule function for regular messages
583
+
584
+ except asyncio.CancelledError:
585
+ logger.info(
586
+ f"{self._log_prelude()} run loop cancelled, initiating shutdown..."
587
+ )
588
+ self._submit_event(
589
+ "run_loop_cancelled",
590
+ message["message"]["task_id"],
591
+ "run loop cancelled",
592
+ )
593
+ return self._system_broadcast(
594
+ task_id=message["message"]["task_id"],
595
+ subject="::run_loop_cancelled::",
596
+ body="The run loop was cancelled.",
597
+ task_complete=True,
598
+ )
599
+ except Exception as e:
600
+ logger.error(f"{self._log_prelude()} error in run loop: {e}")
601
+ self._submit_event(
602
+ "run_loop_error",
603
+ message["message"]["task_id"],
604
+ f"error in run loop: {e}",
605
+ )
606
+ return self._system_broadcast(
607
+ task_id=message["message"]["task_id"],
608
+ subject="::run_loop_error::",
609
+ body=f"An error occurred while running the MAIL system: {e}",
610
+ task_complete=True,
611
+ )
612
+
613
    async def _resume_task_from_breakpoint_tool_call(
        self,
        task_id: str,
        breakpoint_tool_caller: Any,
        breakpoint_tool_call_result: Any,
        action_override: ActionOverrideFunction | None = None,
    ) -> MAILMessage:
        """
        Resume a task from a breakpoint tool call.

        Validates the supplied tool-call result, converts it into tool
        response message(s) matched against the recorded breakpoint tool
        calls, appends those to the calling agent's history, notifies the
        caller via an ``::action_complete_broadcast::``, and re-enters the
        run loop for the task.

        Args:
            task_id: The paused task to resume.
            breakpoint_tool_caller: Name of the agent that issued the
                breakpoint tool call(s); must exist in ``self.agents``.
            breakpoint_tool_call_result: The tool result as a JSON string,
                a list of ``{"call_id", "content"}`` dicts, or a single
                ``{"content": ...}`` dict.
            action_override: Optional function used in place of normal
                action execution while processing messages.

        Returns:
            The final MAILMessage from the resumed run loop, or a
            ``::runtime_error::`` broadcast if validation fails.
        """
        logger.debug(
            f"{self._log_prelude()} _resume_task_from_breakpoint_tool_call: "
            f"task_id={task_id}, caller={breakpoint_tool_caller}, "
            f"result_type={type(breakpoint_tool_call_result).__name__}"
        )
        # The result must be one of the three accepted shapes.
        if (
            not isinstance(breakpoint_tool_call_result, str)
            and not isinstance(breakpoint_tool_call_result, list)
            and not isinstance(breakpoint_tool_call_result, dict)
        ):
            logger.error(
                f"{self._log_prelude()} breakpoint_tool_call_result must be a string, list, or dict"
            )
            return self._system_broadcast(
                task_id=task_id,
                subject="::runtime_error::",
                body="""The parameter 'breakpoint_tool_call_result' must be a string, list, or dict.
`breakpoint_tool_call_result` specifies the result of the breakpoint tool call.""",
                task_complete=True,
            )
        if breakpoint_tool_caller not in self.agents:
            logger.error(
                f"{self._log_prelude()} agent '{breakpoint_tool_caller}' not found"
            )
            return self._system_broadcast(
                task_id=task_id,
                subject="::runtime_error::",
                body=f"The agent '{breakpoint_tool_caller}' was not found.",
                task_complete=True,
            )

        # Un-pause the task and restore its stashed messages to the live queue.
        self.mail_tasks[task_id].resume()
        await self.mail_tasks[task_id].queue_load(self.message_queue)
        logger.debug(
            f"{self._log_prelude()} _resume_task_from_breakpoint_tool_call: "
            f"queue loaded, queue size={self.message_queue.qsize()}"
        )
        result_msgs: list[dict[str, Any]] = []
        # A string result is treated as JSON and decoded first.
        if isinstance(breakpoint_tool_call_result, str):
            payload = ujson.loads(breakpoint_tool_call_result)
        else:
            payload = breakpoint_tool_call_result

        if task_id not in self.last_breakpoint_tool_calls:
            logger.error(
                f"{self._log_prelude()} last breakpoint tool calls for task '{task_id}' is not set"
            )
            return self._system_broadcast(
                task_id=task_id,
                subject="::runtime_error::",
                body="The last breakpoint tool calls is not set.",
                task_complete=True,
            )

        if isinstance(payload, list):
            # List payload: match each response to its originating tool call
            # by call_id; unmatched responses are silently skipped.
            for resp in payload:
                og_call = next(
                    (
                        call
                        for call in self.last_breakpoint_tool_calls[task_id]
                        if call.tool_call_id == resp["call_id"]
                    ),
                    None,
                )
                if og_call is not None:
                    result_msgs.append(og_call.create_response_msg(resp["content"]))
                    self._submit_event(
                        "breakpoint_action_complete",
                        task_id,
                        f"breakpoint action complete (caller = {breakpoint_tool_caller}):\n{resp['content']}",
                    )
        else:
            # Single-dict payload: only valid when exactly one breakpoint
            # tool call is outstanding.
            if len(self.last_breakpoint_tool_calls[task_id]) > 1:
                logger.error(
                    f"{self._log_prelude()} last breakpoint tool calls is a list but only one call response was provided"
                )
                return self._system_broadcast(
                    task_id=task_id,
                    subject="::runtime_error::",
                    body="The last breakpoint tool calls is a list but only one call response was provided.",
                    task_complete=True,
                )
            # NOTE(review): assumes at least one recorded call exists here;
            # an empty list would raise IndexError — confirm upstream invariant.
            result_msgs.append(
                self.last_breakpoint_tool_calls[task_id][0].create_response_msg(
                    payload["content"]
                )
            )
            self._submit_event(
                "breakpoint_action_complete",
                task_id,
                f"breakpoint action complete (caller = {breakpoint_tool_caller}):\n{payload['content']}",
            )

        # append the breakpoint tool call result to the agent history
        logger.debug(
            f"{self._log_prelude()} _resume_task_from_breakpoint_tool_call: "
            f"appending {len(result_msgs)} result message(s) to history"
        )
        self.agent_histories[
            AGENT_HISTORY_KEY.format(task_id=task_id, agent_name=breakpoint_tool_caller)
        ].extend(result_msgs)

        # send action complete broadcast to tool caller
        logger.debug(
            f"{self._log_prelude()} _resume_task_from_breakpoint_tool_call: "
            f"submitting ::action_complete_broadcast:: to {breakpoint_tool_caller}"
        )
        await self.submit(
            self._system_broadcast(
                task_id=task_id,
                subject="::action_complete_broadcast::",
                body="",
                recipients=[create_agent_address(breakpoint_tool_caller)],
            )
        )

        # resume the task
        logger.debug(
            f"{self._log_prelude()} _resume_task_from_breakpoint_tool_call: "
            f"entering _run_loop_for_task, queue size={self.message_queue.qsize()}"
        )
        self.mail_tasks[task_id].is_running = True
        try:
            result = await self._run_loop_for_task(task_id, action_override)
        finally:
            self.mail_tasks[task_id].is_running = False

        logger.debug(
            f"{self._log_prelude()} _resume_task_from_breakpoint_tool_call: "
            f"_run_loop_for_task completed"
        )
        return result
755
+
756
    async def run_continuous(
        self,
        max_steps: int | None = None,
        action_override: ActionOverrideFunction | None = None,
        mode: Literal["continuous", "manual"] = "continuous",
    ) -> None:
        """
        Run the MAIL system continuously, handling multiple requests.
        This method runs indefinitely until shutdown is requested.

        Args:
            max_steps: Default per-task step budget; a task-specific override in
                `self._max_steps_by_task` takes precedence. `None` disables the cap.
            action_override: Optional function forwarded to `_process_message` for
                overriding agent actions.
            mode: "manual" only changes the startup log line and sets
                `self._is_manual`; the processing loop itself is identical.
        """
        self._is_continuous = True
        self._is_manual = mode == "manual"
        if self._is_manual:
            logger.info(
                f"{self._log_prelude()} starting manual MAIL operation for user '{self.user_id}'..."
            )
        else:
            logger.info(
                f"{self._log_prelude()} starting continuous MAIL operation for user '{self.user_id}'..."
            )
        while not self.shutdown_event.is_set():
            try:
                logger.debug(
                    f"{self._log_prelude()} pending requests: {self.pending_requests.keys()}"
                )

                # Wait for either a message or shutdown signal
                get_message_task = asyncio.create_task(self.message_queue.get())
                shutdown_task = asyncio.create_task(self.shutdown_event.wait())

                done, pending = await asyncio.wait(
                    [get_message_task, shutdown_task],
                    return_when=asyncio.FIRST_COMPLETED,
                )

                # Cancel whichever racer lost and swallow its CancelledError so
                # it does not leak as a "task exception was never retrieved".
                for task in pending:
                    task.cancel()
                    try:
                        await task
                    except asyncio.CancelledError:
                        pass

                # Check if shutdown was requested
                if shutdown_task in done:
                    logger.info(
                        f"{self._log_prelude()} shutdown requested in continuous mode"
                    )
                    self._submit_event(
                        "shutdown_requested",
                        "*",
                        "shutdown requested in continuous mode",
                    )
                    break

                # Process the message
                message_tuple = get_message_task.result()
                # message_tuple structure: (priority, seq, message)
                message = message_tuple[2]
                logger.info(f"{self._log_prelude()} queue state: {self.message_queue}")
                logger.info(
                    f"{self._log_prelude()} processing message with task ID '{message['message']['task_id']}' and type '{message['msg_type']}' in continuous mode: '{message['message']['subject']}'"
                )
                task_id = message["message"]["task_id"]

                if message["msg_type"] == "broadcast_complete":
                    # A broadcast_complete terminates its task: record the
                    # response, stash any queued messages, and resolve the
                    # caller's future if one is waiting.
                    self.response_messages[task_id] = message
                    if isinstance(task_id, str):
                        await self._ensure_task_exists(task_id)
                        self.mail_tasks[task_id].mark_complete()
                        await self.mail_tasks[task_id].queue_stash(self.message_queue)
                        self._clear_task_step_state(task_id)
                    if isinstance(task_id, str) and task_id in self.pending_requests:
                        # Resolve the pending request
                        logger.info(
                            f"{self._log_prelude()} task '{task_id}' completed, resolving pending request"
                        )
                        future = self.pending_requests.pop(task_id)
                        future.set_result(message)
                        continue
                    else:
                        # Mark this message as done and continue processing
                        self.message_queue.task_done()
                        continue

                # Count a "step" only for non-system messages whose subject is
                # not a `::control::` marker; system/control traffic is free.
                if (
                    not message["message"]["subject"].startswith("::")
                    and not message["message"]["sender"]["address_type"] == "system"
                ):
                    self._steps_by_task[task_id] += 1
                    max_steps_for_task = self._max_steps_by_task.get(
                        task_id, max_steps
                    )
                    if (
                        max_steps_for_task is not None
                        and self._steps_by_task[task_id] > max_steps_for_task
                    ):
                        # Over budget: replace the message with a system prompt
                        # ordering the entrypoint agent to finish up, including
                        # a redacted transcript of the task's events.
                        ev = self.get_events_by_task_id(task_id)
                        serialized_events = []
                        for event in ev:
                            serialized = _serialize_event(
                                event, exclude_keys=_REDACT_KEYS
                            )
                            if serialized is not None:
                                serialized_events.append(serialized)
                        event_sections = _format_event_sections(serialized_events)
                        message = self._system_response(
                            task_id=task_id,
                            subject="::maximum_steps_reached::",
                            body=f"The swarm has reached the maximum number of steps allowed. You must now call `task_complete` and provide a response to the best of your ability. Below is a transcript of the entire swarm conversation for context:\n\n{event_sections}",
                            recipient=create_agent_address(self.entrypoint),
                        )
                        logger.info(
                            f"{self._log_prelude()} maximum number of steps reached for task '{task_id}', sending system response"
                        )

                await self._process_message(message, action_override)
                # Note: task_done() is called by the schedule function for regular messages

            except asyncio.CancelledError:
                logger.info(f"{self._log_prelude()} continuous run loop cancelled")
                self._submit_event(
                    "run_loop_cancelled",
                    "*",
                    "continuous run loop cancelled",
                )
                self._is_continuous = False
                break
            except Exception as e:
                logger.error(f"{self._log_prelude()} error in continuous run loop: {e}")
                self._submit_event(
                    "run_loop_error",
                    "*",
                    f"continuous run loop error: {e}",
                )
                self._is_continuous = False
                # Continue processing other messages instead of shutting down
                continue

        logger.info(f"{self._log_prelude()} continuous MAIL operation stopped")
897
+
898
    async def submit_and_wait(
        self,
        message: MAILMessage,
        timeout: float = 3600.0,
        resume_from: Literal["user_response", "breakpoint_tool_call"] | None = None,
        **kwargs: Any,
    ) -> MAILMessage:
        """
        Submit a message and wait for the response.
        This method is designed for handling individual task requests in a persistent MAIL instance.

        Args:
            message: The MAIL message to submit; its `task_id` keys the pending future.
            timeout: Seconds to wait for the task's `broadcast_complete` before
                returning a `::task_timeout::` broadcast instead of raising.
            resume_from: `None` starts a new task; otherwise resumes an existing
                task from a user response or a breakpoint tool call result.
            **kwargs: Forwarded to the resume helpers; `max_steps` (if present)
                is popped here and installed as a per-task step cap.

        Returns:
            The completing MAIL message, or a system broadcast describing a
            timeout/error (never raises to the caller).
        """
        task_id = message["message"]["task_id"]

        logger.info(
            f"{self._log_prelude()} `submit_and_wait`: creating future for task '{task_id}'"
        )

        # Create a future to wait for the response; run_continuous resolves it
        # when the matching broadcast_complete arrives.
        future: asyncio.Future[MAILMessage] = asyncio.Future()
        self.pending_requests[task_id] = future

        try:
            max_steps_override = kwargs.pop("max_steps", _UNSET)
            if max_steps_override is not _UNSET:
                self._set_task_max_steps(
                    task_id, self._normalize_max_steps(max_steps_override)
                )
            match resume_from:
                case "user_response":
                    await self._submit_user_response(task_id, message, **kwargs)
                case "breakpoint_tool_call":
                    await self._submit_breakpoint_tool_call_result(task_id, **kwargs)
                case (
                    None
                ):  # start a new task (task_id should be provided in the message)
                    await self._ensure_task_exists(task_id)

                    self.mail_tasks[task_id].is_running = True

                    await self.submit(message)

            # Wait for the response with timeout
            logger.info(
                f"{self._log_prelude()} `submit_and_wait`: waiting for future for task '{task_id}'"
            )
            response = await asyncio.wait_for(future, timeout=timeout)
            logger.info(
                f"{self._log_prelude()} `submit_and_wait`: got response for task '{task_id}' with body: '{response['message']['body'][:50]}...'..."
            )
            self._submit_event(
                "task_complete", task_id, f"response: '{response['message']['body']}'"
            )
            self.mail_tasks[task_id].is_running = False

            return response

        except TimeoutError:
            # Remove the pending request so a late completion can't resolve a
            # future nobody is awaiting anymore.
            self.pending_requests.pop(task_id, None)
            logger.error(
                f"{self._log_prelude()} `submit_and_wait`: timeout for task '{task_id}'"
            )
            self._submit_event("task_error", task_id, f"timeout for task '{task_id}'")
            return self._system_broadcast(
                task_id=task_id,
                subject="::task_timeout::",
                body="The task timed out.",
                task_complete=True,
            )
        except Exception as e:
            # Remove the pending request
            self.pending_requests.pop(task_id, None)
            logger.error(
                f"{self._log_prelude()} `submit_and_wait`: exception for task '{task_id}' with error: {e}"
            )
            self._submit_event("task_error", task_id, f"error for task: {e}")
            return self._system_broadcast(
                task_id=task_id,
                subject="::task_error::",
                body=f"The task encountered an error: {e}.",
                task_complete=True,
            )
980
+
981
    async def submit_and_stream(
        self,
        message: MAILMessage,
        timeout: float = 3600.0,
        resume_from: Literal["user_response", "breakpoint_tool_call"] | None = None,
        **kwargs: Any,
    ) -> AsyncGenerator[ServerSentEvent, None]:
        """
        Submit a message and stream the response.
        This method is designed for handling individual task requests in a persistent MAIL instance.

        Yields task events as Server-Sent Events as they are produced, with a
        "ping" heartbeat every 15s of inactivity, and finishes with a single
        "task_complete" (or "task_error") event carrying the response body.

        Args:
            message: The MAIL message to submit; its `task_id` keys the pending future.
            timeout: Nominal deadline for the task (surfaced as a "task_error" event).
            resume_from: `None` starts a new task; otherwise resumes an existing
                task from a user response or breakpoint tool call result.
            **kwargs: Forwarded to the resume helpers; `max_steps` is popped
                here and installed as a per-task step cap.
        """
        task_id = message["message"]["task_id"]

        logger.info(
            f"{self._log_prelude()} `submit_and_stream`: creating future for task '{task_id}'"
        )

        future: asyncio.Future[MAILMessage] = asyncio.Future()
        self.pending_requests[task_id] = future

        try:
            max_steps_override = kwargs.pop("max_steps", _UNSET)
            if max_steps_override is not _UNSET:
                self._set_task_max_steps(
                    task_id, self._normalize_max_steps(max_steps_override)
                )
            task_state = self.mail_tasks.get(task_id)
            if task_state is None and resume_from is None:
                await self._ensure_task_exists(task_id)
                task_state = self.mail_tasks.get(task_id)
            # Only stream events appended after this point (resumed tasks keep
            # their history; we do not replay it).
            next_event_index = len(task_state.events) if task_state else 0

            match resume_from:
                case "user_response":
                    await self._submit_user_response(task_id, message, **kwargs)
                case "breakpoint_tool_call":
                    await self._submit_breakpoint_tool_call_result(task_id, **kwargs)
                case None:  # start a new task
                    await self.submit(message)

            # Stream events as they become available, emitting periodic heartbeats
            task_event = self._events_available_by_task[task_id]
            while not future.done():
                task_state = self.mail_tasks.get(task_id)
                task_events = task_state.events if task_state else []
                if next_event_index < len(task_events):
                    for ev in task_events[next_event_index:]:
                        payload = ev.data
                        if isinstance(payload, dict) and not isinstance(
                            payload, _SSEPayload
                        ):
                            payload = _SSEPayload(payload)
                        yield ServerSentEvent(
                            event=ev.event,
                            data=payload,
                            id=getattr(ev, "id", None),
                        )
                    next_event_index = len(task_events)
                    continue

                # Reset the event flag before waiting to avoid busy loops.
                task_event.clear()
                # Re-check after clearing: an event appended between the check
                # above and the clear() would otherwise be missed until the
                # next wakeup.
                task_state = self.mail_tasks.get(task_id)
                task_events = task_state.events if task_state else []
                if next_event_index < len(task_events):
                    continue

                try:
                    # Wait up to 15s for new events; on timeout send a heartbeat
                    await asyncio.wait_for(task_event.wait(), timeout=15.0)
                except TimeoutError:
                    # Heartbeat to keep the connection alive
                    yield ServerSentEvent(
                        data=ujson.dumps({
                            "timestamp": datetime.datetime.now(
                                datetime.UTC
                            ).isoformat(),
                            "task_id": task_id,
                        }),
                        event="ping",
                    )
                    continue

            # Future completed; drain any remaining events before emitting final response
            task_state = self.mail_tasks.get(task_id)
            task_events = task_state.events if task_state else []
            if next_event_index < len(task_events):
                for ev in task_events[next_event_index:]:
                    payload = ev.data
                    if isinstance(payload, dict) and not isinstance(
                        payload, _SSEPayload
                    ):
                        payload = _SSEPayload(payload)
                    yield ServerSentEvent(
                        event=ev.event,
                        data=payload,
                        id=getattr(ev, "id", None),
                    )

            # Emit the final task_complete event with the response body
            try:
                response = future.result()
                yield ServerSentEvent(
                    data=_SSEPayload({
                        "timestamp": datetime.datetime.now(datetime.UTC).isoformat(),
                        "task_id": task_id,
                        "response": response["message"]["body"],
                    }),
                    event="task_complete",
                )
            except Exception as e:
                # If retrieving the response fails, still signal completion
                logger.error(
                    f"{self._log_prelude()} `submit_and_stream`: exception for task '{task_id}' with error: {e}"
                )
                yield ServerSentEvent(
                    data=_SSEPayload({
                        "timestamp": datetime.datetime.now(datetime.UTC).isoformat(),
                        "task_id": task_id,
                        "response": f"{e}",
                    }),
                    event="task_error",
                )

        except TimeoutError:
            self.pending_requests.pop(task_id, None)
            logger.error(
                f"{self._log_prelude()} `submit_and_stream`: timeout for task '{task_id}'"
            )
            yield ServerSentEvent(
                data=_SSEPayload({
                    "timestamp": datetime.datetime.now(datetime.UTC).isoformat(),
                    "task_id": task_id,
                    "response": "timeout",
                }),
                event="task_error",
            )

        except Exception as e:
            self.pending_requests.pop(task_id, None)
            logger.error(
                f"{self._log_prelude()} `submit_and_stream`: exception for task '{task_id}' with error: {e}"
            )
            yield ServerSentEvent(
                data=_SSEPayload({
                    "timestamp": datetime.datetime.now(datetime.UTC).isoformat(),
                    "task_id": task_id,
                    "response": f"{e}",
                }),
                event="task_error",
            )
1132
+
1133
+ async def _submit_user_response(
1134
+ self,
1135
+ task_id: str,
1136
+ message: MAILMessage,
1137
+ **kwargs: Any,
1138
+ ) -> None:
1139
+ """
1140
+ Submit a user response to a pre-existing task.
1141
+ """
1142
+ if task_id not in self.mail_tasks:
1143
+ logger.error(
1144
+ f"{self._log_prelude()} `submit_user_response`: task '{task_id}' not found"
1145
+ )
1146
+ raise ValueError(f"task '{task_id}' not found")
1147
+
1148
+ self._reset_step_counter(task_id)
1149
+ self.mail_tasks[task_id].resume()
1150
+ await self.mail_tasks[task_id].queue_load(self.message_queue)
1151
+
1152
+ await self.submit(message)
1153
+
1154
+ async def _submit_breakpoint_tool_call_result(
1155
+ self,
1156
+ task_id: str,
1157
+ **kwargs: Any,
1158
+ ) -> None:
1159
+ """
1160
+ Submit a breakpoint tool call result to the task.
1161
+ """
1162
+ # ensure the task exists already
1163
+ if task_id not in self.mail_tasks:
1164
+ logger.error(
1165
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: task '{task_id}' not found"
1166
+ )
1167
+ raise ValueError(f"task '{task_id}' not found")
1168
+
1169
+ self._reset_step_counter(task_id)
1170
+ self.mail_tasks[task_id].resume()
1171
+ await self.mail_tasks[task_id].queue_load(self.message_queue)
1172
+
1173
+ # ensure valid kwargs
1174
+ REQUIRED_KWARGS: dict[str, type] = {
1175
+ "breakpoint_tool_call_result": str,
1176
+ }
1177
+ for kwarg, _type in REQUIRED_KWARGS.items():
1178
+ if kwarg not in kwargs:
1179
+ logger.error(
1180
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: required keyword argument '{kwarg}' not provided"
1181
+ )
1182
+ raise ValueError(f"required keyword argument '{kwarg}' not provided")
1183
+ breakpoint_tool_caller = kwargs.get("breakpoint_tool_caller", None)
1184
+ if breakpoint_tool_caller is None:
1185
+ if task_id not in self.last_breakpoint_caller:
1186
+ logger.error(
1187
+ f"{self._log_prelude} `submmit_breakpoint_tool_call_result`: last breakpoint caller for task '{task_id}' is not set and no breakpoint tool caller was provided"
1188
+ )
1189
+ raise ValueError(
1190
+ f"last breakpoint caller for task '{task_id}' is not set and no breakpoint tool caller was provided"
1191
+ )
1192
+ breakpoint_tool_caller = self.last_breakpoint_caller[task_id]
1193
+ breakpoint_tool_call_result = kwargs["breakpoint_tool_call_result"]
1194
+
1195
+ if breakpoint_tool_caller is None:
1196
+ logger.error(
1197
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: breakpoint tool caller unknown"
1198
+ )
1199
+ raise ValueError(
1200
+ "breakpoint tool caller is required to resume from a breakpoint"
1201
+ )
1202
+
1203
+ # ensure the agent exists already
1204
+ if breakpoint_tool_caller not in self.agents:
1205
+ logger.error(
1206
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: agent '{breakpoint_tool_caller}' not found"
1207
+ )
1208
+ raise ValueError(f"agent '{breakpoint_tool_caller}' not found")
1209
+
1210
+ result_msgs: list[dict[str, Any]] = []
1211
+ if isinstance(breakpoint_tool_call_result, str):
1212
+ try:
1213
+ payload = ujson.loads(breakpoint_tool_call_result)
1214
+ except ValueError:
1215
+ payload = breakpoint_tool_call_result
1216
+ else:
1217
+ payload = breakpoint_tool_call_result
1218
+
1219
+ logger.info(
1220
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: payload: '{payload}'"
1221
+ )
1222
+ if task_id not in self.last_breakpoint_tool_calls:
1223
+ logger.error(
1224
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: last breakpoint tool calls for task '{task_id}' is not set"
1225
+ )
1226
+ raise ValueError(
1227
+ f"last breakpoint tool calls for task '{task_id}' is not set"
1228
+ )
1229
+ last_breakpoint_tool_calls = self.last_breakpoint_tool_calls[task_id]
1230
+ has_breakpoint_context = bool(last_breakpoint_tool_calls)
1231
+
1232
+ if isinstance(payload, list) and has_breakpoint_context:
1233
+ logger.info(
1234
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: payload is a list and has breakpoint context"
1235
+ )
1236
+ logger.info(f"Current breakpoint tool calls: {last_breakpoint_tool_calls}")
1237
+ for resp in payload:
1238
+ og_call = next(
1239
+ (
1240
+ call
1241
+ for call in last_breakpoint_tool_calls
1242
+ if call.tool_call_id == resp["call_id"]
1243
+ ),
1244
+ None,
1245
+ )
1246
+ if og_call is not None:
1247
+ result_msgs.append(og_call.create_response_msg(resp["content"]))
1248
+ self._submit_event(
1249
+ "breakpoint_action_complete",
1250
+ task_id,
1251
+ f"breakpoint action complete (caller = {breakpoint_tool_caller}):\n{resp['content']}",
1252
+ )
1253
+ else:
1254
+ logger.warning(
1255
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: no matching breakpoint tool call found for response: {resp}"
1256
+ )
1257
+ else:
1258
+ if isinstance(payload, dict) and has_breakpoint_context:
1259
+ if len(last_breakpoint_tool_calls) > 1:
1260
+ logger.error(
1261
+ f"{self._log_prelude()} last breakpoint tool calls is a list but only one call response was provided"
1262
+ )
1263
+ raise ValueError(
1264
+ "The last breakpoint tool calls is a list but only one call response was provided."
1265
+ )
1266
+ result_msgs.append(
1267
+ last_breakpoint_tool_calls[0].create_response_msg(
1268
+ payload["content"]
1269
+ )
1270
+ )
1271
+ self._submit_event(
1272
+ "breakpoint_action_complete",
1273
+ task_id,
1274
+ f"breakpoint action complete (caller = {breakpoint_tool_caller}):\n{payload['content']}",
1275
+ )
1276
+ else:
1277
+ self._submit_event(
1278
+ "breakpoint_action_complete",
1279
+ task_id,
1280
+ f"breakpoint action complete (caller = {breakpoint_tool_caller}):\n{payload}",
1281
+ )
1282
+
1283
+ if isinstance(payload, list) and not has_breakpoint_context:
1284
+ logger.warning(
1285
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: received list payload but no breakpoint context is cached"
1286
+ )
1287
+ elif isinstance(payload, dict) and not has_breakpoint_context:
1288
+ logger.warning(
1289
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: received dict payload but no breakpoint context is cached"
1290
+ )
1291
+ elif has_breakpoint_context and not result_msgs:
1292
+ logger.warning(
1293
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: breakpoint context was available but no result messages were produced"
1294
+ )
1295
+
1296
+ if (
1297
+ has_breakpoint_context
1298
+ and isinstance(payload, dict)
1299
+ and "content" not in payload
1300
+ ):
1301
+ logger.error(
1302
+ f"{self._log_prelude()} last breakpoint tool call payload missing 'content'"
1303
+ )
1304
+ raise ValueError("breakpoint tool call payload must include 'content'")
1305
+
1306
+ # ensure result_msgs is not empty
1307
+ if not result_msgs:
1308
+ logger.warning(
1309
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: no result messages were produced"
1310
+ )
1311
+ result_msgs.append(
1312
+ {
1313
+ "role": "tool",
1314
+ "content": str(payload),
1315
+ }
1316
+ )
1317
+
1318
+ # append the breakpoint tool call result to the agent history
1319
+ self.agent_histories[
1320
+ AGENT_HISTORY_KEY.format(task_id=task_id, agent_name=breakpoint_tool_caller)
1321
+ ].extend(result_msgs)
1322
+
1323
+ await self.mail_tasks[task_id].queue_load(self.message_queue)
1324
+
1325
+ # submit an action complete broadcast to the task
1326
+ logger.info(
1327
+ f"{self._log_prelude()} `submit_breakpoint_tool_call_result`: submitting action complete broadcast to the task"
1328
+ )
1329
+ await self.submit(
1330
+ self._system_broadcast(
1331
+ task_id=task_id,
1332
+ subject="::action_complete_broadcast::",
1333
+ body="",
1334
+ recipients=[create_agent_address(breakpoint_tool_caller)],
1335
+ )
1336
+ )
1337
+
1338
+ async def shutdown(self) -> None:
1339
+ """
1340
+ Request a graceful shutdown of the MAIL system.
1341
+ """
1342
+ logger.info(f"{self._log_prelude()} requesting shutdown")
1343
+ self._is_continuous = False
1344
+
1345
+ # Stop interswarm messaging first
1346
+ if self.enable_interswarm:
1347
+ await self.stop_interswarm()
1348
+
1349
+ self.shutdown_event.set()
1350
+
1351
    async def _graceful_shutdown(self) -> None:
        """
        Perform graceful shutdown operations.

        Waits up to 30 seconds for all tasks in `self.active_tasks` to finish;
        any still running after that are cancelled and given 5 more seconds to
        unwind. Never raises — shutdown errors are logged and swallowed.
        """
        logger.info(f"{self._log_prelude()} starting graceful shutdown")

        # Graceful shutdown: wait for all active tasks to complete
        if self.active_tasks:
            logger.info(
                f"{self._log_prelude()} waiting for {len(self.active_tasks)} active tasks to complete"
            )
            # Copy the set to avoid modification during iteration
            tasks_to_wait = list(self.active_tasks)
            logger.info(
                f"{self._log_prelude()} tasks to wait for: {[task.get_name() if hasattr(task, 'get_name') else str(task) for task in tasks_to_wait]}"
            )

            try:
                # Wait for tasks with a timeout of 30 seconds
                # (return_exceptions=True so one failing task doesn't abort the wait)
                await asyncio.wait_for(
                    asyncio.gather(*tasks_to_wait, return_exceptions=True), timeout=30.0
                )
                logger.info(f"{self._log_prelude()} all active tasks completed")
            except TimeoutError:
                logger.info(
                    f"{self._log_prelude()} timeout waiting for tasks to complete. cancelling remaining tasks..."
                )
                # Cancel any remaining tasks
                for task in tasks_to_wait:
                    if not task.done():
                        logger.info(f"{self._log_prelude()} cancelling task: {task}")
                        task.cancel()
                # Wait a bit more for cancellation to complete
                try:
                    await asyncio.wait_for(
                        asyncio.gather(*tasks_to_wait, return_exceptions=True),
                        timeout=5.0,
                    )
                except TimeoutError:
                    logger.info(
                        f"{self._log_prelude()} some tasks could not be cancelled cleanly"
                    )
                logger.info(f"{self._log_prelude()} task cancellation completed")
            except Exception as e:
                logger.error(f"{self._log_prelude()} error during shutdown: {e}")
        else:
            logger.info(f"{self._log_prelude()} has no active tasks to wait for")

        logger.info(f"{self._log_prelude()} graceful shutdown completed")
1400
+
1401
+ async def submit(self, message: MAILMessage) -> None:
1402
+ """
1403
+ Add a message to the priority queue
1404
+ Priority order:
1405
+ 1. System message of any type
1406
+ 2. User message of any type
1407
+ 3. Agent interrupt, broadcast_complete
1408
+ 4. Agent broadcast
1409
+ 5. Agent request, response
1410
+ Within each category, messages are processed in FIFO order using a
1411
+ monotonically increasing sequence number to avoid dict comparisons.
1412
+ """
1413
+ recipients = (
1414
+ message["message"]["recipients"] # type: ignore
1415
+ if "recipients" in message["message"]
1416
+ else [message["message"]["recipient"]]
1417
+ )
1418
+ logger.info(
1419
+ f"{self._log_prelude()} submitting message: [yellow]{message['message']['sender']['address_type']}:{message['message']['sender']['address']}[/yellow] -> [yellow]{[f'{recipient["address_type"]}:{recipient["address"]}' for recipient in recipients]}[/yellow] with subject '{message['message']['subject']}'"
1420
+ )
1421
+
1422
+ priority = 0
1423
+ if message["message"]["sender"]["address_type"] == "system":
1424
+ priority = 1
1425
+ elif message["message"]["sender"]["address_type"] == "user":
1426
+ priority = 2
1427
+ elif message["message"]["sender"]["address_type"] == "agent":
1428
+ match message["msg_type"]:
1429
+ case "interrupt" | "broadcast_complete":
1430
+ priority = 3
1431
+ case "broadcast":
1432
+ priority = 4
1433
+ case "request" | "response":
1434
+ priority = 5
1435
+
1436
+ # Monotonic sequence to break ties for same priority
1437
+ self._message_seq += 1
1438
+ seq = self._message_seq
1439
+
1440
+ await self.message_queue.put((priority, seq, message))
1441
+
1442
+ return
1443
+
1444
+ async def _ensure_task_exists(
1445
+ self,
1446
+ task_id: str,
1447
+ task_owner: str | None = None,
1448
+ task_contributors: list[str] | None = None,
1449
+ ) -> None:
1450
+ """
1451
+ Ensure a task exists in swarm memory.
1452
+ """
1453
+ if task_id not in self.mail_tasks:
1454
+ if not task_owner:
1455
+ task_owner = self.this_owner
1456
+ if not task_contributors:
1457
+ task_contributors = [task_owner]
1458
+ else:
1459
+ task_contributors.append(task_owner)
1460
+ task = MAILTask(task_id, task_owner, task_contributors)
1461
+ self.mail_tasks[task_id] = task
1462
+
1463
+ # Persist to DB if enabled
1464
+ if self.enable_db_agent_histories:
1465
+ await self._persist_task_to_db(task)
1466
+
1467
+ async def _persist_task_to_db(self, task: MAILTask) -> None:
1468
+ """
1469
+ Persist a task to the database.
1470
+ """
1471
+ try:
1472
+ task_data = task.to_db_dict()
1473
+ await create_task(
1474
+ task_id=task_data["task_id"],
1475
+ swarm_name=self.swarm_name,
1476
+ caller_role=self.user_role,
1477
+ caller_id=self.user_id,
1478
+ task_owner=task_data["task_owner"],
1479
+ task_contributors=task_data["task_contributors"],
1480
+ remote_swarms=task_data["remote_swarms"],
1481
+ start_time=task_data["start_time"],
1482
+ is_running=task_data["is_running"],
1483
+ completed=task_data["completed"],
1484
+ title=task_data.get("title"),
1485
+ )
1486
+ logger.debug(
1487
+ f"{self._log_prelude()} persisted task '{task.task_id}' to database"
1488
+ )
1489
+ except Exception as e:
1490
+ logger.warning(
1491
+ f"{self._log_prelude()} failed to persist task '{task.task_id}' to database: {e}"
1492
+ )
1493
+
1494
+ def _add_remote_task(
1495
+ self,
1496
+ task_id: str,
1497
+ task_owner: str,
1498
+ task_contributors: list[str],
1499
+ ) -> None:
1500
+ """
1501
+ Add a remote task to swarm memory.
1502
+ """
1503
+ if task_id in self.mail_tasks:
1504
+ logger.warning(f"a task with ID '{task_id}' already exists in swarm memory")
1505
+ raise ValueError(
1506
+ f"a task with ID '{task_id}' already exists in swarm memory"
1507
+ )
1508
+ self.mail_tasks[task_id] = MAILTask(task_id, task_owner, task_contributors)
1509
+
1510
+ def _update_local_task(
1511
+ self,
1512
+ task_id: str,
1513
+ task_owner: str,
1514
+ task_contributors: list[str],
1515
+ ) -> None:
1516
+ """
1517
+ Update a local task in swarm memory.
1518
+ """
1519
+ if task_id not in self.mail_tasks:
1520
+ logger.warning(f"a task with ID '{task_id}' does not exist in swarm memory")
1521
+ raise ValueError(
1522
+ f"a task with ID '{task_id}' does not exist in swarm memory"
1523
+ )
1524
+ self.mail_tasks[task_id].task_owner = task_owner
1525
+ self.mail_tasks[task_id].task_contributors = task_contributors
1526
+
1527
    async def _process_message(
        self,
        message: MAILMessage,
        action_override: ActionOverrideFunction | None = None,
    ) -> None:
        """
        The internal process for sending a message to the recipient agent(s).

        Routing pipeline: drop late agent messages for completed tasks →
        fan out broadcast_complete to all histories → expand/validate
        recipients against comm_targets → hand interswarm recipients to the
        router → otherwise process locally.
        """
        # make sure this task_id exists in swarm memory
        task_id = message["message"]["task_id"]
        await self._ensure_task_exists(task_id)
        task_state = self.mail_tasks[task_id]

        # Agent messages for an already-completed task are stale; drop them
        # (broadcast_complete still passes so completion fan-out happens below).
        if (
            task_state.completed
            and message["message"]["sender"]["address_type"] == "agent"
            and message["msg_type"] != "broadcast_complete"
        ):
            logger.info(
                f"{self._log_prelude()} ignoring message for completed task '{task_id}': '{message['message']['subject']}'"
            )
            try:
                self.message_queue.task_done()
            except Exception:
                pass
            return

        msg_content = message["message"]

        if "recipients" in msg_content and message["msg_type"] == "broadcast_complete":
            # Append broadcast completion to every agent history and stop
            for agent in self.agents:
                self.agent_histories[
                    AGENT_HISTORY_KEY.format(task_id=task_id, agent_name=agent)
                ].append(build_mail_xml(message))
            task_state.mark_complete()
            await task_state.queue_stash(self.message_queue)
            return

        # Normalize to a recipient list, expanding the "all local agents"
        # sentinel to every registered agent.
        recipients_for_routing: list[MAILAddress] = []
        if "recipients" in msg_content:
            recipients_for_routing = msg_content["recipients"]  # type: ignore
            if recipients_for_routing == [MAIL_ALL_LOCAL_AGENTS]:  # type: ignore[comparison-overlap]
                recipients_for_routing = [
                    create_agent_address(agent) for agent in self.agents.keys()
                ]
        elif "recipient" in msg_content:
            recipients_for_routing = [msg_content["recipient"]]

        sender_info = msg_content.get("sender")
        disallowed_targets = self._find_disallowed_comm_targets(
            sender_info if isinstance(sender_info, dict) else None,
            recipients_for_routing,
            message["msg_type"],
        )
        if disallowed_targets:
            # Reject the whole message and tell the sender which targets were
            # outside its comm_targets.
            sender_label = (
                sender_info.get("address")
                if isinstance(sender_info, dict)
                else "unknown"
            )
            logger.warning(
                f"{self._log_prelude()} agent '{sender_label}' attempted to message targets outside comm_targets: {', '.join(disallowed_targets)}"
            )
            targets_str = ", ".join(disallowed_targets)
            body = (
                "Your message was not delivered because the following recipients "
                f"are not in your comm_targets: {targets_str}. "
                "Update the swarm configuration or choose an allowed recipient."
            )
            self._submit_event(
                "agent_error",
                task_id,
                f"agent attempted to contact disallowed recipients: {targets_str}",
            )
            if isinstance(sender_info, dict):
                await self.submit(
                    self._system_response(
                        task_id=task_id,
                        recipient=sender_info,  # type: ignore[arg-type]
                        subject="::invalid_recipient::",
                        body=body,
                    )
                )
            try:
                self.message_queue.task_done()
            except Exception:
                pass
            return

        if self.enable_interswarm and self.interswarm_router and recipients_for_routing:
            # Any recipient addressed to a different swarm sends the whole
            # message through the interswarm router (fire-and-forget task).
            has_interswarm_recipients = False
            for recipient in recipients_for_routing:
                _, recipient_swarm = parse_agent_address(recipient["address"])
                if recipient_swarm and recipient_swarm != self.swarm_name:
                    has_interswarm_recipients = True
                    break

            if has_interswarm_recipients:
                asyncio.create_task(self._send_interswarm_message(message))
                try:
                    self.message_queue.task_done()
                except Exception:
                    pass
                return

        # Fall back to local processing
        await self._process_local_message(message, action_override)
1635
+
1636
+ def _convert_interswarm_message_to_local(
1637
+ self,
1638
+ message: MAILInterswarmMessage,
1639
+ ) -> MAILMessage:
1640
+ """
1641
+ Convert an interswarm message (`MAILInterswarmMessage`) to a local message (`MAILMessage`).
1642
+ """
1643
+ return MAILMessage(
1644
+ id=message["message_id"],
1645
+ timestamp=message["timestamp"],
1646
+ message=message["payload"],
1647
+ msg_type=message["msg_type"],
1648
+ )
1649
+
1650
    async def _receive_interswarm_message(
        self,
        message: MAILInterswarmMessage,
    ) -> None:
        """
        Receive a message from a remote swarm and route it to the appropriate local agent.

        Validates every local recipient, registers or updates the task in swarm
        memory depending on travel direction, then submits the converted local
        message to the runtime queue.

        Raises:
            ValueError: if a recipient is an unknown local agent, if the task
                state in swarm memory is inconsistent with the envelope
                (duplicate task on a forward hop, missing task or owner
                mismatch on a back hop), or if submitting the converted
                message fails.
        """
        payload = message["payload"]
        task_id = payload["task_id"]
        # A single-recipient payload is normalized into a one-element list.
        recipients = payload.get("recipients") or [payload.get("recipient")]
        task_owner = message["task_owner"]
        task_contributors = message["task_contributors"]

        logger.debug(
            f"{self._log_prelude()} receiving interswarm message for task '{task_id}' with contributors: {task_contributors}"
        )

        # Sanity-check the envelope shape before routing.
        # NOTE(review): these asserts are stripped under `python -O`; the
        # checks would then silently disappear.
        assert isinstance(recipients, list)
        for recipient in recipients:
            assert isinstance(recipient, dict)
            assert "address" in recipient
            assert "address_type" in recipient
            recipient_agent, recipient_swarm = parse_agent_address(recipient["address"])
            # Recipients addressed to other swarms are not our concern here.
            if recipient_swarm != self.swarm_name:
                logger.debug(
                    f"{self._log_prelude()} skipping remote agent '{recipient_agent}' in interswarm message"
                )
                continue
            # A locally-addressed recipient must actually exist in this swarm.
            if recipient_agent not in self.agents:
                logger.warning(
                    f"{self._log_prelude()} unknown local agent: '{recipient_agent}'"
                )
                raise ValueError(f"unknown local agent: '{recipient_agent}'")

        # direction = forward
        # This swarm has never contributed to the task, so the message is a
        # first-time forward hop: the task must be new to swarm memory.
        if self.this_owner not in task_contributors:
            if task_id in self.mail_tasks:
                logger.warning(
                    f"a task with ID '{task_id}' already exists in swarm memory"
                )
                raise ValueError(
                    f"a task with ID '{task_id}' already exists in swarm memory"
                )
            self._add_remote_task(task_id, task_owner, task_contributors)
        # direction = back
        # This swarm already contributed, so the message is returning along an
        # established route: the task must already exist and the owner must match.
        else:
            if task_id not in self.mail_tasks:
                logger.warning(
                    f"a task with ID '{task_id}' does not exist in swarm memory"
                )
                raise ValueError(
                    f"a task with ID '{task_id}' does not exist in swarm memory"
                )
            if self.mail_tasks[task_id].task_owner != task_owner:
                logger.warning(
                    f"task owner mismatch: expected '{self.mail_tasks[task_id].task_owner}', got '{task_owner}'"
                )
                raise ValueError(
                    f"task owner mismatch: expected '{self.mail_tasks[task_id].task_owner}', got '{task_owner}'"
                )
            # update task contributors in swarm memory
            self._update_local_task(task_id, task_owner, task_contributors)

        try:
            # Hand the (now local) message to the runtime queue and record the event.
            await self.submit(self._convert_interswarm_message_to_local(message))
            self._submit_event(
                "interswarm_message_received",
                task_id,
                f"received interswarm message from swarm {message['source_swarm']}",
            )
        except Exception as e:
            logger.error(
                f"{self._log_prelude()} error receiving interswarm message: {e}"
            )
            self._submit_event(
                "interswarm_message_error",
                task_id,
                f"error receiving interswarm message: {e}",
            )
            raise ValueError(f"error receiving interswarm message: {e}")
1730
+
1731
+ async def _send_interswarm_message(
1732
+ self,
1733
+ message: MAILMessage,
1734
+ ) -> None:
1735
+ """
1736
+ Send a message to a remote swarm via the interswarm router.
1737
+ """
1738
+ # append this instance to the contributors list, if not already present
1739
+ if (
1740
+ self.this_owner
1741
+ not in self.mail_tasks[message["message"]["task_id"]].task_contributors
1742
+ ):
1743
+ self.mail_tasks[message["message"]["task_id"]].task_contributors.append(
1744
+ self.this_owner
1745
+ )
1746
+
1747
+ task_id = message["message"]["task_id"]
1748
+ task_owner = self.mail_tasks[task_id].task_owner
1749
+ task_contributors = self.mail_tasks[task_id].task_contributors
1750
+
1751
+ if self.interswarm_router is None:
1752
+ logger.error(f"{self._log_prelude()} interswarm router not available")
1753
+ raise ValueError("interswarm router not available")
1754
+
1755
+ interswarm_message = self.interswarm_router.convert_local_message_to_interswarm(
1756
+ message,
1757
+ task_owner,
1758
+ task_contributors,
1759
+ )
1760
+
1761
+ target_contributor = None
1762
+ for contributor in task_contributors:
1763
+ if contributor.split("@")[1] == interswarm_message["target_swarm"]:
1764
+ target_contributor = contributor
1765
+ break
1766
+ # direction = forward
1767
+ if target_contributor is None:
1768
+ try:
1769
+ await self.interswarm_router.send_interswarm_message_forward(
1770
+ interswarm_message
1771
+ )
1772
+ self._submit_event(
1773
+ "interswarm_message_sent",
1774
+ task_id,
1775
+ f"sent interswarm message forward to swarm {interswarm_message['target_swarm']}:\n{build_interswarm_mail_xml(interswarm_message)['content']}",
1776
+ )
1777
+ except Exception as e:
1778
+ logger.error(
1779
+ f"{self._log_prelude()} runtime failed to send interswarm message forward: {e}"
1780
+ )
1781
+ self._submit_event(
1782
+ "interswarm_message_error",
1783
+ task_id,
1784
+ f"error sending interswarm message forward: {e}",
1785
+ )
1786
+ raise ValueError(
1787
+ f"runtime failed to send interswarm message forward: {e}"
1788
+ )
1789
+ # direction = back
1790
+ else:
1791
+ try:
1792
+ await self.interswarm_router.send_interswarm_message_back(
1793
+ interswarm_message
1794
+ )
1795
+ self._submit_event(
1796
+ "interswarm_message_sent",
1797
+ task_id,
1798
+ f"sent interswarm message back to swarm {interswarm_message['target_swarm']}:\n{build_interswarm_mail_xml(interswarm_message)['content']}",
1799
+ )
1800
+ except Exception as e:
1801
+ logger.error(
1802
+ f"{self._log_prelude()} runtime failed to send interswarm message back: {e}"
1803
+ )
1804
+ self._submit_event(
1805
+ "interswarm_message_error",
1806
+ task_id,
1807
+ f"error sending interswarm message back: {e}",
1808
+ )
1809
+ raise ValueError(f"runtime failed to send interswarm message back: {e}")
1810
+
1811
+ def _find_disallowed_comm_targets(
1812
+ self,
1813
+ sender: MAILAddress | None,
1814
+ recipients: list[MAILAddress] | None,
1815
+ msg_type: str,
1816
+ ) -> list[str]:
1817
+ """
1818
+ Determine which recipients are not reachable for the sender based on comm_targets.
1819
+ """
1820
+ if (
1821
+ sender is None
1822
+ or recipients is None
1823
+ or msg_type in {"broadcast", "broadcast_complete"}
1824
+ ):
1825
+ return []
1826
+ if sender.get("address_type") != "agent":
1827
+ return []
1828
+
1829
+ sender_agent, sender_swarm = parse_agent_address(sender["address"])
1830
+ if sender_swarm and sender_swarm != self.swarm_name:
1831
+ # Enforce comm_targets only for local agents
1832
+ return []
1833
+
1834
+ agent_core = self.agents.get(sender_agent)
1835
+ if agent_core is None:
1836
+ return []
1837
+
1838
+ allowed_targets = set(agent_core.comm_targets)
1839
+ disallowed: list[str] = []
1840
+ for recipient in recipients:
1841
+ if recipient.get("address_type") != "agent":
1842
+ continue
1843
+ recipient_address = recipient.get("address")
1844
+ if recipient_address in {None, MAIL_ALL_LOCAL_AGENTS["address"]}:
1845
+ continue
1846
+ if recipient_address not in allowed_targets:
1847
+ assert isinstance(recipient_address, str)
1848
+ disallowed.append(recipient_address)
1849
+
1850
+ return disallowed
1851
+
1852
    async def _process_local_message(
        self,
        message: MAILMessage,
        action_override: ActionOverrideFunction | None = None,
    ) -> None:
        """
        Process a message locally (original _process_message logic).

        Normalizes the recipient list, de-duplicates it, filters out
        self-sends for non-system senders, then delivers to each local agent
        (directly, or via the manual buffer when the runtime is in manual
        mode). Unknown recipients produce corrective system responses; a
        system-to-system loop ends the task.

        Args:
            message: the MAIL message to deliver.
            action_override: optional action override forwarded to
                `_send_message` for each delivery.
        """
        # if the message is a `broadcast_complete`, don't send it to the recipient agents
        # but DO append it to the agent history as tool calls (the actual broadcast)
        if message["msg_type"] == "broadcast_complete":
            for agent in self.agents:
                self.agent_histories[
                    AGENT_HISTORY_KEY.format(
                        task_id=message["message"]["task_id"], agent_name=agent
                    )
                ].append(build_mail_xml(message))
            return

        msg_content = message["message"]

        # Normalise recipients into a list of address strings (agent names or interswarm ids)
        raw_recipients: list[MAILAddress]
        if "recipients" in msg_content:
            raw_recipients = msg_content["recipients"]  # type: ignore
        else:
            raw_recipients = [msg_content["recipient"]]  # type: ignore[list-item]

        sender_address = message["message"]["sender"]["address"]

        # Expand the all-agents wildcard into every local agent name.
        recipient_addresses: list[str] = []
        for address in raw_recipients:
            addr_str = address["address"]
            if (
                addr_str == MAIL_ALL_LOCAL_AGENTS["address"]
                and address["address_type"] == "agent"
            ):
                recipient_addresses.extend(self.agents.keys())
            else:
                recipient_addresses.append(addr_str)

        # Drop duplicate addresses while preserving order
        seen: set[str] = set()
        deduped: list[str] = []
        for addr in recipient_addresses:
            if addr not in seen:
                seen.add(addr)
                deduped.append(addr)

        # Prevent agents from broadcasting to themselves (but allow system messages
        # to agents even if the swarm name matches the agent name)
        sender_type = message["message"]["sender"]["address_type"]
        if sender_type == "system":
            recipients = deduped
        else:
            recipients = [addr for addr in deduped if addr != sender_address]

        for recipient in recipients:
            # Parse recipient address to get local agent name
            recipient_agent, recipient_swarm = parse_agent_address(recipient)

            # Only process if this is a local agent or no swarm specified
            if not recipient_swarm or recipient_swarm == self.swarm_name:
                sender_agent = message["message"]["sender"]
                if recipient_agent in self.agents:
                    # Normal delivery; in manual mode, non-system messages are
                    # buffered until the operator steps the agent.
                    if (
                        not self._is_manual
                        or message["message"]["sender"]["address_type"] == "system"
                    ):
                        self._send_message(recipient_agent, message, action_override)
                    else:
                        key = AGENT_HISTORY_KEY.format(
                            task_id=message["message"]["task_id"],
                            agent_name=recipient_agent,
                        )
                        self.manual_message_buffer[key].append(message)
                        logger.info(
                            f"{self._log_prelude()} added message to manual message buffer for agent '{recipient_agent}'"
                        )
                else:
                    logger.warning(
                        f"{self._log_prelude()} unknown local agent: '{recipient_agent}'"
                    )

                    # if the recipient is actually the user, indicate that
                    if recipient_agent == self.user_id:
                        self._submit_event(
                            "agent_error",
                            message["message"]["task_id"],
                            f"agent {message['message']['sender']['address']} attempted to send a message to the user ({self.user_id})",
                        )
                        # Redirect the sender with a corrective system response.
                        self._send_message(
                            sender_agent["address"],
                            self._system_response(
                                task_id=message["message"]["task_id"],
                                recipient=create_agent_address(sender_agent["address"]),
                                subject="::improper_response_to_user::",
                                body=f"""The user ('{self.user_id}') is unable to respond to your message.
If the user's task is complete, use the 'task_complete' tool.
Otherwise, continue working with your agents to complete the user's task.""",
                            ),
                            action_override,
                        )
                    elif recipient_agent == self.swarm_name:
                        # A message addressed to the swarm itself indicates a
                        # system-to-system loop; end the task to avoid infinite loops.
                        self._submit_event(
                            "task_error",
                            message["message"]["task_id"],
                            f"agent {recipient_agent} is the swarm name; message from {message['message']['sender']['address']} cannot be delivered to it",
                        )
                        await self.submit(
                            self._system_broadcast(
                                task_id=message["message"]["task_id"],
                                subject="::runtime_error::",
                                body=f"""A message was detected with sender '{message["message"]["sender"]["address"]}' and recipient '{recipient_agent}'.
This likely means that an error message intended for an agent was sent to the system.
This, in turn, was probably caused by an agent failing to respond to a system response.
In order to prevent infinite loops, system-to-system messages immediately end the task.""",
                                task_complete=True,
                            )
                        )
                        return None
                    else:
                        # otherwise, just a normal unknown agent
                        self._submit_event(
                            "agent_error",
                            message["message"]["task_id"],
                            f"agent {recipient_agent} is unknown; message from {message['message']['sender']['address']} cannot be delivered to it",
                        )
                        self._send_message(
                            sender_agent["address"],
                            self._system_response(
                                task_id=message["message"]["task_id"],
                                recipient=create_agent_address(sender_agent["address"]),
                                subject="::agent_error::",
                                body=f"""The agent '{recipient_agent}' is not known to this swarm.
Your directly reachable agents can be found in the tool definitions for `send_request` and `send_response`.""",
                            ),
                            action_override,
                        )
            else:
                logger.debug(
                    f"{self._log_prelude()} skipping remote agent '{recipient}' in local processing"
                )

        return None
1997
+
1998
+ async def _manual_step(
1999
+ self,
2000
+ task_id: str,
2001
+ target: str,
2002
+ response_targets: list[str] | None = None,
2003
+ response_type: Literal["broadcast", "response", "request"] = "broadcast",
2004
+ payload: str | None = None,
2005
+ dynamic_ctx_ratio: float = 0.0,
2006
+ _llm: str | None = None,
2007
+ _system: str | None = None,
2008
+ ) -> MAILMessage:
2009
+ """
2010
+ Manually step a target agent and return the response message.
2011
+ """
2012
+ if not response_type == "broadcast" and response_targets is None:
2013
+ raise ValueError(
2014
+ "response_targets must be provided for non-broadcast response types"
2015
+ )
2016
+ response_targets = response_targets or ["all"]
2017
+ while not self.message_queue.empty():
2018
+ await asyncio.sleep(0.1)
2019
+ if target not in self.agents:
2020
+ logger.warning(f"{self._log_prelude()} unknown agent: '{target}'")
2021
+ raise ValueError(f"unknown agent target: '{target}'")
2022
+ if response_targets is not None:
2023
+ for response_target in response_targets:
2024
+ if (
2025
+ response_target not in self.agents
2026
+ and not response_target == MAIL_ALL_LOCAL_AGENTS["address"]
2027
+ ):
2028
+ logger.warning(
2029
+ f"{self._log_prelude()} unknown agent: '{response_target}'"
2030
+ )
2031
+ raise ValueError(
2032
+ f"unknown agent response target: '{response_target}'"
2033
+ )
2034
+ buffer_key = AGENT_HISTORY_KEY.format(task_id=task_id, agent_name=target)
2035
+ self.manual_return_events[buffer_key].clear()
2036
+ self.manual_return_messages[buffer_key] = None
2037
+ buffer = self.manual_message_buffer.get(buffer_key, [])
2038
+ body = ""
2039
+ for message in buffer:
2040
+ public_message = False
2041
+ private_group = False
2042
+ if "recipients" in message["message"]:
2043
+ if (
2044
+ message["message"]["recipients"][0]["address"] # type: ignore
2045
+ == MAIL_ALL_LOCAL_AGENTS["address"]
2046
+ ):
2047
+ body += "<public_message>\n"
2048
+ public_message = True
2049
+ else:
2050
+ body += "<private_message>\n"
2051
+ private_group = True
2052
+ else:
2053
+ body += "<private_message>\n"
2054
+ body += f"<from>{message['message']['sender']['address']}</from>\n"
2055
+ if private_group:
2056
+ to = [
2057
+ address["address"]
2058
+ for address in message["message"]["recipients"] # type: ignore
2059
+ ]
2060
+ body += f"<to>{', '.join(to)}</to>\n"
2061
+ body += f"{message['message']['body']}\n"
2062
+ if public_message:
2063
+ body += "</public_message>\n\n\n"
2064
+ else:
2065
+ body += "</private_message>\n\n\n"
2066
+
2067
+ body += payload or ""
2068
+ body = body.rstrip()
2069
+
2070
+ message = MAILMessage(
2071
+ id=str(uuid.uuid4()),
2072
+ timestamp=datetime.datetime.now(datetime.UTC).isoformat(),
2073
+ message=MAILRequest(
2074
+ task_id=task_id,
2075
+ request_id=str(uuid.uuid4()),
2076
+ sender=create_system_address("system"),
2077
+ recipient=create_agent_address(target),
2078
+ subject="::manual_step::",
2079
+ body=body,
2080
+ sender_swarm=None,
2081
+ recipient_swarm=None,
2082
+ routing_info={
2083
+ "manual_response_type": response_type,
2084
+ "manual_response_targets": response_targets,
2085
+ },
2086
+ ),
2087
+ msg_type="buffered",
2088
+ )
2089
+ self.manual_message_buffer[buffer_key].clear()
2090
+
2091
+ self._send_message(target, message, None, dynamic_ctx_ratio, _llm, _system)
2092
+ await self.manual_return_events[buffer_key].wait()
2093
+ if self.manual_return_messages[buffer_key] is None:
2094
+ raise RuntimeError(
2095
+ f"no return message for agent '{target}' for task '{task_id}'"
2096
+ )
2097
+ return self.manual_return_messages[buffer_key] # type: ignore
2098
+
2099
+ def _send_message(
2100
+ self,
2101
+ recipient: str,
2102
+ message: MAILMessage,
2103
+ action_override: ActionOverrideFunction | None = None,
2104
+ dynamic_ctx_ratio: float = 0.0,
2105
+ _llm: str | None = None,
2106
+ _system: str | None = None,
2107
+ ) -> None:
2108
+ """
2109
+ Send a message to a recipient.
2110
+ """
2111
+ logger.info(
2112
+ f"{self._log_prelude()} sending message: [yellow]{message['message']['sender']['address_type']}:{message['message']['sender']['address']}[/yellow] -> [yellow]agent:{recipient}[/yellow] with subject: '{message['message']['subject']}'"
2113
+ )
2114
+ if not message["message"]["subject"].startswith(
2115
+ "::action_complete_broadcast::"
2116
+ ):
2117
+ self._submit_event(
2118
+ "new_message",
2119
+ message["message"]["task_id"],
2120
+ f"sending message:\n{build_mail_xml(message)['content']}",
2121
+ extra_data={
2122
+ "full_message": message,
2123
+ },
2124
+ )
2125
+
2126
+ async def schedule(message: MAILMessage) -> None:
2127
+ """
2128
+ Schedule a message for processing.
2129
+ Agent functions are called here.
2130
+ """
2131
+ try:
2132
+ # prepare the message for agent input
2133
+ task_id = message["message"]["task_id"]
2134
+ tool_choice: str | dict[str, str] = (
2135
+ "required" if not self._is_manual else "auto"
2136
+ )
2137
+ routing_info = message["message"].get("routing_info", {})
2138
+
2139
+ # get agent history for this task
2140
+ agent_history_key = AGENT_HISTORY_KEY.format(
2141
+ task_id=task_id, agent_name=recipient
2142
+ )
2143
+ history = self.agent_histories[agent_history_key]
2144
+
2145
+ if (
2146
+ message["message"]["sender"]["address_type"] == "system"
2147
+ and message["message"]["subject"] == "::maximum_steps_reached::"
2148
+ and not self._is_manual
2149
+ ):
2150
+ tool_choice = {"type": "function", "name": "task_complete"}
2151
+
2152
+ if not message["message"]["subject"].startswith(
2153
+ "::action_complete_broadcast::"
2154
+ ):
2155
+ if not message["msg_type"] == "buffered":
2156
+ incoming_message = build_mail_xml(message)
2157
+ history.append(incoming_message)
2158
+ else:
2159
+ history.append(
2160
+ {"role": "user", "content": message["message"]["body"]}
2161
+ )
2162
+
2163
+ if dynamic_ctx_ratio > 0.0 and _llm is not None:
2164
+ history = await self._compress_context(
2165
+ self.agents[recipient],
2166
+ _llm,
2167
+ _system,
2168
+ dynamic_ctx_ratio,
2169
+ history,
2170
+ )
2171
+
2172
+ # agent function is called here
2173
+ agent_fn = self.agents[recipient].function
2174
+ _output_text, tool_calls = await agent_fn(history, tool_choice) # type: ignore
2175
+
2176
+ # append the agent's response to the history
2177
+ if tool_calls[0].completion:
2178
+ history.append(tool_calls[0].completion)
2179
+ else:
2180
+ history.extend(tool_calls[0].responses)
2181
+
2182
+ # Emit tool_call events for all calls (before any mutations)
2183
+ # Track last call with reasoning for reasoning_ref
2184
+ last_reasoning_call_id: str | None = None
2185
+ for call in tool_calls:
2186
+ if call.reasoning:
2187
+ self._emit_tool_call_event(task_id, recipient, call)
2188
+ last_reasoning_call_id = call.tool_call_id
2189
+ else:
2190
+ self._emit_tool_call_event(
2191
+ task_id, recipient, call, reasoning_ref=last_reasoning_call_id
2192
+ )
2193
+
2194
+ breakpoint_calls = [
2195
+ call
2196
+ for call in tool_calls
2197
+ if call.tool_name in self.breakpoint_tools
2198
+ ]
2199
+ if breakpoint_calls:
2200
+ logger.info(
2201
+ f"{self._log_prelude()} agent '{recipient}' used breakpoint tools '{', '.join([call.tool_name for call in breakpoint_calls])}'"
2202
+ )
2203
+ self._submit_event(
2204
+ "breakpoint_tool_call",
2205
+ task_id,
2206
+ f"agent {recipient} used breakpoint tools {', '.join([call.tool_name for call in breakpoint_calls])} with args: {', '.join([ujson.dumps(call.tool_args) for call in breakpoint_calls])}",
2207
+ )
2208
+ self.last_breakpoint_caller[task_id] = recipient
2209
+ self.last_breakpoint_tool_calls[task_id] = breakpoint_calls
2210
+ bp_dumps: list[dict[str, Any]] = []
2211
+ for call in breakpoint_calls:
2212
+ raw_block: dict[str, Any] | None = None
2213
+ if call.completion:
2214
+ completion = call.completion
2215
+ content = completion.get("content", [])
2216
+ for block in content:
2217
+ if (
2218
+ isinstance(block, dict)
2219
+ and block.get("type") == "tool_use"
2220
+ and block.get("id") == call.tool_call_id
2221
+ ):
2222
+ raw_block = block
2223
+ break
2224
+ else:
2225
+ for resp in call.responses:
2226
+ if (
2227
+ isinstance(resp, dict)
2228
+ and resp.get("type") == "function_call"
2229
+ and resp.get("call_id") == call.tool_call_id
2230
+ ):
2231
+ raw_block = resp
2232
+ break
2233
+ bp_dumps.append(
2234
+ normalize_breakpoint_tool_call(call, raw_block)
2235
+ )
2236
+ await self.submit(
2237
+ self._system_broadcast(
2238
+ task_id=task_id,
2239
+ subject="::breakpoint_tool_call::",
2240
+ body=f"{ujson.dumps(bp_dumps)}",
2241
+ task_complete=True,
2242
+ )
2243
+ )
2244
+ # Remove breakpoint tools from processing
2245
+ tool_calls = [
2246
+ tc
2247
+ for tc in tool_calls
2248
+ if tc.tool_name not in self.breakpoint_tools
2249
+ ]
2250
+
2251
+ # handle tool calls
2252
+ has_action_completed = False
2253
+ action_errors: list[tuple[str, Exception]] = []
2254
+ for call in tool_calls:
2255
+ match call.tool_name:
2256
+ case "text_output":
2257
+ logger.info(
2258
+ f"{self._log_prelude()} agent '{recipient}' sent raw text output with content: '{call.tool_args['content']}'"
2259
+ )
2260
+ call.tool_args["target"] = message["message"]["sender"][
2261
+ "address"
2262
+ ]
2263
+ assert routing_info is not None
2264
+ res_type = routing_info.get(
2265
+ "manual_response_type", "broadcast"
2266
+ )
2267
+ res_targets = routing_info.get(
2268
+ "manual_response_targets", ["all"]
2269
+ )
2270
+ outgoing_message = convert_manual_step_call_to_mail_message(
2271
+ call, recipient, task_id, res_targets, res_type
2272
+ )
2273
+ self.manual_return_messages[agent_history_key] = (
2274
+ outgoing_message
2275
+ )
2276
+ await self.submit(outgoing_message)
2277
+ self.manual_return_events[agent_history_key].set()
2278
+ case "acknowledge_broadcast":
2279
+ try:
2280
+ # Only store if this was a broadcast; otherwise treat as no-op
2281
+ if message["msg_type"] == "broadcast":
2282
+ # note = call.tool_args.get("note")
2283
+ # async with get_langmem_store() as store:
2284
+ # manager = create_memory_store_manager(
2285
+ # "anthropic:claude-sonnet-4-20250514",
2286
+ # query_model="anthropic:claude-sonnet-4-20250514",
2287
+ # query_limit=10,
2288
+ # namespace=(f"{recipient}_memory",),
2289
+ # store=store,
2290
+ # )
2291
+ # assistant_content = (
2292
+ # f"<acknowledged broadcast/>\n{note}".strip()
2293
+ # if note
2294
+ # else "<acknowledged broadcast/>"
2295
+ # )
2296
+ # await manager.ainvoke(
2297
+ # {
2298
+ # "messages": [
2299
+ # {
2300
+ # "role": "user",
2301
+ # "content": incoming_message[
2302
+ # "content"
2303
+ # ],
2304
+ # },
2305
+ # {
2306
+ # "role": "assistant",
2307
+ # "content": assistant_content,
2308
+ # },
2309
+ # ]
2310
+ # }
2311
+ # )
2312
+ self._tool_call_response(
2313
+ task_id=task_id,
2314
+ caller=recipient,
2315
+ tool_call=call,
2316
+ status="success",
2317
+ details="broadcast acknowledged",
2318
+ )
2319
+ else:
2320
+ logger.warning(
2321
+ f"{self._log_prelude()} agent '{recipient}' used 'acknowledge_broadcast' on a '{message['msg_type']}'"
2322
+ )
2323
+ self._tool_call_response(
2324
+ task_id=task_id,
2325
+ caller=recipient,
2326
+ tool_call=call,
2327
+ status="error",
2328
+ details="improper use of `acknowledge_broadcast`",
2329
+ )
2330
+ await self.submit(
2331
+ self._system_response(
2332
+ task_id=task_id,
2333
+ recipient=create_agent_address(recipient),
2334
+ subject="::tool_call_error::",
2335
+ body=f"""The `acknowledge_broadcast` tool cannot be used in response to a message of type '{message["msg_type"]}'.
2336
+ If your sender's message is a 'request', consider using `send_response` instead.
2337
+ Otherwise, determine the best course of action to complete your task.""",
2338
+ )
2339
+ )
2340
+ except Exception as e:
2341
+ logger.error(
2342
+ f"{self._log_prelude()} error acknowledging broadcast for agent '{recipient}': {e}"
2343
+ )
2344
+ self._tool_call_response(
2345
+ task_id=task_id,
2346
+ caller=recipient,
2347
+ tool_call=call,
2348
+ status="error",
2349
+ details=f"error acknowledging broadcast: {e}",
2350
+ )
2351
+ self._submit_event(
2352
+ "agent_error",
2353
+ task_id,
2354
+ f"error acknowledging broadcast for agent {recipient}: {e}",
2355
+ )
2356
+ await self.submit(
2357
+ self._system_response(
2358
+ task_id=task_id,
2359
+ recipient=create_agent_address(recipient),
2360
+ subject="::tool_call_error::",
2361
+ body=f"""An error occurred while acknowledging the broadcast from '{message["message"]["sender"]["address"]}'.
2362
+ Specifically, the MAIL runtime encountered the following error: {e}.
2363
+ It is possible that the `acknowledge_broadcast` tool is not implemented properly.
2364
+ Use this information to decide how to complete your task.""",
2365
+ )
2366
+ )
2367
+ # No outgoing message submission for acknowledge
2368
+ case "ignore_broadcast":
2369
+ # Explicitly ignore without storing or responding
2370
+ logger.info(
2371
+ f"{self._log_prelude()} agent {recipient} called ignore_broadcast"
2372
+ )
2373
+ self._tool_call_response(
2374
+ task_id=task_id,
2375
+ caller=recipient,
2376
+ tool_call=call,
2377
+ status="success",
2378
+ details="broadcast ignored",
2379
+ )
2380
+ self._submit_event(
2381
+ "broadcast_ignored",
2382
+ task_id,
2383
+ f"agent {recipient} called ignore_broadcast",
2384
+ )
2385
+ # No further action
2386
+ case "await_message":
2387
+ # Allow await if there are outstanding requests OR messages in queue
2388
+ outstanding = self.outstanding_requests[task_id][recipient]
2389
+ queue_empty = self.message_queue.empty()
2390
+ if queue_empty and outstanding == 0:
2391
+ logger.warning(
2392
+ f"{self._log_prelude()} agent '{recipient}' called 'await_message' "
2393
+ f"but has no outstanding requests and message queue is empty"
2394
+ )
2395
+ self._tool_call_response(
2396
+ task_id=task_id,
2397
+ caller=recipient,
2398
+ tool_call=call,
2399
+ status="error",
2400
+ details="no outstanding requests and message queue is empty",
2401
+ )
2402
+ self._submit_event(
2403
+ "agent_error",
2404
+ task_id,
2405
+ f"agent {recipient} called await_message but has no outstanding requests and message queue is empty",
2406
+ )
2407
+ await self.submit(
2408
+ self._system_response(
2409
+ task_id=task_id,
2410
+ recipient=create_agent_address(recipient),
2411
+ subject="::tool_call_error::",
2412
+ body="""The tool call `await_message` was attempted but you have no outstanding requests and the message queue is empty.
2413
+ In order to prevent frozen tasks, `await_message` only works if you have sent requests that haven't been responded to yet, or if there are messages waiting in the queue.
2414
+ Consider sending a request to another agent before calling `await_message`.""",
2415
+ )
2416
+ )
2417
+ return
2418
+ logger.debug(
2419
+ f"{self._log_prelude()} agent '{recipient}' awaiting "
2420
+ f"(outstanding={outstanding}, queue_empty={queue_empty})"
2421
+ )
2422
+ wait_reason = call.tool_args.get("reason")
2423
+ logger.info(
2424
+ f"{self._log_prelude()} agent '{recipient}' called 'await_message'{f': {wait_reason}' if wait_reason else ''}",
2425
+ )
2426
+ details = "waiting for a new message"
2427
+ if wait_reason:
2428
+ details = f"{details} (reason: '{wait_reason}')"
2429
+ self._tool_call_response(
2430
+ task_id=task_id,
2431
+ caller=recipient,
2432
+ tool_call=call,
2433
+ status="success",
2434
+ details=details,
2435
+ )
2436
+ event_description = (
2437
+ f"agent '{recipient}' is awaiting a new message"
2438
+ )
2439
+ if wait_reason:
2440
+ event_description = (
2441
+ f"{event_description}: {wait_reason}"
2442
+ )
2443
+ self._submit_event(
2444
+ "await_message",
2445
+ task_id,
2446
+ event_description,
2447
+ extra_data={"reason": wait_reason}
2448
+ if wait_reason
2449
+ else {},
2450
+ )
2451
+ # No further action
2452
+ return
2453
+ case (
2454
+ "send_request"
2455
+ | "send_response"
2456
+ | "send_interrupt"
2457
+ | "send_broadcast"
2458
+ ):
2459
+ try:
2460
+ outgoing_message = convert_call_to_mail_message(
2461
+ call, recipient, task_id
2462
+ )
2463
+ self._attach_interswarm_routing_metadata(
2464
+ task_id, message, outgoing_message, call
2465
+ )
2466
+ await self.submit(outgoing_message)
2467
+ # Track outstanding requests for await_message
2468
+ if call.tool_name == "send_request":
2469
+ # Sender is waiting for a response
2470
+ self.outstanding_requests[task_id][recipient] += 1
2471
+ logger.debug(
2472
+ f"{self._log_prelude()} agent '{recipient}' sent request, "
2473
+ f"outstanding={self.outstanding_requests[task_id][recipient]}"
2474
+ )
2475
+ elif call.tool_name == "send_response":
2476
+ # Response received, decrement target's outstanding count
2477
+ target = call.tool_args.get("target", "")
2478
+ if self.outstanding_requests[task_id][target] > 0:
2479
+ self.outstanding_requests[task_id][target] -= 1
2480
+ logger.debug(
2481
+ f"{self._log_prelude()} agent '{recipient}' sent response to '{target}', "
2482
+ f"target outstanding={self.outstanding_requests[task_id][target]}"
2483
+ )
2484
+ self._tool_call_response(
2485
+ task_id=task_id,
2486
+ caller=recipient,
2487
+ tool_call=call,
2488
+ status="success",
2489
+ details="message sent",
2490
+ )
2491
+ except Exception as e:
2492
+ logger.error(
2493
+ f"{self._log_prelude()} error sending message for agent '{recipient}': {e}"
2494
+ )
2495
+ self._tool_call_response(
2496
+ task_id=task_id,
2497
+ caller=recipient,
2498
+ tool_call=call,
2499
+ status="error",
2500
+ details=f"error sending message: {e}",
2501
+ )
2502
+ self._submit_event(
2503
+ "agent_error",
2504
+ task_id,
2505
+ f"error sending message for agent {recipient}: {e}",
2506
+ )
2507
+ await self.submit(
2508
+ self._system_response(
2509
+ task_id=task_id,
2510
+ recipient=create_agent_address(recipient),
2511
+ subject="::tool_call_error::",
2512
+ body=f"""An error occurred while sending the message from '{message["message"]["sender"]["address"]}'.
2513
+ Specifically, the MAIL runtime encountered the following error: {e}.
2514
+ It is possible that the message sending tool is not implemented properly.
2515
+ Use this information to decide how to complete your task.""",
2516
+ )
2517
+ )
2518
+ case "task_complete":
2519
+ if task_id:
2520
+ await self._handle_task_complete_call(
2521
+ task_id, recipient, call
2522
+ )
2523
+ else:
2524
+ logger.error(
2525
+ f"{self._log_prelude()} agent '{recipient}' called 'task_complete' but no task id was provided"
2526
+ )
2527
+ continue
2528
+ case "help":
2529
+ try:
2530
+ help_string = build_mail_help_string(
2531
+ name=recipient,
2532
+ swarm=self.swarm_name,
2533
+ get_summary=call.tool_args.get("get_summary", True),
2534
+ get_identity=call.tool_args.get(
2535
+ "get_identity", False
2536
+ ),
2537
+ get_tool_help=call.tool_args.get(
2538
+ "get_tool_help", []
2539
+ ),
2540
+ get_full_protocol=call.tool_args.get(
2541
+ "get_full_protocol", False
2542
+ ),
2543
+ )
2544
+ self._tool_call_response(
2545
+ task_id=task_id,
2546
+ caller=recipient,
2547
+ tool_call=call,
2548
+ status="success",
2549
+ details="help string generated; will be sent to you in a subsequent prompt",
2550
+ )
2551
+ self._submit_event(
2552
+ "help_called",
2553
+ task_id,
2554
+ f"agent {recipient} called help",
2555
+ )
2556
+ await self.submit(
2557
+ self._system_broadcast(
2558
+ task_id=task_id,
2559
+ subject="::help::",
2560
+ body=help_string,
2561
+ recipients=[create_agent_address(recipient)],
2562
+ )
2563
+ )
2564
+ except Exception as e:
2565
+ logger.error(
2566
+ f"{self._log_prelude()} error calling help tool for agent '{recipient}': {e}"
2567
+ )
2568
+ self._tool_call_response(
2569
+ task_id=task_id,
2570
+ caller=recipient,
2571
+ tool_call=call,
2572
+ status="error",
2573
+ details=f"error calling help tool: {e}",
2574
+ )
2575
+ self._submit_event(
2576
+ "agent_error",
2577
+ task_id,
2578
+ f"error calling help tool for agent {recipient}: {e}",
2579
+ )
2580
+ await self.submit(
2581
+ self._system_broadcast(
2582
+ task_id=task_id,
2583
+ subject="::tool_call_error::",
2584
+ body=f"""An error occurred while calling the help tool for agent '{recipient}'.
2585
+ Specifically, the MAIL runtime encountered the following error: {e}.
2586
+ This should never happen; consider informing the MAIL developers of this issue if you see it.""",
2587
+ task_complete=True,
2588
+ )
2589
+ )
2590
+ continue
2591
+
2592
+ continue
2593
+
2594
+ case "web_search_call":
2595
+ # Built-in OpenAI tool - already executed, just emit trace event
2596
+ logger.info(
2597
+ f"{self._log_prelude()} agent '{recipient}' used web_search: query='{call.tool_args.get('query', '')}'"
2598
+ )
2599
+ self._submit_event(
2600
+ "builtin_tool_call",
2601
+ task_id,
2602
+ f"agent {recipient} used web_search with query: {call.tool_args.get('query', '')}",
2603
+ extra_data={
2604
+ "tool_type": "web_search_call",
2605
+ "tool_args": call.tool_args,
2606
+ },
2607
+ )
2608
+ # No execution needed - OpenAI already ran this
2609
+ continue
2610
+
2611
+ case "code_interpreter_call":
2612
+ # Built-in OpenAI tool - already executed, just emit trace event
2613
+ code_preview = (call.tool_args.get("code", "") or "")[:100]
2614
+ logger.info(
2615
+ f"{self._log_prelude()} agent '{recipient}' used code_interpreter: code='{code_preview}...'"
2616
+ )
2617
+ self._submit_event(
2618
+ "builtin_tool_call",
2619
+ task_id,
2620
+ f"agent {recipient} used code_interpreter",
2621
+ extra_data={
2622
+ "tool_type": "code_interpreter_call",
2623
+ "tool_args": call.tool_args,
2624
+ },
2625
+ )
2626
+ # No execution needed - OpenAI already ran this
2627
+ continue
2628
+
2629
+ case _:
2630
+ action_name = call.tool_name
2631
+ action_caller = self.agents.get(recipient)
2632
+
2633
+ if action_caller is None:
2634
+ logger.error(
2635
+ f"{self._log_prelude()} agent '{recipient}' not found"
2636
+ )
2637
+ self._tool_call_response(
2638
+ task_id=task_id,
2639
+ caller=recipient,
2640
+ tool_call=call,
2641
+ status="error",
2642
+ details="agent not found",
2643
+ )
2644
+ self._submit_event(
2645
+ "action_error",
2646
+ task_id,
2647
+ f"agent {recipient} not found",
2648
+ )
2649
+ has_action_completed = True
2650
+ action_errors.append(
2651
+ (
2652
+ call.tool_name,
2653
+ Exception(f"""An agent called `{call.tool_name}` but the agent was not found.
2654
+ This should never happen; consider informing the MAIL developers of this issue if you see it."""),
2655
+ )
2656
+ )
2657
+ continue
2658
+
2659
+ action = self.actions.get(action_name)
2660
+ if action is None:
2661
+ logger.warning(
2662
+ f"{self._log_prelude()} action '{action_name}' not found"
2663
+ )
2664
+ self._tool_call_response(
2665
+ task_id=task_id,
2666
+ caller=recipient,
2667
+ tool_call=call,
2668
+ status="error",
2669
+ details="action not found",
2670
+ )
2671
+ self._submit_event(
2672
+ "action_error",
2673
+ task_id,
2674
+ f"action {action_name} not found",
2675
+ )
2676
+ has_action_completed = True
2677
+ action_errors.append(
2678
+ (
2679
+ call.tool_name,
2680
+ Exception(
2681
+ f"""The action '{action_name}' cannot be found in this swarm."""
2682
+ ),
2683
+ )
2684
+ )
2685
+ continue
2686
+
2687
+ if not action_caller.can_access_action(action_name):
2688
+ logger.warning(
2689
+ f"{self._log_prelude()} agent '{action_caller}' cannot access action '{action_name}'"
2690
+ )
2691
+ self._tool_call_response(
2692
+ task_id=task_id,
2693
+ caller=recipient,
2694
+ tool_call=call,
2695
+ status="error",
2696
+ details="agent cannot access action",
2697
+ )
2698
+ self._submit_event(
2699
+ "action_error",
2700
+ task_id,
2701
+ f"agent {action_caller} cannot access action {action_name}",
2702
+ )
2703
+ has_action_completed = True
2704
+ action_errors.append(
2705
+ (
2706
+ call.tool_name,
2707
+ Exception(
2708
+ f"""The action '{action_name}' is not available."""
2709
+ ),
2710
+ )
2711
+ )
2712
+ continue
2713
+
2714
+ logger.info(
2715
+ f"{self._log_prelude()} agent '{recipient}' executing action tool: '{call.tool_name}'"
2716
+ )
2717
+ self._submit_event(
2718
+ "action_call",
2719
+ task_id,
2720
+ f"agent {recipient} executing action tool: {call.tool_name} with args: {ujson.dumps(call.tool_args)}",
2721
+ )
2722
+ try:
2723
+ # execute the action function
2724
+ result_status, result_message = await action.execute(
2725
+ call,
2726
+ actions=self.actions,
2727
+ action_override=action_override,
2728
+ )
2729
+
2730
+ self._tool_call_response(
2731
+ task_id=task_id,
2732
+ caller=recipient,
2733
+ tool_call=call,
2734
+ status=result_status,
2735
+ details=result_message.get("content", ""),
2736
+ )
2737
+ self._submit_event(
2738
+ "action_complete",
2739
+ task_id,
2740
+ f"action complete (caller = {recipient}):\n{result_message.get('content')}",
2741
+ )
2742
+ has_action_completed = True
2743
+ continue
2744
+ except Exception as e:
2745
+ logger.error(
2746
+ f"{self._log_prelude()} error executing action tool '{call.tool_name}': {e}"
2747
+ )
2748
+ self._tool_call_response(
2749
+ task_id=task_id,
2750
+ caller=recipient,
2751
+ tool_call=call,
2752
+ status="error",
2753
+ details=f"failed to execute action tool: {e}",
2754
+ )
2755
+ self._submit_event(
2756
+ "action_error",
2757
+ task_id,
2758
+ f"action error (caller = {recipient}, tool = {call.tool_name}):\n{e}",
2759
+ )
2760
+ has_action_completed = True
2761
+ action_errors.append(
2762
+ (
2763
+ call.tool_name,
2764
+ Exception(f"""An error occurred while executing the action tool `{call.tool_name}`.
2765
+ Specifically, the MAIL runtime encountered the following error: {e}.
2766
+ It is possible that the action tool `{call.tool_name}` is not implemented properly.
2767
+ Use this information to decide how to complete your task."""),
2768
+ )
2769
+ )
2770
+ continue
2771
+
2772
+ if len(action_errors) > 0:
2773
+ error_msg = "\n".join(
2774
+ [f"Error: {error[0]}\n{error[1]}" for error in action_errors]
2775
+ )
2776
+ await self.submit(
2777
+ self._system_response(
2778
+ task_id=task_id,
2779
+ recipient=create_agent_address(recipient),
2780
+ subject="::action_error::",
2781
+ body=error_msg,
2782
+ )
2783
+ )
2784
+ elif has_action_completed:
2785
+ await self.submit(
2786
+ self._system_broadcast(
2787
+ task_id=task_id,
2788
+ subject="::action_complete::",
2789
+ body="Action completed successfully",
2790
+ recipients=[create_agent_address(recipient)],
2791
+ )
2792
+ )
2793
+
2794
+ self.agent_histories.setdefault(agent_history_key, [])
2795
+ except Exception as e:
2796
+ logger.error(
2797
+ f"{self._log_prelude()} error scheduling message for agent '{recipient}': {e}"
2798
+ )
2799
+ traceback.print_exc()
2800
+ self._tool_call_response(
2801
+ task_id=task_id,
2802
+ caller=recipient,
2803
+ tool_call=call,
2804
+ status="error",
2805
+ details=f"failed to schedule message: {e}",
2806
+ )
2807
+ self._submit_event(
2808
+ "agent_error",
2809
+ task_id,
2810
+ f"error scheduling message for agent {recipient}: {e}",
2811
+ )
2812
+ await self.submit(
2813
+ self._system_response(
2814
+ task_id=task_id,
2815
+ recipient=message["message"]["sender"],
2816
+ subject="::agent_error::",
2817
+ body=f"""An error occurred while scheduling the message for agent '{recipient}'.
2818
+ Specifically, the MAIL runtime encountered the following error: {e}.
2819
+ It is possible that the agent function for '{recipient}' is not valid.
2820
+ Use this information to decide how to complete your task.""",
2821
+ )
2822
+ )
2823
+ finally:
2824
+ self.message_queue.task_done()
2825
+
2826
+ task = asyncio.create_task(schedule(message))
2827
+ self.active_tasks.add(task)
2828
+
2829
+ task.add_done_callback(self.active_tasks.discard)
2830
+
2831
+ return None
2832
+
2833
+ def _attach_interswarm_routing_metadata(
2834
+ self,
2835
+ task_id: str,
2836
+ source_message: MAILMessage,
2837
+ outgoing_message: MAILMessage,
2838
+ call: AgentToolCall,
2839
+ ) -> None:
2840
+ """
2841
+ Propagate remote routing metadata so subsequent interswarm messages reuse the
2842
+ original remote task identifier.
2843
+ """
2844
+ try:
2845
+ outgoing_content = outgoing_message["message"]
2846
+ routing_info = outgoing_content.get("routing_info")
2847
+ if not isinstance(routing_info, dict):
2848
+ routing_info = {}
2849
+
2850
+ parent_message = source_message.get("message")
2851
+ parent_routing: dict[str, Any] | None = None
2852
+ if isinstance(parent_message, dict):
2853
+ candidate_routing = parent_message.get("routing_info")
2854
+ if isinstance(candidate_routing, dict):
2855
+ parent_routing = candidate_routing
2856
+
2857
+ remote_task_id: str | None = None
2858
+ remote_swarm: str | None = None
2859
+ if parent_routing is not None:
2860
+ remote_task_id = parent_routing.get("remote_task_id")
2861
+ remote_swarm = parent_routing.get("remote_swarm")
2862
+
2863
+ target_addresses: set[str] = set()
2864
+
2865
+ target_arg = call.tool_args.get("target")
2866
+ if isinstance(target_arg, str):
2867
+ target_addresses.add(target_arg)
2868
+
2869
+ if not target_addresses and call.tool_name == "send_broadcast":
2870
+ # Broadcasts default to local swarm; nothing to attach.
2871
+ outgoing_content["routing_info"] = routing_info
2872
+ return
2873
+
2874
+ remote_swarms: set[str] = set()
2875
+ for address in target_addresses:
2876
+ _, swarm = parse_agent_address(address)
2877
+ if swarm:
2878
+ remote_swarms.add(swarm)
2879
+
2880
+ task_state = self.mail_tasks.get(task_id)
2881
+ if task_state is not None:
2882
+ for r_swarm in remote_swarms:
2883
+ task_state.add_remote_swarm(r_swarm)
2884
+
2885
+ if remote_swarms:
2886
+ routing_info.setdefault("remote_swarm", next(iter(remote_swarms)))
2887
+
2888
+ outgoing_content["routing_info"] = routing_info
2889
+ except Exception:
2890
+ # Routing hints are best-effort; avoid interrupting the agent loop if unavailable.
2891
+ pass
2892
+
2893
+ def _normalize_interswarm_response(
2894
+ self,
2895
+ task_id: str | None,
2896
+ response: MAILMessage,
2897
+ ) -> MAILMessage:
2898
+ """
2899
+ Normalize an interswarm response so it can be processed by the local queue.
2900
+ Converts remote task_complete broadcasts into response messages targeting the
2901
+ local entrypoint.
2902
+ """
2903
+ normalised = copy.deepcopy(response)
2904
+ message = normalised.get("message")
2905
+
2906
+ if isinstance(message, dict):
2907
+ if isinstance(task_id, str):
2908
+ message["task_id"] = task_id
2909
+ routing_info = message.get("routing_info")
2910
+ if not isinstance(routing_info, dict):
2911
+ routing_info = {}
2912
+ routing_info.setdefault("origin_swarm", message.get("sender_swarm"))
2913
+ message["routing_info"] = routing_info
2914
+
2915
+ if (
2916
+ normalised["msg_type"] == "broadcast_complete"
2917
+ and isinstance(message, dict)
2918
+ and message.get("subject") == "::task_complete::"
2919
+ ):
2920
+ finish_body = message.get("body", "")
2921
+ sender = message.get("sender")
2922
+ sender_swarm = message.get("sender_swarm")
2923
+
2924
+ recipient = create_agent_address(self.entrypoint)
2925
+
2926
+ if task_id is None:
2927
+ raise ValueError(
2928
+ "task_id is required for interswarm task complete messages"
2929
+ )
2930
+
2931
+ normalised = self._system_response(
2932
+ task_id=task_id,
2933
+ subject="::interswarm_task_complete::",
2934
+ body=f"""The remote agent '{sender}@{sender_swarm}' has completed the task with this ID in their swarm.
2935
+ The final response message is: '{finish_body}'""",
2936
+ recipient=recipient,
2937
+ )
2938
+
2939
+ return normalised
2940
+
2941
    async def _compress_context(
        self,
        agent: AgentCore,
        llm: str,
        system: str | None,
        ratio: float,
        messages: list[dict[str, Any]],
    ) -> list[dict[str, Any]]:
        """
        Compress the context of a list of messages based on the dynamic context ratio.

        When context exceeds (ratio * model_context_length) tokens, older messages
        (excluding the system prompt and recent messages) are summarized into a
        single compressed context block.

        Args:
            agent: NOTE(review): accepted but never used in this body — confirm it
                is kept only for interface compatibility.
            llm: Model identifier; selects the tokenizer and the context length.
            system: Optional system prompt text, counted toward the token total.
            ratio: Fraction of the model context length that triggers compression.
            messages: The conversation history (dicts or objects with `.content`).

        Returns:
            The (possibly compacted) message list; returned unchanged on any
            failure to determine the model's context length.
        """
        # Tokenizer choice matches the model family: o200k for gpt-5/o3, else cl100k.
        if llm.startswith("openai/gpt-5") or llm.startswith("openai/o3"):
            tokenizer = tiktoken.get_encoding("o200k_base")
        else:
            tokenizer = tiktoken.get_encoding("cl100k_base")
        try:
            ctx_len = get_model_ctx_len(llm)
        except Exception:
            # Without a known context length we cannot size the threshold; bail out.
            logger.warning("failed to get context length for agent")
            logger.error(traceback.format_exc())
            return messages

        def get_content_from_messages(msgs: list[dict[str, Any]]) -> str:
            """Extract text content from messages, handling both dict and object formats."""
            full_content = ""
            for item in msgs:
                # Handle both dict-style and object-style messages
                if isinstance(item, dict):
                    content = item.get("content")
                else:
                    content = getattr(item, "content", None)

                if content is None:
                    continue

                if isinstance(content, str):
                    full_content += content + "\n\n"
                elif hasattr(content, "text"):
                    full_content += content.text + "\n\n"
                elif isinstance(content, list):
                    # Handle list of content blocks (e.g., OpenAI responses format)
                    for block in content:
                        if isinstance(block, dict) and "text" in block:
                            full_content += block["text"] + "\n\n"
                        elif hasattr(block, "text"):
                            full_content += block.text + "\n\n"
            return full_content

        if system is None:
            system = ""
        full_content = get_content_from_messages(messages)
        full_content = system + full_content
        tokens = tokenizer.encode(full_content)
        threshold = int(ctx_len * ratio)
        # NOTE(review): rich.print writes directly to stdout — presumably a debug
        # aid; confirm it is intended in production paths.
        rich.print(
            f"Context: [{len(tokens)}/{ctx_len}] ({(len(tokens) / ctx_len) * 100:.2f}%)"
        )

        if len(tokens) > threshold:
            # Find a good cutoff point, keeping at least 4 recent messages
            rev_messages = messages[::-1]
            stop_idx = -1
            min_recent_to_keep = 4

            for idx, item in enumerate(rev_messages[min_recent_to_keep:]):
                # Check if this is a "boundary" message we can split at
                is_boundary = False
                if isinstance(item, dict):
                    role = item.get("role")
                    msg_type = item.get("type")
                    is_boundary = (
                        role in ("user", "assistant") or msg_type == "function_call"
                    )
                else:
                    is_boundary = hasattr(item, "role") or (
                        hasattr(item, "type") and item.type == "function_call"
                    )

                if is_boundary:
                    # Account for the slice offset when calculating original index
                    stop_idx = len(rev_messages) - (idx + min_recent_to_keep) - 1
                    break

            if stop_idx > 1:  # Ensure we have messages to compact (after system prompt)
                is_first_msg_sys = messages[0].get("role") == "system"
                msgs_to_compact = (
                    messages[1:stop_idx] if is_first_msg_sys else messages[0:stop_idx]
                )
                if not msgs_to_compact:
                    return messages

                compacted_content = get_content_from_messages(msgs_to_compact)
                if not compacted_content.strip():
                    # Nothing textual to summarize; leave history untouched.
                    return messages

                # Keep system prompt (messages[0]) and recent messages (messages[stop_idx:])
                if is_first_msg_sys:
                    messages = [messages[0]] + messages[stop_idx:]
                else:
                    messages = messages[stop_idx:]

                # Summarize the compacted span with a single LLM call, traced
                # for observability.
                with ls.trace(
                    name="compress_context",
                    run_type="llm",
                    inputs={
                        "messages": messages,
                        "compacted_content": compacted_content,
                    },
                ) as rt:
                    res = await aresponses(
                        input="Compress the following messages of LLM context into a single, concise summary:\n"
                        + compacted_content,
                        instructions="Your goal is to compress given LLM context in a manner that is most likely to be useful to the LLM. Your summary will be inserted into the LLM's context in place of the original messages, so it should be concise while retaining important information.",
                        model="openai/gpt-5.1",
                        reasoning={"effort": "none"},
                    )
                    rt.end(outputs={"output": res})

                # Extract the summary and insert after system prompt
                for output in res.output:
                    summary_text = None
                    if isinstance(output, dict):
                        if output.get("type") == "message":
                            content_list = output.get("content", [])
                            if content_list and isinstance(content_list[0], dict):
                                summary_text = content_list[0].get("text", "")
                    elif hasattr(output, "type") and output.type == "message":
                        if hasattr(output, "content") and output.content:
                            summary_text = output.content[0].text

                    if summary_text:
                        # Insert after system prompt (position 1), not before it
                        messages.insert(
                            1 if is_first_msg_sys else 0,
                            {
                                "role": "user",
                                "content": f"[COMPRESSED CONTEXT FROM EARLIER IN CONVERSATION]\n\n{summary_text}",
                            },
                        )
                        break  # Only insert one summary

        return messages
3088
    async def _handle_task_complete_call(
        self,
        task_id: str,
        caller: str,
        call: AgentToolCall,
    ) -> None:
        """
        Handle a task_complete tool invocation from an agent.
        Ensures the task is marked complete, the queue is stashed, and subsequent
        duplicate calls are treated as idempotent.

        Args:
            task_id: Identifier of the task being completed.
            caller: Name of the agent that invoked task_complete.
            call: The tool call; its args may carry a custom `finish_message`.
        """
        finish_message = call.tool_args.get(
            "finish_message", "Task completed successfully"
        )

        await self._ensure_task_exists(task_id)
        task_state = self.mail_tasks[task_id]

        # Idempotency guard: a repeat task_complete is acknowledged as success
        # without redoing any completion work.
        if task_state.completed:
            logger.warning(
                f"{self._log_prelude()} agent '{caller}' called 'task_complete' for already completed task '{task_id}'"
            )
            self._tool_call_response(
                task_id=task_id,
                caller=caller,
                tool_call=call,
                status="success",
                details="task already completed",
            )
            self._submit_event(
                "task_complete_call_duplicate",
                task_id,
                f"agent {caller} called task_complete on already completed task",
            )
            return

        logger.info(
            f"{self._log_prelude()} task '{task_id}' completed by agent '{caller}'"
        )

        # Build the final broadcast_complete message carrying the finish text.
        response_message = self._agent_task_complete(
            task_id=task_id,
            caller=caller,
            finish_message=finish_message,
        )

        self._tool_call_response(
            task_id=task_id,
            caller=caller,
            tool_call=call,
            status="success",
            details="task completed",
        )

        self._submit_event(
            "task_complete_call",
            task_id,
            f"agent {caller} called task_complete, full response to follow",
        )

        # Park queued messages for this task, flip the completion flag, then
        # clear per-task step bookkeeping.
        await task_state.queue_stash(self.message_queue)
        task_state.mark_complete()
        self._clear_task_step_state(task_id)

        # Clean up outstanding requests tracking for this task
        if task_id in self.outstanding_requests:
            del self.outstanding_requests[task_id]

        # Persist agent histories to the database if enabled
        await self._persist_agent_histories_to_db(task_id)

        # Persist task completion status and response to DB
        if self.enable_db_agent_histories:
            await self._persist_task_completion_to_db(task_id, response_message)

        self.response_messages[task_id] = response_message

        # Emit a synthetic new_message event so streaming clients receive the final content.
        # This must happen BEFORE resolving the pending request, otherwise the streaming
        # loop will exit before this event is emitted.
        self._submit_event(
            "new_message",
            task_id,
            f"task_complete response from {caller}:\n{build_mail_xml(response_message)['content']}",
            extra_data={"full_message": response_message},
        )

        await self._notify_remote_task_complete(task_id, finish_message, caller)
        await self.submit(response_message)

        # Resolve pending request if one exists - do this LAST so streaming clients
        # have a chance to receive the new_message event before the stream closes
        if task_id in self.pending_requests:
            logger.info(
                f"{self._log_prelude()} task '{task_id}' completed, resolving pending request"
            )
            future = self.pending_requests.pop(task_id)
            future.set_result(response_message)
3187
+ def _system_broadcast(
3188
+ self,
3189
+ task_id: str,
3190
+ subject: str,
3191
+ body: str,
3192
+ task_complete: bool = False,
3193
+ recipients: list[MAILAddress] | None = None,
3194
+ ) -> MAILMessage:
3195
+ """
3196
+ Create a system broadcast message.
3197
+ """
3198
+ if recipients is None and not task_complete:
3199
+ raise ValueError(
3200
+ "recipients must be provided for non-task-complete broadcasts"
3201
+ )
3202
+
3203
+ return MAILMessage(
3204
+ id=str(uuid.uuid4()),
3205
+ timestamp=datetime.datetime.now(datetime.UTC).isoformat(),
3206
+ message=MAILBroadcast(
3207
+ task_id=task_id,
3208
+ broadcast_id=str(uuid.uuid4()),
3209
+ sender=create_system_address(self.swarm_name),
3210
+ recipients=[create_agent_address("all")]
3211
+ if task_complete
3212
+ else (recipients or []),
3213
+ subject=subject,
3214
+ body=body,
3215
+ sender_swarm=self.swarm_name,
3216
+ recipient_swarms=[self.swarm_name],
3217
+ routing_info={},
3218
+ ),
3219
+ msg_type="broadcast" if not task_complete else "broadcast_complete",
3220
+ )
3221
+
3222
+ def _system_response(
3223
+ self,
3224
+ task_id: str,
3225
+ subject: str,
3226
+ body: str,
3227
+ recipient: MAILAddress,
3228
+ ) -> MAILMessage:
3229
+ """
3230
+ Create a system response message for a recipient.
3231
+ Said recipient must be either an agent or the user.
3232
+ """
3233
+ return MAILMessage(
3234
+ id=str(uuid.uuid4()),
3235
+ timestamp=datetime.datetime.now(datetime.UTC).isoformat(),
3236
+ message=MAILResponse(
3237
+ task_id=task_id,
3238
+ request_id=str(uuid.uuid4()),
3239
+ sender=create_system_address(self.swarm_name),
3240
+ recipient=recipient,
3241
+ subject=subject,
3242
+ body=body,
3243
+ sender_swarm=self.swarm_name,
3244
+ recipient_swarm=self.swarm_name,
3245
+ routing_info={},
3246
+ ),
3247
+ msg_type="response",
3248
+ )
3249
+
3250
+ def _agent_task_complete(
3251
+ self,
3252
+ task_id: str,
3253
+ caller: str,
3254
+ finish_message: str,
3255
+ ) -> MAILMessage:
3256
+ """
3257
+ Create a task complete message for an agent.
3258
+ """
3259
+ return MAILMessage(
3260
+ id=str(uuid.uuid4()),
3261
+ timestamp=datetime.datetime.now(datetime.UTC).isoformat(),
3262
+ message=MAILBroadcast(
3263
+ task_id=task_id,
3264
+ broadcast_id=str(uuid.uuid4()),
3265
+ sender=create_agent_address(caller),
3266
+ recipients=[create_agent_address("all")],
3267
+ subject="::task_complete::",
3268
+ body=finish_message,
3269
+ sender_swarm=self.swarm_name,
3270
+ recipient_swarms=[self.swarm_name],
3271
+ routing_info={},
3272
+ ),
3273
+ msg_type="broadcast_complete",
3274
+ )
3275
+
3276
+ def _tool_call_response(
3277
+ self,
3278
+ task_id: str,
3279
+ caller: str,
3280
+ tool_call: AgentToolCall,
3281
+ status: Literal["success", "error"],
3282
+ details: str | None = None,
3283
+ ) -> None:
3284
+ """
3285
+ Create a tool call response message for a caller and append to its agent history.
3286
+ """
3287
+ agent_history_key = AGENT_HISTORY_KEY.format(task_id=task_id, agent_name=caller)
3288
+
3289
+ status_str = "SUCCESS" if status == "success" else "ERROR"
3290
+ response_content = f"{status_str}: {details}" if details else status_str
3291
+ self.agent_histories[agent_history_key].append(
3292
+ tool_call.create_response_msg(response_content)
3293
+ )
3294
+
3295
+ return
3296
+
3297
+ def _detect_tool_format(
3298
+ self, history: list[dict[str, Any]]
3299
+ ) -> Literal["completions", "responses"]:
3300
+ """
3301
+ Detect the tool format used in a history based on entry structure.
3302
+ Returns 'completions' if entries have 'role' key, 'responses' if they have 'type' key.
3303
+ Defaults to 'responses' if unable to determine.
3304
+ """
3305
+ for entry in history:
3306
+ if isinstance(entry, dict):
3307
+ if "role" in entry:
3308
+ return "completions"
3309
+ if "type" in entry:
3310
+ return "responses"
3311
+ return "responses"
3312
+
3313
+ async def _persist_agent_histories_to_db(self, task_id: str) -> None:
3314
+ """
3315
+ Persist all agent histories for a given task to the database.
3316
+ Only called when enable_db_agent_histories is True.
3317
+ """
3318
+ if not self.enable_db_agent_histories:
3319
+ return
3320
+
3321
+ for agent_name in self.agents:
3322
+ agent_history_key = AGENT_HISTORY_KEY.format(
3323
+ task_id=task_id, agent_name=agent_name
3324
+ )
3325
+ history = self.agent_histories.get(agent_history_key, [])
3326
+
3327
+ if not history:
3328
+ continue
3329
+
3330
+ tool_format = self._detect_tool_format(history)
3331
+
3332
+ try:
3333
+ await create_agent_history(
3334
+ swarm_name=self.swarm_name,
3335
+ caller_role=self.user_role,
3336
+ caller_id=self.user_id,
3337
+ tool_format=tool_format,
3338
+ task_id=task_id,
3339
+ agent_name=agent_name,
3340
+ history=history,
3341
+ )
3342
+ logger.info(
3343
+ f"{self._log_prelude()} persisted history for agent '{agent_name}' (task '{task_id}', format '{tool_format}')"
3344
+ )
3345
+ except Exception as e:
3346
+ logger.error(
3347
+ f"{self._log_prelude()} failed to persist history for agent '{agent_name}' (task '{task_id}'): {e}"
3348
+ )
3349
+
3350
+ def _emit_tool_call_event(
3351
+ self,
3352
+ task_id: str,
3353
+ caller: str,
3354
+ call: AgentToolCall,
3355
+ reasoning_ref: str | None = None,
3356
+ ) -> None:
3357
+ """
3358
+ Emit a tool_call event for a tool call.
3359
+
3360
+ Reasoning and preamble come from the AgentToolCall object fields (populated by factory).
3361
+ If the call has no reasoning but reasoning_ref is provided, include that instead.
3362
+ """
3363
+ extra_data: dict[str, Any] = {
3364
+ "tool_name": call.tool_name,
3365
+ "tool_args": call.tool_args,
3366
+ "tool_call_id": call.tool_call_id,
3367
+ }
3368
+
3369
+ # Use reasoning from call object (populated by factory)
3370
+ # Filter empty/whitespace blocks and join with double newlines
3371
+ if call.reasoning:
3372
+ filtered = [r for r in call.reasoning if r and r.strip()]
3373
+ if filtered:
3374
+ extra_data["reasoning"] = "\n\n".join(filtered)
3375
+ elif reasoning_ref:
3376
+ # Had reasoning list but all blocks were empty/whitespace
3377
+ extra_data["reasoning_ref"] = reasoning_ref
3378
+ elif reasoning_ref:
3379
+ extra_data["reasoning_ref"] = reasoning_ref
3380
+
3381
+ if call.preamble:
3382
+ extra_data["preamble"] = call.preamble
3383
+
3384
+ self._submit_event(
3385
+ "tool_call",
3386
+ task_id,
3387
+ f"agent {caller} called {call.tool_name}",
3388
+ extra_data=extra_data,
3389
+ )
3390
+
3391
+ def _submit_event(
3392
+ self,
3393
+ event: str,
3394
+ task_id: str,
3395
+ description: str,
3396
+ extra_data: dict[str, Any] | None = None,
3397
+ ) -> None:
3398
+ """
3399
+ Submit an event to the event queue.
3400
+ """
3401
+ # Ensure task exists in memory (sync check, DB persistence happens elsewhere)
3402
+ if task_id not in self.mail_tasks:
3403
+ task_owner = self.this_owner
3404
+ task = MAILTask(task_id, task_owner, [task_owner])
3405
+ self.mail_tasks[task_id] = task
3406
+ # Schedule DB persistence in background if enabled
3407
+ if self.enable_db_agent_histories:
3408
+ asyncio.create_task(self._persist_task_to_db(task))
3409
+
3410
+ if extra_data is None:
3411
+ extra_data = {}
3412
+
3413
+ # Pre-serialize to JSON to ensure proper formatting (sse_starlette may use str() instead)
3414
+ sse = ServerSentEvent(
3415
+ data=ujson.dumps({
3416
+ "timestamp": datetime.datetime.now(datetime.UTC).isoformat(),
3417
+ "description": description,
3418
+ "task_id": task_id,
3419
+ "extra_data": extra_data,
3420
+ }),
3421
+ event=event,
3422
+ )
3423
+ self.mail_tasks[task_id].add_event(sse)
3424
+ # Signal that new events are available for streaming (task-specific)
3425
+ try:
3426
+ self._events_available_by_task[task_id].set()
3427
+ except Exception:
3428
+ pass
3429
+
3430
+ # Persist event to DB in background if enabled
3431
+ if self.enable_db_agent_histories:
3432
+ asyncio.create_task(self._persist_event_to_db(task_id, sse))
3433
+
3434
+ return None
3435
+
3436
+ async def _persist_event_to_db(self, task_id: str, sse: ServerSentEvent) -> None:
3437
+ """
3438
+ Persist an event to the database.
3439
+ """
3440
+ try:
3441
+ # Serialize event data to string if needed
3442
+ event_data = sse.data
3443
+ if event_data is not None and not isinstance(event_data, str):
3444
+ import json
3445
+
3446
+ event_data = json.dumps(event_data)
3447
+
3448
+ await create_task_event(
3449
+ task_id=task_id,
3450
+ swarm_name=self.swarm_name,
3451
+ caller_role=self.user_role,
3452
+ caller_id=self.user_id,
3453
+ event_type=sse.event,
3454
+ event_data=event_data,
3455
+ event_id=sse.id,
3456
+ )
3457
+ except Exception as e:
3458
+ logger.warning(
3459
+ f"{self._log_prelude()} failed to persist event for task '{task_id}': {e}"
3460
+ )
3461
+
3462
+ async def _persist_task_completion_to_db(
3463
+ self, task_id: str, response_message: MAILMessage
3464
+ ) -> None:
3465
+ """
3466
+ Persist task completion status and response message to the database.
3467
+ """
3468
+ try:
3469
+ # Update task status
3470
+ await update_task(
3471
+ task_id=task_id,
3472
+ swarm_name=self.swarm_name,
3473
+ caller_role=self.user_role,
3474
+ caller_id=self.user_id,
3475
+ is_running=False,
3476
+ completed=True,
3477
+ )
3478
+
3479
+ # Save response message
3480
+ await create_task_response(
3481
+ task_id=task_id,
3482
+ swarm_name=self.swarm_name,
3483
+ caller_role=self.user_role,
3484
+ caller_id=self.user_id,
3485
+ response=response_message, # type: ignore
3486
+ )
3487
+
3488
+ logger.info(
3489
+ f"{self._log_prelude()} persisted task completion for task '{task_id}'"
3490
+ )
3491
+ except Exception as e:
3492
+ logger.warning(
3493
+ f"{self._log_prelude()} failed to persist task completion for task '{task_id}': {e}"
3494
+ )
3495
+
3496
+ def get_events_by_task_id(self, task_id: str) -> list[ServerSentEvent]:
3497
+ """
3498
+ Get events by task ID.
3499
+ """
3500
+ candidates: list[ServerSentEvent] = []
3501
+ try:
3502
+ candidates.extend(self.mail_tasks[task_id].events)
3503
+ except Exception:
3504
+ pass
3505
+
3506
+ out: list[ServerSentEvent] = []
3507
+ for ev in candidates:
3508
+ try:
3509
+ payload = ev.data
3510
+ if isinstance(payload, str):
3511
+ try:
3512
+ payload = ujson.loads(payload)
3513
+ except ValueError:
3514
+ payload = None
3515
+ if isinstance(payload, dict) and payload.get("task_id") == task_id:
3516
+ out.append(
3517
+ ServerSentEvent(
3518
+ event=ev.event,
3519
+ data=payload,
3520
+ id=getattr(ev, "id", None),
3521
+ )
3522
+ )
3523
+ except Exception:
3524
+ continue
3525
+ return out
3526
+
3527
+ def get_task_by_id(self, task_id: str) -> MAILTask | None:
3528
+ """
3529
+ Get a task by ID.
3530
+ """
3531
+ return self.mail_tasks.get(task_id)
3532
+
3533
+ def get_response_message(self, task_id: str) -> MAILMessage | None:
3534
+ """
3535
+ Get the response message for a given task ID. Mostly used after streaming response events.
3536
+ """
3537
+ return self.response_messages.get(task_id, None)