mail_swarms-1.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (137)
  1. mail/__init__.py +35 -0
  2. mail/api.py +1964 -0
  3. mail/cli.py +432 -0
  4. mail/client.py +1657 -0
  5. mail/config/__init__.py +8 -0
  6. mail/config/client.py +87 -0
  7. mail/config/server.py +165 -0
  8. mail/core/__init__.py +72 -0
  9. mail/core/actions.py +69 -0
  10. mail/core/agents.py +73 -0
  11. mail/core/message.py +366 -0
  12. mail/core/runtime.py +3537 -0
  13. mail/core/tasks.py +311 -0
  14. mail/core/tools.py +1206 -0
  15. mail/db/__init__.py +0 -0
  16. mail/db/init.py +182 -0
  17. mail/db/types.py +65 -0
  18. mail/db/utils.py +523 -0
  19. mail/examples/__init__.py +27 -0
  20. mail/examples/analyst_dummy/__init__.py +15 -0
  21. mail/examples/analyst_dummy/agent.py +136 -0
  22. mail/examples/analyst_dummy/prompts.py +44 -0
  23. mail/examples/consultant_dummy/__init__.py +15 -0
  24. mail/examples/consultant_dummy/agent.py +136 -0
  25. mail/examples/consultant_dummy/prompts.py +42 -0
  26. mail/examples/data_analysis/__init__.py +40 -0
  27. mail/examples/data_analysis/analyst/__init__.py +9 -0
  28. mail/examples/data_analysis/analyst/agent.py +67 -0
  29. mail/examples/data_analysis/analyst/prompts.py +53 -0
  30. mail/examples/data_analysis/processor/__init__.py +13 -0
  31. mail/examples/data_analysis/processor/actions.py +293 -0
  32. mail/examples/data_analysis/processor/agent.py +67 -0
  33. mail/examples/data_analysis/processor/prompts.py +48 -0
  34. mail/examples/data_analysis/reporter/__init__.py +10 -0
  35. mail/examples/data_analysis/reporter/actions.py +187 -0
  36. mail/examples/data_analysis/reporter/agent.py +67 -0
  37. mail/examples/data_analysis/reporter/prompts.py +49 -0
  38. mail/examples/data_analysis/statistics/__init__.py +18 -0
  39. mail/examples/data_analysis/statistics/actions.py +343 -0
  40. mail/examples/data_analysis/statistics/agent.py +67 -0
  41. mail/examples/data_analysis/statistics/prompts.py +60 -0
  42. mail/examples/mafia/__init__.py +0 -0
  43. mail/examples/mafia/game.py +1537 -0
  44. mail/examples/mafia/narrator_tools.py +396 -0
  45. mail/examples/mafia/personas.py +240 -0
  46. mail/examples/mafia/prompts.py +489 -0
  47. mail/examples/mafia/roles.py +147 -0
  48. mail/examples/mafia/spec.md +350 -0
  49. mail/examples/math_dummy/__init__.py +23 -0
  50. mail/examples/math_dummy/actions.py +252 -0
  51. mail/examples/math_dummy/agent.py +136 -0
  52. mail/examples/math_dummy/prompts.py +46 -0
  53. mail/examples/math_dummy/types.py +5 -0
  54. mail/examples/research/__init__.py +39 -0
  55. mail/examples/research/researcher/__init__.py +9 -0
  56. mail/examples/research/researcher/agent.py +67 -0
  57. mail/examples/research/researcher/prompts.py +54 -0
  58. mail/examples/research/searcher/__init__.py +10 -0
  59. mail/examples/research/searcher/actions.py +324 -0
  60. mail/examples/research/searcher/agent.py +67 -0
  61. mail/examples/research/searcher/prompts.py +53 -0
  62. mail/examples/research/summarizer/__init__.py +18 -0
  63. mail/examples/research/summarizer/actions.py +255 -0
  64. mail/examples/research/summarizer/agent.py +67 -0
  65. mail/examples/research/summarizer/prompts.py +55 -0
  66. mail/examples/research/verifier/__init__.py +10 -0
  67. mail/examples/research/verifier/actions.py +337 -0
  68. mail/examples/research/verifier/agent.py +67 -0
  69. mail/examples/research/verifier/prompts.py +52 -0
  70. mail/examples/supervisor/__init__.py +11 -0
  71. mail/examples/supervisor/agent.py +4 -0
  72. mail/examples/supervisor/prompts.py +93 -0
  73. mail/examples/support/__init__.py +33 -0
  74. mail/examples/support/classifier/__init__.py +10 -0
  75. mail/examples/support/classifier/actions.py +307 -0
  76. mail/examples/support/classifier/agent.py +68 -0
  77. mail/examples/support/classifier/prompts.py +56 -0
  78. mail/examples/support/coordinator/__init__.py +9 -0
  79. mail/examples/support/coordinator/agent.py +67 -0
  80. mail/examples/support/coordinator/prompts.py +48 -0
  81. mail/examples/support/faq/__init__.py +10 -0
  82. mail/examples/support/faq/actions.py +182 -0
  83. mail/examples/support/faq/agent.py +67 -0
  84. mail/examples/support/faq/prompts.py +42 -0
  85. mail/examples/support/sentiment/__init__.py +15 -0
  86. mail/examples/support/sentiment/actions.py +341 -0
  87. mail/examples/support/sentiment/agent.py +67 -0
  88. mail/examples/support/sentiment/prompts.py +54 -0
  89. mail/examples/weather_dummy/__init__.py +23 -0
  90. mail/examples/weather_dummy/actions.py +75 -0
  91. mail/examples/weather_dummy/agent.py +136 -0
  92. mail/examples/weather_dummy/prompts.py +35 -0
  93. mail/examples/weather_dummy/types.py +5 -0
  94. mail/factories/__init__.py +27 -0
  95. mail/factories/action.py +223 -0
  96. mail/factories/base.py +1531 -0
  97. mail/factories/supervisor.py +241 -0
  98. mail/net/__init__.py +7 -0
  99. mail/net/registry.py +712 -0
  100. mail/net/router.py +728 -0
  101. mail/net/server_utils.py +114 -0
  102. mail/net/types.py +247 -0
  103. mail/server.py +1605 -0
  104. mail/stdlib/__init__.py +0 -0
  105. mail/stdlib/anthropic/__init__.py +0 -0
  106. mail/stdlib/fs/__init__.py +15 -0
  107. mail/stdlib/fs/actions.py +209 -0
  108. mail/stdlib/http/__init__.py +19 -0
  109. mail/stdlib/http/actions.py +333 -0
  110. mail/stdlib/interswarm/__init__.py +11 -0
  111. mail/stdlib/interswarm/actions.py +208 -0
  112. mail/stdlib/mcp/__init__.py +19 -0
  113. mail/stdlib/mcp/actions.py +294 -0
  114. mail/stdlib/openai/__init__.py +13 -0
  115. mail/stdlib/openai/agents.py +451 -0
  116. mail/summarizer.py +234 -0
  117. mail/swarms_json/__init__.py +27 -0
  118. mail/swarms_json/types.py +87 -0
  119. mail/swarms_json/utils.py +255 -0
  120. mail/url_scheme.py +51 -0
  121. mail/utils/__init__.py +53 -0
  122. mail/utils/auth.py +194 -0
  123. mail/utils/context.py +17 -0
  124. mail/utils/logger.py +73 -0
  125. mail/utils/openai.py +212 -0
  126. mail/utils/parsing.py +89 -0
  127. mail/utils/serialize.py +292 -0
  128. mail/utils/store.py +49 -0
  129. mail/utils/string_builder.py +119 -0
  130. mail/utils/version.py +20 -0
  131. mail_swarms-1.3.2.dist-info/METADATA +237 -0
  132. mail_swarms-1.3.2.dist-info/RECORD +137 -0
  133. mail_swarms-1.3.2.dist-info/WHEEL +4 -0
  134. mail_swarms-1.3.2.dist-info/entry_points.txt +2 -0
  135. mail_swarms-1.3.2.dist-info/licenses/LICENSE +202 -0
  136. mail_swarms-1.3.2.dist-info/licenses/NOTICE +10 -0
  137. mail_swarms-1.3.2.dist-info/licenses/THIRD_PARTY_NOTICES.md +12334 -0
mail/factories/base.py ADDED
@@ -0,0 +1,1531 @@
+ # SPDX-License-Identifier: Apache-2.0
+ # Copyright (c) 2025 Addison Kline, Ryan Heaton
+
+ import asyncio
+ import logging
+ import uuid
+ import warnings
+ from abc import ABC, abstractmethod
+ from collections.abc import Awaitable
+ from typing import Any, Literal
+
+ import anthropic
+ from anthropic.types import (
+     ContentBlockDeltaEvent,
+     ContentBlockStartEvent,
+     TextDelta,
+     ThinkingDelta,
+ )
+ import langsmith as ls
+ import litellm
+ import rich
+ import ujson
+ from langsmith.wrappers import wrap_anthropic
+ from litellm import (
+     ResponseFunctionToolCall,
+     ResponsesAPIResponse,
+     acompletion,
+     aresponses,
+ )
+ from litellm.types.utils import ModelResponse
+
+ from mail.core.agents import AgentFunction, AgentOutput
+ from mail.core.tools import AgentToolCall, create_mail_tools
+
+ logger = logging.getLogger("mail.factories.base")
+
+
+ def base_agent_factory(
+     # REQUIRED
+     # top-level params
+     comm_targets: list[str],
+     tools: list[dict[str, Any]],
+     # instance params
+     user_token: str,
+     # internal params
+     llm: str,
+     system: str,
+     # OPTIONAL
+     # top-level params
+     name: str = "base_agent",
+     enable_entrypoint: bool = False,
+     enable_interswarm: bool = False,
+     can_complete_tasks: bool = False,
+     tool_format: Literal["completions", "responses"] = "responses",
+     exclude_tools: list[str] = [],
+     # instance params
+     # ...
+     # internal params
+     reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None,
+     thinking_budget: int | None = None,
+     max_tokens: int | None = None,
+     memory: bool = True,
+     use_proxy: bool = True,
+     stream_tokens: bool = False,
+     _debug_include_mail_tools: bool = True,
+     default_tool_choice: str | dict[str, str] | None = None,
+ ) -> AgentFunction:
+     """
+     Return a MAIL-compatible agent function.
+
+     Deprecated: use `mail.factories.base:LiteLLMAgentFunction` instead.
+     """
+     warnings.warn(
+         "`mail.factories.base:base_agent_factory` is deprecated and will be removed in a future version. "
+         "Use `mail.factories.base:LiteLLMAgentFunction` instead.",
+         DeprecationWarning,
+         stacklevel=2,
+     )
+
+     litellm_agent = LiteLLMAgentFunction(
+         name=name,
+         comm_targets=comm_targets,
+         tools=tools,
+         llm=llm,
+         system=system,
+         user_token=user_token,
+         enable_entrypoint=enable_entrypoint,
+         enable_interswarm=enable_interswarm,
+         can_complete_tasks=can_complete_tasks,
+         tool_format=tool_format,
+         exclude_tools=exclude_tools,
+         reasoning_effort=reasoning_effort,
+         thinking_budget=thinking_budget,
+         max_tokens=max_tokens,
+         memory=memory,
+         use_proxy=use_proxy,
+         stream_tokens=stream_tokens,
+         _debug_include_mail_tools=_debug_include_mail_tools,
+         default_tool_choice=default_tool_choice,
+     )
+
+     async def run(
+         messages: list[dict[str, Any]],
+         tool_choice: str | dict[str, str] = "required",
+     ) -> AgentOutput:
+         """
+         Execute the wrapped `LiteLLMAgentFunction` instance.
+         """
+         return await litellm_agent(
+             messages=messages,
+             tool_choice=tool_choice,
+         )
+
+     return run
+
+
+ class MAILAgentFunction(ABC):
+     """
+     Base class representing a MAIL-compatible agent function.
+     """
+
+     def __init__(
+         self,
+         name: str,
+         comm_targets: list[str],
+         tools: list[dict[str, Any]],
+         enable_entrypoint: bool = False,
+         enable_interswarm: bool = False,
+         can_complete_tasks: bool = False,
+         tool_format: Literal["completions", "responses"] = "responses",
+         exclude_tools: list[str] = [],
+         **kwargs: Any,
+     ) -> None:
+         self.name = name
+         self.comm_targets = comm_targets
+         self.tools = tools
+         self.enable_entrypoint = enable_entrypoint
+         self.enable_interswarm = enable_interswarm
+         self.can_complete_tasks = can_complete_tasks
+         self.tool_format = tool_format
+         self.exclude_tools = exclude_tools
+         self.kwargs = kwargs
+
+     @abstractmethod
+     def __call__(
+         self,
+         messages: list[dict[str, Any]],
+         tool_choice: str | dict[str, str] = "required",
+     ) -> Awaitable[AgentOutput]:
+         """
+         Execute the MAIL-compatible agent function.
+         """
+         pass
+
+
+ class LiteLLMAgentFunction(MAILAgentFunction):
+     """
+     Class representing a MAIL-compatible agent function which calls the LiteLLM API.
+     """
+
+     def __init__(
+         self,
+         name: str,
+         comm_targets: list[str],
+         tools: list[dict[str, Any]],
+         llm: str,
+         system: str = "",
+         user_token: str = "",
+         enable_entrypoint: bool = False,
+         enable_interswarm: bool = False,
+         can_complete_tasks: bool = False,
+         tool_format: Literal["completions", "responses"] = "responses",
+         exclude_tools: list[str] = [],
+         reasoning_effort: Literal["minimal", "low", "medium", "high"] | None = None,
+         thinking_budget: int | None = None,
+         max_tokens: int | None = None,
+         memory: bool = True,
+         use_proxy: bool = True,
+         stream_tokens: bool = False,
+         _debug_include_mail_tools: bool = True,
+         default_tool_choice: str | dict[str, str] | None = None,
+     ) -> None:
+         self.extra_headers: dict[str, str] = {}
+         if use_proxy:
+             if not llm.startswith("litellm_proxy/"):
+                 llm = f"litellm_proxy/{llm}"
+             self.extra_headers["Authorization"] = f"Bearer {user_token}"
+
+         self.thinking: dict[str, Any] = {
+             "type": "disabled",
+         }
+
+         # Map a named reasoning effort to a thinking-token budget when no
+         # explicit budget is given (e.g. "medium" -> 8000 thinking tokens).
+         if reasoning_effort is not None:
+             if thinking_budget is None:
+                 if reasoning_effort == "minimal":
+                     thinking_budget = 2000
+                 elif reasoning_effort == "low":
+                     thinking_budget = 4000
+                 elif reasoning_effort == "medium":
+                     thinking_budget = 8000
+                 elif reasoning_effort == "high":
+                     thinking_budget = 16000
+
+         # A thinking budget implies extended thinking; leave 4000 tokens of
+         # output headroom beyond the budget if max_tokens is unset.
+         if thinking_budget is not None:
+             if max_tokens is None:
+                 max_tokens = thinking_budget + 4000
+             self.thinking = {
+                 "type": "enabled",
+                 "budget_tokens": thinking_budget,
+             }
+
+         super().__init__(
+             name,
+             comm_targets,
+             tools,
+             enable_entrypoint,
+             enable_interswarm,
+             can_complete_tasks,
+             tool_format,
+             exclude_tools,
+         )
+         self.llm = llm
+         self.system = system
+         self.user_token = user_token
+         self.reasoning_effort = reasoning_effort
+         self.thinking_budget = thinking_budget
+         self.max_tokens = max_tokens
+         self.memory = memory
+         self.use_proxy = use_proxy
+         self.stream_tokens = stream_tokens
+         self._debug_include_mail_tools = _debug_include_mail_tools
+         self.default_tool_choice = default_tool_choice
+
+     def __call__(
+         self,
+         messages: list[dict[str, Any]],
+         tool_choice: str | dict[str, str] = "required",
+     ) -> Awaitable[AgentOutput]:
+         """
+         Execute the MAIL-compatible agent function using the LiteLLM API.
+         """
+         # Use default_tool_choice if set, otherwise use the passed tool_choice
+         effective_tool_choice = (
+             self.default_tool_choice
+             if self.default_tool_choice is not None
+             else tool_choice
+         )
+         if self.tool_format == "completions":
+             return self._run_completions(messages, effective_tool_choice)
+         else:
+             return self._run_responses(messages, effective_tool_choice)
+
+     async def _preprocess(
+         self,
+         messages: list[dict[str, Any]],
+         style: Literal["completions", "responses"],
+         exclude_tools: list[str] = [],
+     ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
+         """
+         Prepend the system prompt and assemble the tool list for a call.
+         """
+         # set up system prompt
+         if messages[0]["role"] != "system" and self.system != "":
+             messages.insert(0, {"role": "system", "content": self.system})
+
+         # add the agent's tools to the list of tools
+         if self._debug_include_mail_tools and len(self.comm_targets) > 0:
+             agent_tools = (
+                 create_mail_tools(
+                     self.comm_targets,
+                     self.enable_interswarm,
+                     style=style,
+                     exclude_tools=exclude_tools,
+                 )
+                 + self.tools
+             )
+         else:
+             agent_tools = self.tools
+
+         return messages, agent_tools
+
+     def _has_web_search_tools(self, tools: list[dict[str, Any]]) -> bool:
+         """Check if any tools are Anthropic web_search built-in tools."""
+         return any(t.get("type", "").startswith("web_search") for t in tools)
+
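+     # Illustration of the conversion below (the tool name is hypothetical, not
+     # part of this package): an OpenAI-style entry
+     #   {"type": "function", "function": {"name": "get_weather",
+     #    "description": "Look up weather", "parameters": {"type": "object"}}}
+     # comes out as
+     #   {"name": "get_weather", "description": "Look up weather",
+     #    "input_schema": {"type": "object"}}.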
+     def _convert_tools_to_anthropic_format(
+         self, tools: list[dict[str, Any]]
+     ) -> list[dict[str, Any]]:
+         """
+         Convert tools from OpenAI/LiteLLM completions format to native Anthropic format.
+
+         OpenAI format:
+             {"type": "function", "function": {"name": ..., "description": ..., "parameters": ...}}
+
+         Anthropic format:
+             {"name": ..., "description": ..., "input_schema": ...}
+
+         Server tools (like web_search) are passed through as-is.
+         """
+         anthropic_tools: list[dict[str, Any]] = []
+
+         for tool in tools:
+             tool_type = tool.get("type", "")
+
+             # Server tools (web_search, etc.) - pass through as-is
+             if tool_type.startswith("web_search"):
+                 anthropic_tools.append(tool)
+                 continue
+
+             # OpenAI/LiteLLM completions format - convert to Anthropic format
+             if tool_type == "function" and "function" in tool:
+                 func = tool["function"]
+                 anthropic_tools.append(
+                     {
+                         "name": func.get("name", ""),
+                         "description": func.get("description", ""),
+                         "input_schema": func.get("parameters", {}),
+                     }
+                 )
+                 continue
+
+             # Already in Anthropic format (has input_schema) - pass through
+             if "input_schema" in tool:
+                 anthropic_tools.append(tool)
+                 continue
+
+             # Unknown format - try to pass through and let Anthropic API handle it
+             logger.warning(f"Unknown tool format, passing through as-is: {tool}")
+             anthropic_tools.append(tool)
+
+         return anthropic_tools
+
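+     # Illustration of the message conversion below (values are hypothetical):
+     #   {"role": "tool", "tool_call_id": "call_1", "content": "42"}
+     # becomes part of a single user turn,
+     #   {"role": "user", "content": [{"type": "tool_result",
+     #    "tool_use_id": "call_1", "content": "42"}]},
+     # and consecutive tool results are grouped into one such user message.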
+     def _convert_messages_to_anthropic_format(
+         self, messages: list[dict[str, Any]]
+     ) -> list[dict[str, Any]]:
+         """
+         Convert messages from OpenAI/LiteLLM format to native Anthropic format.
+
+         Key transformations:
+         1. Tool results: {"role": "tool", "content": ..., "tool_call_id": ...}
+            -> {"role": "user", "content": [{"type": "tool_result", "tool_use_id": ..., "content": ...}]}
+
+         2. Assistant with tool_calls: {"role": "assistant", "tool_calls": [...]}
+            -> {"role": "assistant", "content": [{"type": "tool_use", ...}]}
+
+         3. Multiple consecutive tool results are grouped into a single user message
+         """
+         anthropic_messages: list[dict[str, Any]] = []
+         pending_tool_results: list[dict[str, Any]] = []
+
+         def flush_tool_results() -> None:
+             """Flush pending tool results into a single user message."""
+             if pending_tool_results:
+                 anthropic_messages.append(
+                     {
+                         "role": "user",
+                         "content": pending_tool_results.copy(),
+                     }
+                 )
+                 pending_tool_results.clear()
+
+         for msg in messages:
+             role = msg.get("role", "")
+
+             # Handle tool result messages (OpenAI format)
+             if role == "tool":
+                 tool_result = {
+                     "type": "tool_result",
+                     "tool_use_id": msg.get("tool_call_id", ""),
+                     "content": msg.get("content", ""),
+                 }
+                 # Add is_error if present
+                 if msg.get("is_error"):
+                     tool_result["is_error"] = True
+                 pending_tool_results.append(tool_result)
+                 continue
+
+             # Flush any pending tool results before processing other messages
+             flush_tool_results()
+
+             # Handle assistant messages
+             if role == "assistant":
+                 content = msg.get("content")
+                 tool_calls = msg.get("tool_calls", [])
+
+                 # Check if already in Anthropic format (content is list of typed blocks)
+                 # This preserves thinking blocks, tool_use blocks, etc. from previous turns
+                 if (
+                     isinstance(content, list)
+                     and content
+                     and isinstance(content[0], dict)
+                     and "type" in content[0]
+                 ):
+                     # Already Anthropic format - pass through directly
+                     anthropic_messages.append(msg)
+                     continue
+
+                 # Convert from OpenAI format
+                 content = content or ""
+                 if tool_calls:
+                     # Convert to Anthropic format with tool_use content blocks
+                     content_blocks: list[dict[str, Any]] = []
+
+                     # Add text content if present
+                     if content:
+                         content_blocks.append(
+                             {
+                                 "type": "text",
+                                 "text": content,
+                             }
+                         )
+
+                     # Add tool_use blocks
+                     for tc in tool_calls:
+                         func = tc.get("function", {})
+                         # Parse arguments if it's a JSON string (ujson raises
+                         # ValueError on malformed input)
+                         args = func.get("arguments", {})
+                         if isinstance(args, str):
+                             try:
+                                 args = ujson.loads(args)
+                             except ValueError:
+                                 args = {"raw": args}
+
+                         content_blocks.append(
+                             {
+                                 "type": "tool_use",
+                                 "id": tc.get("id", ""),
+                                 "name": func.get("name", ""),
+                                 "input": args,
+                             }
+                         )
+
+                     anthropic_messages.append(
+                         {
+                             "role": "assistant",
+                             "content": content_blocks,
+                         }
+                     )
+                 else:
+                     # No tool calls - pass through with content normalization
+                     if isinstance(content, str):
+                         anthropic_messages.append(
+                             {
+                                 "role": "assistant",
+                                 "content": [{"type": "text", "text": content}]
+                                 if content
+                                 else [],
+                             }
+                         )
+                     else:
+                         # Already structured content
+                         anthropic_messages.append(msg)
+                 continue
+
+             # Handle user messages
+             if role == "user":
+                 content = msg.get("content", "")
+                 if isinstance(content, str):
+                     anthropic_messages.append(
+                         {
+                             "role": "user",
+                             "content": [{"type": "text", "text": content}],
+                         }
+                     )
+                 else:
+                     # Already structured content (could have images, etc.)
+                     anthropic_messages.append(msg)
+                 continue
+
+             # Pass through other messages (shouldn't happen often)
+             anthropic_messages.append(msg)
+
+         # Flush any remaining tool results
+         flush_tool_results()
+
+         return anthropic_messages
+
+     async def _run_completions(
+         self,
+         messages: list[dict[str, Any]],
+         tool_choice: str | dict[str, str] = "required",
+     ) -> AgentOutput:
+         """
+         Execute a LiteLLM completion-style call on behalf of the MAIL agent.
+         """
+         litellm.drop_params = True
+         messages, agent_tools = await self._preprocess(
+             messages, "completions", exclude_tools=self.exclude_tools
+         )
+
+         # Route all Anthropic models through the native SDK for better support of:
+         # - Extended thinking / interleaved thinking
+         # - Server-side tools (web_search, code_interpreter)
+         # - Full response structure preservation
+         llm_lower = self.llm.lower()
+         if "anthropic" in llm_lower or "claude" in llm_lower:
+             # TODO: the Anthropic native path currently has to stream; once a
+             # non-streaming path works, route non-streaming calls through
+             # `_run_completions_anthropic_native` instead.
+             return await self._stream_completions_anthropic_native(
+                 messages, agent_tools, tool_choice
+             )
+
+         retries = 5
+
+         with ls.trace(
+             name=f"{self.name}_completions",
+             run_type="llm",
+             inputs={
+                 "messages": messages,
+                 "tools": agent_tools,
+                 "thinking": self.thinking,
+                 "reasoning_effort": self.reasoning_effort,
+                 "max_tokens": self.max_tokens,
+                 "tool_choice": tool_choice,
+             },
+         ) as rt:
+             while retries > 0:
+                 try:
+                     if self.stream_tokens:
+                         res = await self._stream_completions(
+                             messages, agent_tools, tool_choice
+                         )
+                     else:
+                         res = await acompletion(
+                             model=self.llm,
+                             messages=messages,
+                             tools=agent_tools,
+                             thinking=self.thinking,
+                             reasoning_effort=self.reasoning_effort,
+                             max_tokens=self.max_tokens,
+                             tool_choice=tool_choice if len(agent_tools) > 0 else None,
+                             extra_headers=self.extra_headers,
+                         )
+                     rt.end(outputs={"output": res})
+                     break
+                 except Exception as e:
+                     retries -= 1
+                     logger.warning(f"Error running completion: {e}")
+                     if retries == 0:
+                         # Out of retries - surface the last error instead of
+                         # falling through with `res` unbound
+                         raise
+                     logger.warning(f"Retrying {retries} more times")
+                     await asyncio.sleep(retries)
+
+         msg = res.choices[0].message  # type: ignore
+         tool_calls: list[AgentToolCall] = []
+         # Normalize assistant message to a dict so we can ensure consistent tool_call ids
+         assistant_dict = msg.to_dict()  # type: ignore
+         if getattr(msg, "tool_calls", None):
+             for tc in msg.tool_calls:  # type: ignore
+                 call_id = tc.id
+                 tool_calls.append(
+                     AgentToolCall(
+                         tool_name=tc.function.name,  # type: ignore
+                         tool_args=ujson.loads(tc.function.arguments),
+                         tool_call_id=call_id,
+                         completion=assistant_dict,
+                     )
+                 )
+         if len(tool_calls) == 0:
+             tool_calls.append(
+                 AgentToolCall(
+                     tool_name="text_output",
+                     tool_args={"content": msg.content},
+                     tool_call_id=str(uuid.uuid4()),
+                     completion=assistant_dict,
+                 )
+             )
+
+         return msg.content, tool_calls
+
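+     # How interleaved thinking is attached (hypothetical block sequence): given
+     # response blocks [thinking "Check the forecast", text "Searching...",
+     # server_tool_use web_search], the parsing loop below emits one
+     # AgentToolCall with reasoning=["Check the forecast"] and
+     # preamble="Searching...", then clears both accumulators for the next call.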
+     async def _run_completions_anthropic_native(
+         self,
+         messages: list[dict[str, Any]],
+         agent_tools: list[dict[str, Any]],
+         tool_choice: str | dict[str, str] = "required",
+     ) -> AgentOutput:
+         """
+         Execute a native Anthropic API call with web_search built-in tools.
+         This preserves the full response structure including server_tool_use blocks.
+         """
+         client = wrap_anthropic(anthropic.AsyncAnthropic())
+
+         # Strip provider prefix from model name
+         model = self.llm
+         for prefix in ("anthropic/", "litellm_proxy/anthropic/", "litellm_proxy/"):
+             if model.startswith(prefix):
+                 model = model[len(prefix) :]
+                 break
+
+         # Extract system message - Anthropic expects it as a top-level parameter
+         system_content = None
+         filtered_messages = []
+         for msg in messages:
+             if msg.get("role") == "system":
+                 system_content = msg.get("content", "")
+             else:
+                 filtered_messages.append(msg)
+
+         # Convert messages from OpenAI/LiteLLM format to Anthropic format
+         # This handles tool results (role: "tool") and tool_calls in assistant messages
+         anthropic_messages = self._convert_messages_to_anthropic_format(
+             filtered_messages
+         )
+
+         # Convert tools to Anthropic format
+         anthropic_tools = self._convert_tools_to_anthropic_format(agent_tools)
+
+         # Build request params
+         request_params: dict[str, Any] = {
+             "model": model,
+             "messages": anthropic_messages,
+             "tools": anthropic_tools,
+             "max_tokens": 64000,  # TODO: make this configurable - currently hardcoded to 64k
+         }
+
+         if system_content:
+             request_params["system"] = system_content
+
+         # Add thinking/extended thinking if enabled
+         thinking_enabled = self.thinking.get("type") == "enabled"
+         if thinking_enabled:
+             request_params["thinking"] = self.thinking
+             # Enable interleaved thinking for Claude 4 models via beta header
+             # This allows Claude to think between tool calls for more sophisticated reasoning
+             request_params["extra_headers"] = {
+                 "anthropic-beta": "interleaved-thinking-2025-05-14"
+             }
+
+         # Handle tool_choice
+         # IMPORTANT: When thinking is enabled, only "auto" and "none" are supported.
+         # Using "any" or forced tool use will cause an error.
+         if tool_choice == "required":
+             if thinking_enabled:
+                 # Fall back to "auto" when thinking is enabled - "any" is incompatible
+                 logger.warning(
+                     "tool_choice='required' is incompatible with extended thinking. "
+                     "Falling back to tool_choice='auto'."
+                 )
+                 request_params["tool_choice"] = {"type": "auto"}
+             else:
+                 request_params["tool_choice"] = {"type": "any"}
+         elif tool_choice == "auto":
+             request_params["tool_choice"] = {"type": "auto"}
+         elif isinstance(tool_choice, dict):
+             # Validate dict tool_choice when thinking is enabled
+             if thinking_enabled and tool_choice.get("type") in ("any", "tool"):
+                 logger.warning(
+                     f"tool_choice={tool_choice} is incompatible with extended thinking. "
+                     "Falling back to tool_choice='auto'."
+                 )
+                 request_params["tool_choice"] = {"type": "auto"}
+             else:
+                 request_params["tool_choice"] = tool_choice
+
+         response = await client.messages.create(**request_params)
+
+         # Handle pause_turn - model paused mid-generation (often during long thinking)
+         # We need to continue generation by sending the partial response back
+         all_content_blocks = list(response.content)
+         while response.stop_reason == "pause_turn":
+             logger.debug(
+                 f"Received pause_turn, continuing generation (accumulated {len(all_content_blocks)} blocks)"
+             )
+             # Add partial response to messages so model can continue
+             anthropic_messages.append(
+                 {
+                     "role": "assistant",
+                     "content": [block.model_dump() for block in response.content],
+                 }
+             )
+             request_params["messages"] = anthropic_messages
+             response = await client.messages.create(**request_params)
+             # Accumulate content blocks from continuation
+             all_content_blocks.extend(response.content)
+
+         # Build assistant message from all accumulated content blocks
+         # This preserves thinking blocks, tool_use, text, etc. in Anthropic format
+         assistant_message: dict[str, Any] = {
+             "role": "assistant",
+             "content": [block.model_dump() for block in all_content_blocks],
+         }
+
+         # Parse response content blocks with interleaved thinking support
+         tool_calls: list[AgentToolCall] = []
+         text_chunks: list[str] = []
+         all_citations: list[dict[str, Any]] = []
+         web_search_results: dict[str, list[dict[str, Any]]] = {}  # tool_use_id -> results
+
+         # Track pending reasoning/preamble for interleaved association
+         pending_reasoning: list[str] = []
+         pending_preamble: list[str] = []
+
+         for block in all_content_blocks:
+             block_type = block.type
+
+             if block_type == "thinking":
+                 # Capture thinking text for next tool call
+                 thinking_text = getattr(block, "thinking", "")
+                 if thinking_text:
+                     pending_reasoning.append(thinking_text)
+
+             elif block_type == "redacted_thinking":
+                 # Use placeholder for redacted thinking
+                 pending_reasoning.append("[redacted thinking]")
+
+             elif block_type == "server_tool_use":
+                 # Capture reasoning/preamble for this tool call
+                 call_reasoning = pending_reasoning.copy() if pending_reasoning else None
+                 call_preamble = (
+                     "\n".join(pending_preamble) if pending_preamble else None
+                 )
+                 tool_calls.append(
+                     AgentToolCall(
+                         tool_name="web_search_call",
+                         tool_args={
+                             "query": block.input.get("query", ""),
+                             "status": "completed",
+                         },
+                         tool_call_id=block.id,
+                         completion=assistant_message,
+                         reasoning=call_reasoning,
+                         preamble=call_preamble,
+                     )
+                 )
+                 pending_reasoning = []
+                 pending_preamble = []
+
+             elif block_type == "web_search_tool_result":
+                 # Extract search results and associate with tool call
+                 results = []
+                 for result in block.content:
+                     if hasattr(result, "url"):
+                         results.append(
+                             {
+                                 "url": result.url,
+                                 "title": getattr(result, "title", ""),
+                                 "page_age": getattr(result, "page_age", None),
+                             }
+                         )
+                 web_search_results[block.tool_use_id] = results
+
+             elif block_type == "text":
+                 # Text blocks contribute to preamble (don't reset pending_reasoning)
+                 text_chunks.append(block.text)
+                 pending_preamble.append(block.text)
+                 # Extract citations if present
+                 if hasattr(block, "citations") and block.citations:
+                     for citation in block.citations:
+                         all_citations.append(
+                             {
+                                 "url": getattr(citation, "url", ""),
+                                 "title": getattr(citation, "title", ""),
+                                 "cited_text": getattr(citation, "cited_text", ""),
+                             }
+                         )
+
+             elif block_type == "tool_use":
+                 # Handle regular tool calls (non-server-side)
+                 call_reasoning = pending_reasoning.copy() if pending_reasoning else None
+                 call_preamble = (
+                     "\n".join(pending_preamble) if pending_preamble else None
+                 )
+                 tool_calls.append(
+                     AgentToolCall(
+                         tool_name=block.name,
+                         tool_args=block.input,
+                         tool_call_id=block.id,
+                         completion=assistant_message,
+                         reasoning=call_reasoning,
+                         preamble=call_preamble,
+                     )
+                 )
+                 pending_reasoning = []
+                 pending_preamble = []
+
+         # Update tool calls with their results
+         for tc in tool_calls:
+             if (
+                 tc.tool_name == "web_search_call"
+                 and tc.tool_call_id in web_search_results
+             ):
+                 tc.tool_args["results"] = web_search_results[tc.tool_call_id]
+
+         # Add citations to the response if present
+         if all_citations:
+             for tc in tool_calls:
+                 if tc.tool_name == "web_search_call":
+                     tc.tool_args["citations"] = all_citations
+                     break
+
+         content = "".join(text_chunks)
+
+         # If no tool calls, add text_output with any remaining reasoning
+         if len(tool_calls) == 0:
+             call_reasoning = pending_reasoning.copy() if pending_reasoning else None
+             tool_calls.append(
+                 AgentToolCall(
+                     tool_name="text_output",
+                     tool_args={"content": content},
+                     tool_call_id=str(uuid.uuid4()),
+                     completion=assistant_message,
+                     reasoning=call_reasoning,
+                     preamble=None,  # No preamble for text-only
+                 )
+             )
+
+         return content, tool_calls
+
+     async def _stream_completions_anthropic_native(
+         self,
+         messages: list[dict[str, Any]],
+         agent_tools: list[dict[str, Any]],
+         tool_choice: str | dict[str, str] = "required",
+     ) -> AgentOutput:
+         """
+         Stream a native Anthropic API call with web_search built-in tools.
+         """
+         client = wrap_anthropic(anthropic.AsyncAnthropic())
+
+         # Strip provider prefix from model name
+         model = self.llm
+         for prefix in ("anthropic/", "litellm_proxy/anthropic/", "litellm_proxy/"):
+             if model.startswith(prefix):
+                 model = model[len(prefix) :]
+                 break
+
+         # Extract system message - Anthropic expects it as a top-level parameter
+         system_content = None
+         filtered_messages = []
+         for msg in messages:
+             if msg.get("role") == "system":
+                 system_content = msg.get("content", "")
+             else:
+                 filtered_messages.append(msg)
+
+         # Convert messages from OpenAI/LiteLLM format to Anthropic format
+         # This handles tool results (role: "tool") and tool_calls in assistant messages
+         anthropic_messages = self._convert_messages_to_anthropic_format(
+             filtered_messages
+         )
+
+         # Convert tools to Anthropic format
+         anthropic_tools = self._convert_tools_to_anthropic_format(agent_tools)
+
+         # Build request params
+         request_params: dict[str, Any] = {
+             "model": model,
+             "messages": anthropic_messages,
+             "tools": anthropic_tools,
+             "max_tokens": 64000,  # TODO: make this configurable - currently hardcoded to 64k
+         }
+
+         if system_content:
+             request_params["system"] = system_content
+
+         # Add thinking/extended thinking if enabled
+         thinking_enabled = self.thinking.get("type") == "enabled"
+         if thinking_enabled:
+             request_params["thinking"] = self.thinking
+             # Enable interleaved thinking for Claude 4 models via beta header
+             # This allows Claude to think between tool calls for more sophisticated reasoning
+             request_params["extra_headers"] = {
+                 "anthropic-beta": "interleaved-thinking-2025-05-14"
+             }
+
+         # Handle tool_choice
+         # IMPORTANT: When thinking is enabled, only "auto" and "none" are supported.
+         # Using "any" or forced tool use will cause an error.
+         if tool_choice == "required":
+             if thinking_enabled:
+                 # Fall back to "auto" when thinking is enabled - "any" is incompatible
+                 logger.warning(
+                     "tool_choice='required' is incompatible with extended thinking. "
+                     "Falling back to tool_choice='auto'."
+                 )
+                 request_params["tool_choice"] = {"type": "auto"}
+             else:
+                 request_params["tool_choice"] = {"type": "any"}
+         elif tool_choice == "auto":
+             request_params["tool_choice"] = {"type": "auto"}
+         elif isinstance(tool_choice, dict):
+             # Validate dict tool_choice when thinking is enabled
+             if thinking_enabled and tool_choice.get("type") in ("any", "tool"):
+                 logger.warning(
+                     f"tool_choice={tool_choice} is incompatible with extended thinking. "
+                     "Falling back to tool_choice='auto'."
+                 )
+                 request_params["tool_choice"] = {"type": "auto"}
+             else:
+                 request_params["tool_choice"] = tool_choice
+
+         is_response = False
+         is_searching = False
+         is_reasoning = False
+
+         # Accumulate all content blocks across potential pause_turn continuations
+         all_content_blocks: list[Any] = []
+         final_message = None
+
+         while True:
+             async with client.messages.stream(**request_params) as stream:
+                 async for event in stream:
+                     event_type = event.type
+
+                     if event_type == "content_block_start":
+                         assert isinstance(event, ContentBlockStartEvent)
+                         block = event.content_block
+                         block_type = block.type
+
+                         if block_type == "thinking":
+                             if not is_reasoning:
+                                 rich.print(
+                                     f"\n\n[bold green]{'=' * 21} REASONING {'=' * 21}[/bold green]\n\n"
+                                 )
+                                 is_reasoning = True
+
+                         elif block_type == "redacted_thinking":
+                             # Redacted thinking blocks contain encrypted content
+                             if not is_reasoning:
+                                 rich.print(
+                                     f"\n\n[bold green]{'=' * 21} REASONING {'=' * 21}[/bold green]\n\n"
+                                 )
+                                 is_reasoning = True
+                             # Plain print: rich would parse the brackets as markup
+                             print("[redacted thinking]", flush=True)
+
+                         elif block_type == "server_tool_use":
+                             if not is_searching:
+                                 rich.print(
+                                     f"\n\n[bold yellow]{'=' * 21} WEB SEARCH {'=' * 21}[/bold yellow]\n\n"
+                                 )
+                                 is_searching = True
+
+                         elif block_type == "text":
+                             if not is_response:
+                                 rich.print(
+                                     f"\n\n[bold blue]{'=' * 21} RESPONSE {'=' * 21}[/bold blue]\n\n"
+                                 )
+                                 is_response = True
+
+                     elif event_type == "content_block_delta":
+                         assert isinstance(event, ContentBlockDeltaEvent)
+                         delta = event.delta
+                         delta_type = delta.type
+
+                         if delta_type == "thinking_delta":
+                             assert isinstance(delta, ThinkingDelta)
+                             print(delta.thinking, end="", flush=True)
+                         elif delta_type == "text_delta":
+                             assert isinstance(delta, TextDelta)
+                             print(delta.text, end="", flush=True)
+
+                 # Get the final message with full content
+                 final_message = await stream.get_final_message()
+
+             # Accumulate content blocks from this stream
+             all_content_blocks.extend(final_message.content)
+
+             # Check if we need to continue (pause_turn means model paused mid-generation)
+             if final_message.stop_reason == "pause_turn":
+                 logger.debug(
+                     f"Received pause_turn in stream, continuing generation (accumulated {len(all_content_blocks)} blocks)"
+                 )
+                 # Add partial response to messages so model can continue
+                 anthropic_messages.append(
+                     {
+                         "role": "assistant",
+                         "content": [
+                             block.model_dump() for block in final_message.content
+                         ],
+                     }
+                 )
+                 request_params["messages"] = anthropic_messages
+                 # Continue the loop to start a new stream
+             else:
+                 # Generation complete (end_turn, tool_use, etc.)
+                 break
+
+         # Build assistant message from all accumulated content blocks
+         # This preserves thinking blocks, tool_use, text, etc. in Anthropic format
+         assistant_message: dict[str, Any] = {
+             "role": "assistant",
+             "content": [block.model_dump() for block in all_content_blocks],
+         }
+
+         # Process the final message to get complete data with interleaved thinking
+         tool_calls: list[AgentToolCall] = []
+         text_chunks: list[str] = []
+         all_citations: list[dict[str, Any]] = []
+         web_search_results: dict[str, list[dict[str, Any]]] = {}
+
+         # Track pending reasoning/preamble for interleaved association
+         pending_reasoning: list[str] = []
+         pending_preamble: list[str] = []
+
+         for block in all_content_blocks:
+             block_type = block.type
+
+             if block_type == "thinking":
+                 # Capture thinking text for next tool call
+                 thinking_text = getattr(block, "thinking", "")
+                 if thinking_text:
+                     pending_reasoning.append(thinking_text)
+
+             elif block_type == "redacted_thinking":
+                 # Use placeholder for redacted thinking
+                 pending_reasoning.append("[redacted thinking]")
+
+             elif block_type == "server_tool_use":
+                 # Capture reasoning/preamble for this tool call
+                 call_reasoning = pending_reasoning.copy() if pending_reasoning else None
+                 call_preamble = (
+                     "\n".join(pending_preamble) if pending_preamble else None
+                 )
+                 tool_calls.append(
+                     AgentToolCall(
+                         tool_name="web_search_call",
+                         tool_args={
+                             "query": block.input.get("query", ""),
+                             "status": "completed",
+                         },
+                         tool_call_id=block.id,
+                         completion=assistant_message,
+                         reasoning=call_reasoning,
+                         preamble=call_preamble,
+                     )
+                 )
+                 pending_reasoning = []
+                 pending_preamble = []
+
+             elif block_type == "web_search_tool_result":
+                 results = []
+                 for result in block.content:
+                     if hasattr(result, "url"):
+                         results.append(
+                             {
+                                 "url": result.url,
+                                 "title": getattr(result, "title", ""),
+                                 "page_age": getattr(result, "page_age", None),
+                             }
+                         )
+                 web_search_results[block.tool_use_id] = results
+
+             elif block_type == "text":
+                 # Text blocks contribute to preamble (don't reset pending_reasoning)
+                 text_chunks.append(block.text)
+                 pending_preamble.append(block.text)
+                 if hasattr(block, "citations") and block.citations:
+                     for citation in block.citations:
+                         all_citations.append(
+                             {
+                                 "url": getattr(citation, "url", ""),
+                                 "title": getattr(citation, "title", ""),
+                                 "cited_text": getattr(citation, "cited_text", ""),
+                             }
+                         )
+
+             elif block_type == "tool_use":
+                 # Handle regular tool calls (non-server-side)
+                 call_reasoning = pending_reasoning.copy() if pending_reasoning else None
+                 call_preamble = (
+                     "\n".join(pending_preamble) if pending_preamble else None
+                 )
+                 tool_calls.append(
+                     AgentToolCall(
+                         tool_name=block.name,
+                         tool_args=block.input,
+                         tool_call_id=block.id,
+                         completion=assistant_message,
+                         reasoning=call_reasoning,
+                         preamble=call_preamble,
+                     )
+                 )
+                 pending_reasoning = []
+                 pending_preamble = []
+
+         # Update tool calls with their results
+         for tc in tool_calls:
+             if (
+                 tc.tool_name == "web_search_call"
+                 and tc.tool_call_id in web_search_results
+             ):
+                 tc.tool_args["results"] = web_search_results[tc.tool_call_id]
+
+         # Add citations to the response if present
+         if all_citations:
+             for tc in tool_calls:
+                 if tc.tool_name == "web_search_call":
+                     tc.tool_args["citations"] = all_citations
+                     break
+
+         content = "".join(text_chunks)
+
+         # If no tool calls, add text_output with any remaining reasoning
+         if len(tool_calls) == 0:
+             call_reasoning = pending_reasoning.copy() if pending_reasoning else None
+             tool_calls.append(
+                 AgentToolCall(
+                     tool_name="text_output",
+                     tool_args={"content": content},
+                     tool_call_id=str(uuid.uuid4()),
+                     completion=assistant_message,
+                     reasoning=call_reasoning,
+                     preamble=None,  # No preamble for text-only
+                 )
+             )
+
+         return content, tool_calls
+
+     async def _stream_completions(
+         self,
+         messages: list[dict[str, Any]],
+         tools: list[dict[str, Any]],
+         tool_choice: str | dict[str, str] = "required",
+     ) -> ModelResponse:
+         """
+         Stream a LiteLLM completion-style call to the terminal.
+         """
+         litellm.drop_params = True
+         stream = await acompletion(
+             model=self.llm,
+             messages=messages,
+             tools=tools,
+             thinking=self.thinking,
+             reasoning_effort=self.reasoning_effort,
+             max_tokens=self.max_tokens,
+             tool_choice=tool_choice if len(tools) > 0 else None,
+             extra_headers=self.extra_headers,
+             stream=True,
+         )
+         chunks = []
+         is_response = False
+         is_reasoning = False
+         async for chunk in stream:
+             delta = chunk.choices[0].delta
+             if getattr(delta, "reasoning_content", None) is not None:
+                 if not is_reasoning:
+                     rich.print(
+                         f"\n\n[bold green]{'=' * 21} REASONING {'=' * 21}[/bold green]\n\n"
+                     )
+                     is_reasoning = True
+                 rich.print(delta.reasoning_content, end="", flush=True)
+             elif getattr(delta, "content", None) is not None:
+                 if not is_response:
+                     rich.print(
+                         f"\n\n[bold blue]{'=' * 21} RESPONSE {'=' * 21}[/bold blue]\n\n"
+                     )
+                     is_response = True
+                 rich.print(delta.content, end="", flush=True)
+             chunks.append(chunk)
+
+         final_completion = litellm.stream_chunk_builder(chunks, messages=messages)
+         assert isinstance(final_completion, ModelResponse)
+         return final_completion
+
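+     # Ordering assumption for the single-pass collection below (hypothetical
+     # output list): ["reasoning", "message", "function_call"] yields one
+     # AgentToolCall whose reasoning holds the reasoning summaries and whose
+     # preamble is the message text; both buffers are cleared after each call.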
+     async def _run_responses(
+         self,
+         messages: list[dict[str, Any]],
+         tool_choice: str | dict[str, str] = "required",
+     ) -> AgentOutput:
+         """
+         Execute a LiteLLM responses-style call on behalf of the MAIL agent.
+         """
+         litellm.drop_params = True
+         messages, agent_tools = await self._preprocess(
+             messages, "responses", exclude_tools=self.exclude_tools
+         )
+         retries = 5
+         with ls.trace(
+             name=f"{self.name}_responses",
+             run_type="llm",
+             inputs={
+                 "messages": messages,
+                 "tools": agent_tools,
+                 "thinking": self.thinking,
+                 "reasoning_effort": self.reasoning_effort,
+                 "max_tokens": self.max_tokens,
+                 "tool_choice": tool_choice,
+             },
+         ) as rt:
+             include: list[str] = ["code_interpreter_call.outputs"]
+             reasoning: dict[str, Any] = {}
+             if litellm.supports_reasoning(self.llm):
+                 include.append("reasoning.encrypted_content")
+                 reasoning = {
+                     "effort": self.reasoning_effort or "medium",
+                     "summary": "auto",
+                 }
+             # Track streaming reasoning data (None for non-streaming)
+             tool_reasoning_map: dict[int, list[str]] | None = None
+             streaming_pending_reasoning: list[str] | None = None
+
+             while retries > 0:
+                 try:
+                     if self.stream_tokens:
+                         # Streaming returns 3-tuple with reasoning tracking
+                         (
+                             res,
+                             tool_reasoning_map,
+                             streaming_pending_reasoning,
+                         ) = await self._stream_responses(
+                             messages, include, reasoning, agent_tools, tool_choice
+                         )
+                     else:
+                         res = await aresponses(
+                             input=messages,
+                             model=self.llm,
+                             max_output_tokens=self.max_tokens,
+                             include=include,
+                             reasoning=reasoning,
+                             tool_choice=tool_choice,
+                             tools=agent_tools,
+                             extra_headers=self.extra_headers,
+                         )
+                     rt.end(outputs={"output": res})
+                     break
+                 except Exception as e:
+                     retries -= 1
+                     logger.warning(f"Error running responses: {e}")
+                     if retries == 0:
+                         # Out of retries - surface the last error instead of
+                         # falling through with `res` unbound
+                         raise
+                     logger.warning(f"Retrying {retries} more times")
+                     await asyncio.sleep(retries)
+
+         # Single-pass collection preserving original order with reasoning attachment
+         agent_tool_calls: list[AgentToolCall] = []
+         res_dict = res.model_dump()
+         outputs = res_dict["output"]
+
+         # Track pending reasoning/preamble for interleaved association
+         pending_reasoning: list[str] = []
+         pending_preamble: list[str] = []
+         first_message_text: str | None = None
+
+         # Helper to get output type (dict or object)
+         def get_output_type(output: Any) -> str | None:
+             if isinstance(output, dict):
+                 return output.get("type")
+             return getattr(output, "type", None)
+
+         for i, output in enumerate(res.output):
+             output_type = get_output_type(output)
+
+             if output_type == "reasoning":
+                 # Hold reasoning blocks for next tool call
+                 # Handle both dict and object formats
+                 summary = (
+                     output.get("summary")
+                     if isinstance(output, dict)
+                     else getattr(output, "summary", None)
+                 )
+                 if summary:
+                     for s in summary:
+                         text = (
+                             s.get("text")
+                             if isinstance(s, dict)
+                             else getattr(s, "text", None)
+                         )
+                         if text:
+                             pending_reasoning.append(text)
+
+             elif output_type == "message":
+                 # Message content - collect for preamble AND text_output fallback
+                 content = (
+                     output.get("content")
+                     if isinstance(output, dict)
+                     else getattr(output, "content", None)
+                 )
+                 if content:
+                     for part in content:
+                         text = (
+                             part.get("text")
+                             if isinstance(part, dict)
+                             else getattr(part, "text", None)
+                         )
+                         if text:
+                             pending_preamble.append(text)
+                             if first_message_text is None:
+                                 first_message_text = text
+
+             elif output_type == "function_call":
+                 # Get reasoning - from inline extraction OR from streaming map
+                 call_reasoning = pending_reasoning.copy() if pending_reasoning else None
+                 call_preamble = (
+                     "\n".join(pending_preamble) if pending_preamble else None
+                 )
+
+                 # For streaming: fill reasoning from map if inline extraction empty
+                 if tool_reasoning_map and i in tool_reasoning_map:
+                     map_reasoning = tool_reasoning_map[i]
+                     if not call_reasoning and map_reasoning:
+                         call_reasoning = map_reasoning
+
+                 # Handle both dict and object formats
+                 if isinstance(output, dict):
+                     call_id = output["call_id"]
+                     name = output["name"]
+                     arguments = output["arguments"]
+                 else:
+                     assert isinstance(output, ResponseFunctionToolCall)
+                     call_id = output.call_id
+                     name = output.name
+                     arguments = output.arguments
+
+                 agent_tool_calls.append(
+                     AgentToolCall(
+                         tool_name=name,
+                         tool_args=ujson.loads(arguments),
+                         tool_call_id=call_id,
+                         responses=outputs,
+                         reasoning=call_reasoning,
+                         preamble=call_preamble,
+                     )
+                 )
+                 pending_reasoning = []
+                 pending_preamble = []
+
+             elif output_type == "web_search_call":
+                 call_reasoning = pending_reasoning.copy() if pending_reasoning else None
+                 call_preamble = (
+                     "\n".join(pending_preamble) if pending_preamble else None
+                 )
+                 if tool_reasoning_map and i in tool_reasoning_map:
+                     map_reasoning = tool_reasoning_map[i]
+                     if not call_reasoning and map_reasoning:
+                         call_reasoning = map_reasoning
+
+                 # Handle both dict and object - get fields safely
+                 btc_id = (
+                     output.get("id")
+                     if isinstance(output, dict)
+                     else getattr(output, "id", "")
+                 )
+                 btc_status = (
+                     output.get("status")
+                     if isinstance(output, dict)
+                     else getattr(output, "status", "completed")
+                 )
+                 action = (
+                     output.get("action", {})
+                     if isinstance(output, dict)
+                     else getattr(output, "action", {})
+                 )
+                 if isinstance(action, dict):
+                     query = action.get("query", "")
+                     search_type = action.get("type", "")
+                 else:
+                     query = getattr(action, "query", "")
+                     search_type = getattr(action, "type", "")
+
+                 agent_tool_calls.append(
+                     AgentToolCall(
+                         tool_name="web_search_call",
+                         tool_args={
+                             "query": query,
+                             "search_type": search_type,
+                             "status": btc_status,
+                         },
+                         tool_call_id=btc_id or "",
+                         responses=outputs,
+                         reasoning=call_reasoning,
+                         preamble=call_preamble,
+                     )
+                 )
+                 pending_reasoning = []
+                 pending_preamble = []
+
+             elif output_type == "code_interpreter_call":
+                 call_reasoning = pending_reasoning.copy() if pending_reasoning else None
+                 call_preamble = (
+                     "\n".join(pending_preamble) if pending_preamble else None
+                 )
+                 if tool_reasoning_map and i in tool_reasoning_map:
+                     map_reasoning = tool_reasoning_map[i]
+                     if not call_reasoning and map_reasoning:
+                         call_reasoning = map_reasoning
+
+                 # Handle both dict and object
+                 btc_id = (
+                     output.get("id")
+                     if isinstance(output, dict)
+                     else getattr(output, "id", "")
+                 )
+                 btc_status = (
+                     output.get("status")
+                     if isinstance(output, dict)
+                     else getattr(output, "status", "completed")
+                 )
+                 btc_code = (
+                     output.get("code")
+                     if isinstance(output, dict)
+                     else getattr(output, "code", "")
+                 )
+                 btc_outputs = (
+                     output.get("outputs")
+                     if isinstance(output, dict)
+                     else getattr(output, "outputs", [])
+                 )
+
+                 agent_tool_calls.append(
+                     AgentToolCall(
+                         tool_name="code_interpreter_call",
+                         tool_args={
+                             "code": btc_code,
+                             "outputs": btc_outputs,
+                             "status": btc_status,
+                         },
+                         tool_call_id=btc_id or "",
+                         responses=outputs,
+                         reasoning=call_reasoning,
+                         preamble=call_preamble,
+                     )
+                 )
+                 pending_reasoning = []
+                 pending_preamble = []
+
+         # If no tool calls, create text_output with message content
+         if not agent_tool_calls and first_message_text:
+             # For text-only: use inline pending_reasoning, OR streaming fallback
+             call_reasoning = pending_reasoning.copy() if pending_reasoning else None
+             if not call_reasoning and streaming_pending_reasoning:
+                 call_reasoning = streaming_pending_reasoning
+
+             agent_tool_calls.append(
+                 AgentToolCall(
+                     tool_name="text_output",
+                     tool_args={"content": first_message_text},
+                     tool_call_id=str(uuid.uuid4()),
+                     responses=outputs,
+                     reasoning=call_reasoning,
+                     preamble=None,  # No preamble for text-only
+                 )
+             )
+             return first_message_text, agent_tool_calls
+
+         return "", agent_tool_calls
+
+     async def _stream_responses(
+         self,
+         messages: list[dict[str, Any]],
+         include: list[str],
+         reasoning: dict[str, Any],
+         tools: list[dict[str, Any]],
+         tool_choice: str | dict[str, str] = "required",
+     ) -> tuple[ResponsesAPIResponse, dict[int, list[str]], list[str]]:
+         """
+         Stream a LiteLLM responses-style call to the terminal.
+
+         Returns:
+             A 3-tuple of (response, tool_reasoning_map, pending_reasoning):
+             - response: The final ResponsesAPIResponse
+             - tool_reasoning_map: Dict mapping output_index to list of reasoning blocks
+             - pending_reasoning: Any reasoning that wasn't associated with a tool (for text-only)
+         """
+         litellm.drop_params = True
+         stream = await aresponses(
+             input=messages,
+             model=self.llm,
+             max_output_tokens=self.max_tokens,
+             include=include,
+             reasoning=reasoning,
+             tool_choice=tool_choice,
+             tools=tools,
+             extra_headers=self.extra_headers,
+             stream=True,
+         )
+
+         final_response = None
+
+         # Track interleaved reasoning per tool
+         pending_reasoning_parts: list[str] = []  # Completed reasoning blocks
+         current_reasoning_text: list[str] = []  # Delta accumulator for current block
+         tool_reasoning_map: dict[int, list[str]] = {}  # output_index -> reasoning_blocks
+
+         async for event in stream:
+             match event.type:
+                 case "response.created":
+                     rich.print(
+                         f"\n\n[bold green]{'=' * 21} REASONING {'=' * 21}[/bold green]\n\n"
+                     )
+                 case "response.reasoning_summary_text.delta":
+                     # Stream reasoning text and accumulate for mapping
+                     rich.print(event.delta, end="", flush=True)
+                     current_reasoning_text.append(event.delta)
+
+                 case "response.reasoning_summary_part.done":
+                     # Reasoning part complete - finalize the block
+                     rich.print("\n\n")
+                     if current_reasoning_text:
+                         pending_reasoning_parts.append("".join(current_reasoning_text))
+                         current_reasoning_text = []
+
+                 case "response.output_item.added":
+                     # Handle both dict and object formats
+                     item_type = (
+                         event.item.get("type")
+                         if isinstance(event.item, dict)
+                         else getattr(event.item, "type", None)
+                     )
+
+                     # When a tool output starts, capture pending reasoning for it
+                     if item_type in (
+                         "function_call",
+                         "web_search_call",
+                         "code_interpreter_call",
+                     ):
+                         # Finalize any in-progress reasoning block
+                         if current_reasoning_text:
+                             pending_reasoning_parts.append(
+                                 "".join(current_reasoning_text)
+                             )
+                             current_reasoning_text = []
+                         # Store reasoning for this tool's output_index
+                         tool_reasoning_map[event.output_index] = (
+                             pending_reasoning_parts.copy()
+                             if pending_reasoning_parts
+                             else []
+                         )
+                         pending_reasoning_parts = []
+
+                     if item_type == "message":
+                         rich.print(
+                             f"\n\n[bold blue]{'=' * 21} RESPONSE {'=' * 21}[/bold blue]\n\n"
+                         )
+
+                 case "response.output_text.delta":
+                     rich.print(event.delta, end="", flush=True)
+
+                 case "response.completed":
+                     # Defensive: flush any remaining reasoning text
+                     if current_reasoning_text:
+                         pending_reasoning_parts.append("".join(current_reasoning_text))
+                         current_reasoning_text = []
+                     final_response = event.response
+
+         assert final_response is not None
+         assert isinstance(final_response, ResponsesAPIResponse)
+         return final_response, tool_reasoning_map, pending_reasoning_parts
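
For orientation, a minimal usage sketch of the class this file adds. The model name, comm target, and token below are illustrative, and the sketch assumes a reachable LiteLLM proxy; it is not an official example from the package:

import asyncio

from mail.factories.base import LiteLLMAgentFunction

async def main() -> None:
    # Build an agent function that can message one peer agent via MAIL tools.
    agent = LiteLLMAgentFunction(
        name="demo_agent",
        comm_targets=["supervisor"],
        tools=[],
        llm="anthropic/claude-sonnet-4-5",  # illustrative model name
        system="You are a helpful agent.",
        user_token="sk-example",  # placeholder proxy token
        tool_format="completions",
    )
    # AgentOutput unpacks into the text content and the parsed tool calls.
    content, tool_calls = await agent(
        messages=[{"role": "user", "content": "Say hello."}],
        tool_choice="auto",
    )
    print(content, [tc.tool_name for tc in tool_calls])

asyncio.run(main())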