openai-agents 0.2.6__py3-none-any.whl → 0.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. agents/__init__.py +105 -4
  2. agents/_debug.py +15 -4
  3. agents/_run_impl.py +1203 -96
  4. agents/agent.py +294 -21
  5. agents/apply_diff.py +329 -0
  6. agents/editor.py +47 -0
  7. agents/exceptions.py +35 -0
  8. agents/extensions/experimental/__init__.py +6 -0
  9. agents/extensions/experimental/codex/__init__.py +92 -0
  10. agents/extensions/experimental/codex/codex.py +89 -0
  11. agents/extensions/experimental/codex/codex_options.py +35 -0
  12. agents/extensions/experimental/codex/codex_tool.py +1142 -0
  13. agents/extensions/experimental/codex/events.py +162 -0
  14. agents/extensions/experimental/codex/exec.py +263 -0
  15. agents/extensions/experimental/codex/items.py +245 -0
  16. agents/extensions/experimental/codex/output_schema_file.py +50 -0
  17. agents/extensions/experimental/codex/payloads.py +31 -0
  18. agents/extensions/experimental/codex/thread.py +214 -0
  19. agents/extensions/experimental/codex/thread_options.py +54 -0
  20. agents/extensions/experimental/codex/turn_options.py +36 -0
  21. agents/extensions/handoff_filters.py +13 -1
  22. agents/extensions/memory/__init__.py +120 -0
  23. agents/extensions/memory/advanced_sqlite_session.py +1285 -0
  24. agents/extensions/memory/async_sqlite_session.py +239 -0
  25. agents/extensions/memory/dapr_session.py +423 -0
  26. agents/extensions/memory/encrypt_session.py +185 -0
  27. agents/extensions/memory/redis_session.py +261 -0
  28. agents/extensions/memory/sqlalchemy_session.py +334 -0
  29. agents/extensions/models/litellm_model.py +449 -36
  30. agents/extensions/models/litellm_provider.py +3 -1
  31. agents/function_schema.py +47 -5
  32. agents/guardrail.py +16 -2
  33. agents/{handoffs.py → handoffs/__init__.py} +89 -47
  34. agents/handoffs/history.py +268 -0
  35. agents/items.py +238 -13
  36. agents/lifecycle.py +75 -14
  37. agents/mcp/server.py +280 -37
  38. agents/mcp/util.py +24 -3
  39. agents/memory/__init__.py +22 -2
  40. agents/memory/openai_conversations_session.py +91 -0
  41. agents/memory/openai_responses_compaction_session.py +249 -0
  42. agents/memory/session.py +19 -261
  43. agents/memory/sqlite_session.py +275 -0
  44. agents/memory/util.py +20 -0
  45. agents/model_settings.py +18 -3
  46. agents/models/__init__.py +13 -0
  47. agents/models/chatcmpl_converter.py +303 -50
  48. agents/models/chatcmpl_helpers.py +63 -0
  49. agents/models/chatcmpl_stream_handler.py +290 -68
  50. agents/models/default_models.py +58 -0
  51. agents/models/interface.py +4 -0
  52. agents/models/openai_chatcompletions.py +103 -48
  53. agents/models/openai_provider.py +10 -4
  54. agents/models/openai_responses.py +167 -46
  55. agents/realtime/__init__.py +4 -0
  56. agents/realtime/_util.py +14 -3
  57. agents/realtime/agent.py +7 -0
  58. agents/realtime/audio_formats.py +53 -0
  59. agents/realtime/config.py +78 -10
  60. agents/realtime/events.py +18 -0
  61. agents/realtime/handoffs.py +2 -2
  62. agents/realtime/items.py +17 -1
  63. agents/realtime/model.py +13 -0
  64. agents/realtime/model_events.py +12 -0
  65. agents/realtime/model_inputs.py +18 -1
  66. agents/realtime/openai_realtime.py +700 -151
  67. agents/realtime/session.py +309 -32
  68. agents/repl.py +7 -3
  69. agents/result.py +197 -38
  70. agents/run.py +1053 -178
  71. agents/run_context.py +13 -2
  72. agents/stream_events.py +1 -0
  73. agents/strict_schema.py +14 -0
  74. agents/tool.py +413 -15
  75. agents/tool_context.py +22 -1
  76. agents/tool_guardrails.py +279 -0
  77. agents/tracing/__init__.py +2 -0
  78. agents/tracing/config.py +9 -0
  79. agents/tracing/create.py +4 -0
  80. agents/tracing/processor_interface.py +84 -11
  81. agents/tracing/processors.py +65 -54
  82. agents/tracing/provider.py +64 -7
  83. agents/tracing/spans.py +105 -0
  84. agents/tracing/traces.py +116 -16
  85. agents/usage.py +134 -12
  86. agents/util/_json.py +19 -1
  87. agents/util/_transforms.py +12 -2
  88. agents/voice/input.py +5 -4
  89. agents/voice/models/openai_stt.py +17 -9
  90. agents/voice/pipeline.py +2 -0
  91. agents/voice/pipeline_config.py +4 -0
  92. {openai_agents-0.2.6.dist-info → openai_agents-0.6.8.dist-info}/METADATA +44 -19
  93. openai_agents-0.6.8.dist-info/RECORD +134 -0
  94. {openai_agents-0.2.6.dist-info → openai_agents-0.6.8.dist-info}/WHEEL +1 -1
  95. openai_agents-0.2.6.dist-info/RECORD +0 -103
  96. {openai_agents-0.2.6.dist-info → openai_agents-0.6.8.dist-info}/licenses/LICENSE +0 -0
agents/_run_impl.py CHANGED
@@ -3,12 +3,14 @@ from __future__ import annotations
3
3
  import asyncio
4
4
  import dataclasses
5
5
  import inspect
6
- from collections.abc import Awaitable
6
+ import json
7
+ from collections.abc import Awaitable, Mapping, Sequence
7
8
  from dataclasses import dataclass, field
8
- from typing import TYPE_CHECKING, Any, cast
9
+ from typing import TYPE_CHECKING, Any, Literal, Optional, cast
9
10
 
10
11
  from openai.types.responses import (
11
12
  ResponseComputerToolCall,
13
+ ResponseCustomToolCall,
12
14
  ResponseFileSearchToolCall,
13
15
  ResponseFunctionToolCall,
14
16
  ResponseFunctionWebSearch,
@@ -44,10 +46,18 @@ from openai.types.responses.response_reasoning_item import ResponseReasoningItem
44
46
  from .agent import Agent, ToolsToFinalOutputResult
45
47
  from .agent_output import AgentOutputSchemaBase
46
48
  from .computer import AsyncComputer, Computer
47
- from .exceptions import AgentsException, ModelBehaviorError, UserError
49
+ from .editor import ApplyPatchOperation, ApplyPatchResult
50
+ from .exceptions import (
51
+ AgentsException,
52
+ ModelBehaviorError,
53
+ ToolInputGuardrailTripwireTriggered,
54
+ ToolOutputGuardrailTripwireTriggered,
55
+ UserError,
56
+ )
48
57
  from .guardrail import InputGuardrail, InputGuardrailResult, OutputGuardrail, OutputGuardrailResult
49
- from .handoffs import Handoff, HandoffInputData
58
+ from .handoffs import Handoff, HandoffInputData, nest_handoff_history
50
59
  from .items import (
60
+ CompactionItem,
51
61
  HandoffCallItem,
52
62
  HandoffOutputItem,
53
63
  ItemHelpers,
@@ -66,9 +76,10 @@ from .lifecycle import RunHooks
66
76
  from .logger import logger
67
77
  from .model_settings import ModelSettings
68
78
  from .models.interface import ModelTracing
69
- from .run_context import RunContextWrapper, TContext
79
+ from .run_context import AgentHookContext, RunContextWrapper, TContext
70
80
  from .stream_events import RunItemStreamEvent, StreamEvent
71
81
  from .tool import (
82
+ ApplyPatchTool,
72
83
  ComputerTool,
73
84
  ComputerToolSafetyCheckData,
74
85
  FunctionTool,
@@ -77,12 +88,27 @@ from .tool import (
77
88
  LocalShellCommandRequest,
78
89
  LocalShellTool,
79
90
  MCPToolApprovalRequest,
91
+ ShellActionRequest,
92
+ ShellCallData,
93
+ ShellCallOutcome,
94
+ ShellCommandOutput,
95
+ ShellCommandRequest,
96
+ ShellResult,
97
+ ShellTool,
80
98
  Tool,
99
+ resolve_computer,
81
100
  )
82
101
  from .tool_context import ToolContext
102
+ from .tool_guardrails import (
103
+ ToolInputGuardrailData,
104
+ ToolInputGuardrailResult,
105
+ ToolOutputGuardrailData,
106
+ ToolOutputGuardrailResult,
107
+ )
83
108
  from .tracing import (
84
109
  SpanError,
85
110
  Trace,
111
+ TracingConfig,
86
112
  function_span,
87
113
  get_current_trace,
88
114
  guardrail_span,
@@ -136,7 +162,7 @@ class ToolRunFunction:
136
162
  @dataclass
137
163
  class ToolRunComputerAction:
138
164
  tool_call: ResponseComputerToolCall
139
- computer_tool: ComputerTool
165
+ computer_tool: ComputerTool[Any]
140
166
 
141
167
 
142
168
  @dataclass
@@ -151,6 +177,18 @@ class ToolRunLocalShellCall:
151
177
  local_shell_tool: LocalShellTool
152
178
 
153
179
 
180
+ @dataclass
181
+ class ToolRunShellCall:
182
+ tool_call: Any
183
+ shell_tool: ShellTool
184
+
185
+
186
+ @dataclass
187
+ class ToolRunApplyPatchCall:
188
+ tool_call: Any
189
+ apply_patch_tool: ApplyPatchTool
190
+
191
+
154
192
  @dataclass
155
193
  class ProcessedResponse:
156
194
  new_items: list[RunItem]
@@ -158,6 +196,8 @@ class ProcessedResponse:
158
196
  functions: list[ToolRunFunction]
159
197
  computer_actions: list[ToolRunComputerAction]
160
198
  local_shell_calls: list[ToolRunLocalShellCall]
199
+ shell_calls: list[ToolRunShellCall]
200
+ apply_patch_calls: list[ToolRunApplyPatchCall]
161
201
  tools_used: list[str] # Names of all tools used, including hosted tools
162
202
  mcp_approval_requests: list[ToolRunMCPApprovalRequest] # Only requests with callbacks
163
203
 
@@ -170,6 +210,8 @@ class ProcessedResponse:
170
210
  self.functions,
171
211
  self.computer_actions,
172
212
  self.local_shell_calls,
213
+ self.shell_calls,
214
+ self.apply_patch_calls,
173
215
  self.mcp_approval_requests,
174
216
  ]
175
217
  )
@@ -203,16 +245,30 @@ class SingleStepResult:
203
245
  """Items generated before the current step."""
204
246
 
205
247
  new_step_items: list[RunItem]
206
- """Items generated during this current step."""
248
+ """Items generated during this current step. May be filtered during handoffs to avoid
249
+ duplication in model input."""
207
250
 
208
251
  next_step: NextStepHandoff | NextStepFinalOutput | NextStepRunAgain
209
252
  """The next step to take."""
210
253
 
254
+ tool_input_guardrail_results: list[ToolInputGuardrailResult]
255
+ """Tool input guardrail results from this step."""
256
+
257
+ tool_output_guardrail_results: list[ToolOutputGuardrailResult]
258
+ """Tool output guardrail results from this step."""
259
+
260
+ session_step_items: list[RunItem] | None = None
261
+ """Full unfiltered items for session history. When set, these are used instead of
262
+ new_step_items for session saving and generated_items property."""
263
+
211
264
  @property
212
265
  def generated_items(self) -> list[RunItem]:
213
266
  """Items generated during the agent run (i.e. everything generated after
214
- `original_input`)."""
215
- return self.pre_step_items + self.new_step_items
267
+ `original_input`). Uses session_step_items when available for full observability."""
268
+ items = (
269
+ self.session_step_items if self.session_step_items is not None else self.new_step_items
270
+ )
271
+ return self.pre_step_items + items
216
272
 
217
273
 
218
274
  def get_model_tracing_impl(
@@ -249,8 +305,15 @@ class RunImpl:
249
305
  new_step_items: list[RunItem] = []
250
306
  new_step_items.extend(processed_response.new_items)
251
307
 
252
- # First, lets run the tool calls - function tools and computer actions
253
- function_results, computer_results = await asyncio.gather(
308
+ # First, run function tools, computer actions, shell calls, apply_patch calls,
309
+ # and legacy local shell calls.
310
+ (
311
+ (function_results, tool_input_guardrail_results, tool_output_guardrail_results),
312
+ computer_results,
313
+ shell_results,
314
+ apply_patch_results,
315
+ local_shell_results,
316
+ ) = await asyncio.gather(
254
317
  cls.execute_function_tool_calls(
255
318
  agent=agent,
256
319
  tool_runs=processed_response.functions,
@@ -265,9 +328,33 @@ class RunImpl:
265
328
  context_wrapper=context_wrapper,
266
329
  config=run_config,
267
330
  ),
331
+ cls.execute_shell_calls(
332
+ agent=agent,
333
+ calls=processed_response.shell_calls,
334
+ hooks=hooks,
335
+ context_wrapper=context_wrapper,
336
+ config=run_config,
337
+ ),
338
+ cls.execute_apply_patch_calls(
339
+ agent=agent,
340
+ calls=processed_response.apply_patch_calls,
341
+ hooks=hooks,
342
+ context_wrapper=context_wrapper,
343
+ config=run_config,
344
+ ),
345
+ cls.execute_local_shell_calls(
346
+ agent=agent,
347
+ calls=processed_response.local_shell_calls,
348
+ hooks=hooks,
349
+ context_wrapper=context_wrapper,
350
+ config=run_config,
351
+ ),
268
352
  )
269
353
  new_step_items.extend([result.run_item for result in function_results])
270
354
  new_step_items.extend(computer_results)
355
+ new_step_items.extend(shell_results)
356
+ new_step_items.extend(apply_patch_results)
357
+ new_step_items.extend(local_shell_results)
271
358
 
272
359
  # Next, run the MCP approval requests
273
360
  if processed_response.mcp_approval_requests:
@@ -320,6 +407,8 @@ class RunImpl:
320
407
  final_output=check_tool_use.final_output,
321
408
  hooks=hooks,
322
409
  context_wrapper=context_wrapper,
410
+ tool_input_guardrail_results=tool_input_guardrail_results,
411
+ tool_output_guardrail_results=tool_output_guardrail_results,
323
412
  )
324
413
 
325
414
  # Now we can check if the model also produced a final output
@@ -330,43 +419,46 @@ class RunImpl:
330
419
  ItemHelpers.extract_last_text(message_items[-1].raw_item) if message_items else None
331
420
  )
332
421
 
333
- # There are two possibilities that lead to a final output:
334
- # 1. Structured output schema => always leads to a final output
335
- # 2. Plain text output schema => only leads to a final output if there are no tool calls
336
- if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
337
- final_output = output_schema.validate_json(potential_final_output_text)
338
- return await cls.execute_final_output(
339
- agent=agent,
340
- original_input=original_input,
341
- new_response=new_response,
342
- pre_step_items=pre_step_items,
343
- new_step_items=new_step_items,
344
- final_output=final_output,
345
- hooks=hooks,
346
- context_wrapper=context_wrapper,
347
- )
348
- elif (
349
- not output_schema or output_schema.is_plain_text()
350
- ) and not processed_response.has_tools_or_approvals_to_run():
351
- return await cls.execute_final_output(
352
- agent=agent,
353
- original_input=original_input,
354
- new_response=new_response,
355
- pre_step_items=pre_step_items,
356
- new_step_items=new_step_items,
357
- final_output=potential_final_output_text or "",
358
- hooks=hooks,
359
- context_wrapper=context_wrapper,
360
- )
361
- else:
362
- # If there's no final output, we can just run again
363
- return SingleStepResult(
364
- original_input=original_input,
365
- model_response=new_response,
366
- pre_step_items=pre_step_items,
367
- new_step_items=new_step_items,
368
- next_step=NextStepRunAgain(),
369
- )
422
+ # Generate final output only when there are no pending tool calls or approval requests.
423
+ if not processed_response.has_tools_or_approvals_to_run():
424
+ if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
425
+ final_output = output_schema.validate_json(potential_final_output_text)
426
+ return await cls.execute_final_output(
427
+ agent=agent,
428
+ original_input=original_input,
429
+ new_response=new_response,
430
+ pre_step_items=pre_step_items,
431
+ new_step_items=new_step_items,
432
+ final_output=final_output,
433
+ hooks=hooks,
434
+ context_wrapper=context_wrapper,
435
+ tool_input_guardrail_results=tool_input_guardrail_results,
436
+ tool_output_guardrail_results=tool_output_guardrail_results,
437
+ )
438
+ elif not output_schema or output_schema.is_plain_text():
439
+ return await cls.execute_final_output(
440
+ agent=agent,
441
+ original_input=original_input,
442
+ new_response=new_response,
443
+ pre_step_items=pre_step_items,
444
+ new_step_items=new_step_items,
445
+ final_output=potential_final_output_text or "",
446
+ hooks=hooks,
447
+ context_wrapper=context_wrapper,
448
+ tool_input_guardrail_results=tool_input_guardrail_results,
449
+ tool_output_guardrail_results=tool_output_guardrail_results,
450
+ )
451
+
452
+ # If there's no final output, we can just run again
453
+ return SingleStepResult(
454
+ original_input=original_input,
455
+ model_response=new_response,
456
+ pre_step_items=pre_step_items,
457
+ new_step_items=new_step_items,
458
+ next_step=NextStepRunAgain(),
459
+ tool_input_guardrail_results=tool_input_guardrail_results,
460
+ tool_output_guardrail_results=tool_output_guardrail_results,
461
+ )
370
462
 
371
463
  @classmethod
372
464
  def maybe_reset_tool_choice(
@@ -380,6 +472,22 @@ class RunImpl:
380
472
 
381
473
  return model_settings
382
474
 
475
+ @classmethod
476
+ async def initialize_computer_tools(
477
+ cls,
478
+ *,
479
+ tools: list[Tool],
480
+ context_wrapper: RunContextWrapper[TContext],
481
+ ) -> None:
482
+ """Resolve computer tools ahead of model invocation so each run gets its own instance."""
483
+ computer_tools = [tool for tool in tools if isinstance(tool, ComputerTool)]
484
+ if not computer_tools:
485
+ return
486
+
487
+ await asyncio.gather(
488
+ *(resolve_computer(tool=tool, run_context=context_wrapper) for tool in computer_tools)
489
+ )
490
+
383
491
  @classmethod
384
492
  def process_model_response(
385
493
  cls,
@@ -396,6 +504,8 @@ class RunImpl:
396
504
  functions = []
397
505
  computer_actions = []
398
506
  local_shell_calls = []
507
+ shell_calls = []
508
+ apply_patch_calls = []
399
509
  mcp_approval_requests = []
400
510
  tools_used: list[str] = []
401
511
  handoff_map = {handoff.tool_name: handoff for handoff in handoffs}
@@ -404,6 +514,10 @@ class RunImpl:
404
514
  local_shell_tool = next(
405
515
  (tool for tool in all_tools if isinstance(tool, LocalShellTool)), None
406
516
  )
517
+ shell_tool = next((tool for tool in all_tools if isinstance(tool, ShellTool)), None)
518
+ apply_patch_tool = next(
519
+ (tool for tool in all_tools if isinstance(tool, ApplyPatchTool)), None
520
+ )
407
521
  hosted_mcp_server_map = {
408
522
  tool.tool_config["server_label"]: tool
409
523
  for tool in all_tools
@@ -411,6 +525,59 @@ class RunImpl:
411
525
  }
412
526
 
413
527
  for output in response.output:
528
+ output_type = _get_mapping_or_attr(output, "type")
529
+ logger.debug(
530
+ "Processing output item type=%s class=%s",
531
+ output_type,
532
+ output.__class__.__name__ if hasattr(output, "__class__") else type(output),
533
+ )
534
+ if output_type == "shell_call":
535
+ items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent))
536
+ if not shell_tool:
537
+ tools_used.append("shell")
538
+ _error_tracing.attach_error_to_current_span(
539
+ SpanError(
540
+ message="Shell tool not found",
541
+ data={},
542
+ )
543
+ )
544
+ raise ModelBehaviorError("Model produced shell call without a shell tool.")
545
+ tools_used.append(shell_tool.name)
546
+ call_identifier = _get_mapping_or_attr(output, "call_id") or _get_mapping_or_attr(
547
+ output, "callId"
548
+ )
549
+ logger.debug("Queuing shell_call %s", call_identifier)
550
+ shell_calls.append(ToolRunShellCall(tool_call=output, shell_tool=shell_tool))
551
+ continue
552
+ if output_type == "compaction":
553
+ items.append(CompactionItem(raw_item=cast(TResponseInputItem, output), agent=agent))
554
+ continue
555
+ if output_type == "apply_patch_call":
556
+ items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent))
557
+ if apply_patch_tool:
558
+ tools_used.append(apply_patch_tool.name)
559
+ call_identifier = _get_mapping_or_attr(output, "call_id")
560
+ if not call_identifier:
561
+ call_identifier = _get_mapping_or_attr(output, "callId")
562
+ logger.debug("Queuing apply_patch_call %s", call_identifier)
563
+ apply_patch_calls.append(
564
+ ToolRunApplyPatchCall(
565
+ tool_call=output,
566
+ apply_patch_tool=apply_patch_tool,
567
+ )
568
+ )
569
+ else:
570
+ tools_used.append("apply_patch")
571
+ _error_tracing.attach_error_to_current_span(
572
+ SpanError(
573
+ message="Apply patch tool not found",
574
+ data={},
575
+ )
576
+ )
577
+ raise ModelBehaviorError(
578
+ "Model produced apply_patch call without an apply_patch tool."
579
+ )
580
+ continue
414
581
  if isinstance(output, ResponseOutputMessage):
415
582
  items.append(MessageOutputItem(raw_item=output, agent=agent))
416
583
  elif isinstance(output, ResponseFileSearchToolCall):
@@ -473,20 +640,84 @@ class RunImpl:
473
640
  tools_used.append("code_interpreter")
474
641
  elif isinstance(output, LocalShellCall):
475
642
  items.append(ToolCallItem(raw_item=output, agent=agent))
476
- tools_used.append("local_shell")
477
- if not local_shell_tool:
643
+ if shell_tool:
644
+ tools_used.append(shell_tool.name)
645
+ shell_calls.append(ToolRunShellCall(tool_call=output, shell_tool=shell_tool))
646
+ else:
647
+ tools_used.append("local_shell")
648
+ if not local_shell_tool:
649
+ _error_tracing.attach_error_to_current_span(
650
+ SpanError(
651
+ message="Local shell tool not found",
652
+ data={},
653
+ )
654
+ )
655
+ raise ModelBehaviorError(
656
+ "Model produced local shell call without a local shell tool."
657
+ )
658
+ local_shell_calls.append(
659
+ ToolRunLocalShellCall(tool_call=output, local_shell_tool=local_shell_tool)
660
+ )
661
+ elif isinstance(output, ResponseCustomToolCall) and _is_apply_patch_name(
662
+ output.name, apply_patch_tool
663
+ ):
664
+ parsed_operation = _parse_apply_patch_custom_input(output.input)
665
+ pseudo_call = {
666
+ "type": "apply_patch_call",
667
+ "call_id": output.call_id,
668
+ "operation": parsed_operation,
669
+ }
670
+ items.append(ToolCallItem(raw_item=cast(Any, pseudo_call), agent=agent))
671
+ if apply_patch_tool:
672
+ tools_used.append(apply_patch_tool.name)
673
+ apply_patch_calls.append(
674
+ ToolRunApplyPatchCall(
675
+ tool_call=pseudo_call,
676
+ apply_patch_tool=apply_patch_tool,
677
+ )
678
+ )
679
+ else:
680
+ tools_used.append("apply_patch")
478
681
  _error_tracing.attach_error_to_current_span(
479
682
  SpanError(
480
- message="Local shell tool not found",
683
+ message="Apply patch tool not found",
481
684
  data={},
482
685
  )
483
686
  )
484
687
  raise ModelBehaviorError(
485
- "Model produced local shell call without a local shell tool."
688
+ "Model produced apply_patch call without an apply_patch tool."
486
689
  )
487
- local_shell_calls.append(
488
- ToolRunLocalShellCall(tool_call=output, local_shell_tool=local_shell_tool)
489
- )
690
+ elif (
691
+ isinstance(output, ResponseFunctionToolCall)
692
+ and _is_apply_patch_name(output.name, apply_patch_tool)
693
+ and output.name not in function_map
694
+ ):
695
+ parsed_operation = _parse_apply_patch_function_args(output.arguments)
696
+ pseudo_call = {
697
+ "type": "apply_patch_call",
698
+ "call_id": output.call_id,
699
+ "operation": parsed_operation,
700
+ }
701
+ items.append(ToolCallItem(raw_item=cast(Any, pseudo_call), agent=agent))
702
+ if apply_patch_tool:
703
+ tools_used.append(apply_patch_tool.name)
704
+ apply_patch_calls.append(
705
+ ToolRunApplyPatchCall(
706
+ tool_call=pseudo_call, apply_patch_tool=apply_patch_tool
707
+ )
708
+ )
709
+ else:
710
+ tools_used.append("apply_patch")
711
+ _error_tracing.attach_error_to_current_span(
712
+ SpanError(
713
+ message="Apply patch tool not found",
714
+ data={},
715
+ )
716
+ )
717
+ raise ModelBehaviorError(
718
+ "Model produced apply_patch call without an apply_patch tool."
719
+ )
720
+ continue
490
721
 
491
722
  elif not isinstance(output, ResponseFunctionToolCall):
492
723
  logger.warning(f"Unexpected output type, ignoring: {type(output)}")
@@ -509,13 +740,29 @@ class RunImpl:
509
740
  # Regular function tool call
510
741
  else:
511
742
  if output.name not in function_map:
512
- _error_tracing.attach_error_to_current_span(
513
- SpanError(
514
- message="Tool not found",
515
- data={"tool_name": output.name},
743
+ if output_schema is not None and output.name == "json_tool_call":
744
+ # LiteLLM could generate non-existent tool calls for structured outputs
745
+ items.append(ToolCallItem(raw_item=output, agent=agent))
746
+ functions.append(
747
+ ToolRunFunction(
748
+ tool_call=output,
749
+ # this tool does not exist in function_map, so generate ad-hoc one,
750
+ # which just parses the input if it's a string, and returns the
751
+ # value otherwise
752
+ function_tool=_build_litellm_json_tool_call(output),
753
+ )
516
754
  )
517
- )
518
- raise ModelBehaviorError(f"Tool {output.name} not found in agent {agent.name}")
755
+ continue
756
+ else:
757
+ _error_tracing.attach_error_to_current_span(
758
+ SpanError(
759
+ message="Tool not found",
760
+ data={"tool_name": output.name},
761
+ )
762
+ )
763
+ error = f"Tool {output.name} not found in agent {agent.name}"
764
+ raise ModelBehaviorError(error)
765
+
519
766
  items.append(ToolCallItem(raw_item=output, agent=agent))
520
767
  functions.append(
521
768
  ToolRunFunction(
@@ -530,10 +777,161 @@ class RunImpl:
530
777
  functions=functions,
531
778
  computer_actions=computer_actions,
532
779
  local_shell_calls=local_shell_calls,
780
+ shell_calls=shell_calls,
781
+ apply_patch_calls=apply_patch_calls,
533
782
  tools_used=tools_used,
534
783
  mcp_approval_requests=mcp_approval_requests,
535
784
  )
536
785
 
786
+ @classmethod
787
+ async def _execute_input_guardrails(
788
+ cls,
789
+ *,
790
+ func_tool: FunctionTool,
791
+ tool_context: ToolContext[TContext],
792
+ agent: Agent[TContext],
793
+ tool_input_guardrail_results: list[ToolInputGuardrailResult],
794
+ ) -> str | None:
795
+ """Execute input guardrails for a tool.
796
+
797
+ Args:
798
+ func_tool: The function tool being executed.
799
+ tool_context: The tool execution context.
800
+ agent: The agent executing the tool.
801
+ tool_input_guardrail_results: List to append guardrail results to.
802
+
803
+ Returns:
804
+ None if tool execution should proceed, or a message string if execution should be
805
+ skipped.
806
+
807
+ Raises:
808
+ ToolInputGuardrailTripwireTriggered: If a guardrail triggers an exception.
809
+ """
810
+ if not func_tool.tool_input_guardrails:
811
+ return None
812
+
813
+ for guardrail in func_tool.tool_input_guardrails:
814
+ gr_out = await guardrail.run(
815
+ ToolInputGuardrailData(
816
+ context=tool_context,
817
+ agent=agent,
818
+ )
819
+ )
820
+
821
+ # Store the guardrail result
822
+ tool_input_guardrail_results.append(
823
+ ToolInputGuardrailResult(
824
+ guardrail=guardrail,
825
+ output=gr_out,
826
+ )
827
+ )
828
+
829
+ # Handle different behavior types
830
+ if gr_out.behavior["type"] == "raise_exception":
831
+ raise ToolInputGuardrailTripwireTriggered(guardrail=guardrail, output=gr_out)
832
+ elif gr_out.behavior["type"] == "reject_content":
833
+ # Set final_result to the message and skip tool execution
834
+ return gr_out.behavior["message"]
835
+ elif gr_out.behavior["type"] == "allow":
836
+ # Continue to next guardrail or tool execution
837
+ continue
838
+
839
+ return None
840
+
841
+ @classmethod
842
+ async def _execute_output_guardrails(
843
+ cls,
844
+ *,
845
+ func_tool: FunctionTool,
846
+ tool_context: ToolContext[TContext],
847
+ agent: Agent[TContext],
848
+ real_result: Any,
849
+ tool_output_guardrail_results: list[ToolOutputGuardrailResult],
850
+ ) -> Any:
851
+ """Execute output guardrails for a tool.
852
+
853
+ Args:
854
+ func_tool: The function tool being executed.
855
+ tool_context: The tool execution context.
856
+ agent: The agent executing the tool.
857
+ real_result: The actual result from the tool execution.
858
+ tool_output_guardrail_results: List to append guardrail results to.
859
+
860
+ Returns:
861
+ The final result after guardrail processing (may be modified).
862
+
863
+ Raises:
864
+ ToolOutputGuardrailTripwireTriggered: If a guardrail triggers an exception.
865
+ """
866
+ if not func_tool.tool_output_guardrails:
867
+ return real_result
868
+
869
+ final_result = real_result
870
+ for output_guardrail in func_tool.tool_output_guardrails:
871
+ gr_out = await output_guardrail.run(
872
+ ToolOutputGuardrailData(
873
+ context=tool_context,
874
+ agent=agent,
875
+ output=real_result,
876
+ )
877
+ )
878
+
879
+ # Store the guardrail result
880
+ tool_output_guardrail_results.append(
881
+ ToolOutputGuardrailResult(
882
+ guardrail=output_guardrail,
883
+ output=gr_out,
884
+ )
885
+ )
886
+
887
+ # Handle different behavior types
888
+ if gr_out.behavior["type"] == "raise_exception":
889
+ raise ToolOutputGuardrailTripwireTriggered(
890
+ guardrail=output_guardrail, output=gr_out
891
+ )
892
+ elif gr_out.behavior["type"] == "reject_content":
893
+ # Override the result with the guardrail message
894
+ final_result = gr_out.behavior["message"]
895
+ break
896
+ elif gr_out.behavior["type"] == "allow":
897
+ # Continue to next guardrail
898
+ continue
899
+
900
+ return final_result
901
+
902
+ @classmethod
903
+ async def _execute_tool_with_hooks(
904
+ cls,
905
+ *,
906
+ func_tool: FunctionTool,
907
+ tool_context: ToolContext[TContext],
908
+ agent: Agent[TContext],
909
+ hooks: RunHooks[TContext],
910
+ tool_call: ResponseFunctionToolCall,
911
+ ) -> Any:
912
+ """Execute the core tool function with before/after hooks.
913
+
914
+ Args:
915
+ func_tool: The function tool being executed.
916
+ tool_context: The tool execution context.
917
+ agent: The agent executing the tool.
918
+ hooks: The run hooks to execute.
919
+ tool_call: The tool call details.
920
+
921
+ Returns:
922
+ The result from the tool execution.
923
+ """
924
+ await asyncio.gather(
925
+ hooks.on_tool_start(tool_context, agent, func_tool),
926
+ (
927
+ agent.hooks.on_tool_start(tool_context, agent, func_tool)
928
+ if agent.hooks
929
+ else _coro.noop_coroutine()
930
+ ),
931
+ )
932
+
933
+ return await func_tool.on_invoke_tool(tool_context, tool_call.arguments)
934
+
537
935
  @classmethod
538
936
  async def execute_function_tool_calls(
539
937
  cls,
@@ -543,7 +941,13 @@ class RunImpl:
543
941
  hooks: RunHooks[TContext],
544
942
  context_wrapper: RunContextWrapper[TContext],
545
943
  config: RunConfig,
546
- ) -> list[FunctionToolResult]:
944
+ ) -> tuple[
945
+ list[FunctionToolResult], list[ToolInputGuardrailResult], list[ToolOutputGuardrailResult]
946
+ ]:
947
+ # Collect guardrail results
948
+ tool_input_guardrail_results: list[ToolInputGuardrailResult] = []
949
+ tool_output_guardrail_results: list[ToolOutputGuardrailResult] = []
950
+
547
951
  async def run_single_tool(
548
952
  func_tool: FunctionTool, tool_call: ResponseFunctionToolCall
549
953
  ) -> Any:
@@ -556,24 +960,48 @@ class RunImpl:
556
960
  if config.trace_include_sensitive_data:
557
961
  span_fn.span_data.input = tool_call.arguments
558
962
  try:
559
- _, _, result = await asyncio.gather(
560
- hooks.on_tool_start(tool_context, agent, func_tool),
561
- (
562
- agent.hooks.on_tool_start(tool_context, agent, func_tool)
563
- if agent.hooks
564
- else _coro.noop_coroutine()
565
- ),
566
- func_tool.on_invoke_tool(tool_context, tool_call.arguments),
963
+ # 1) Run input tool guardrails, if any
964
+ rejected_message = await cls._execute_input_guardrails(
965
+ func_tool=func_tool,
966
+ tool_context=tool_context,
967
+ agent=agent,
968
+ tool_input_guardrail_results=tool_input_guardrail_results,
567
969
  )
568
970
 
569
- await asyncio.gather(
570
- hooks.on_tool_end(tool_context, agent, func_tool, result),
571
- (
572
- agent.hooks.on_tool_end(tool_context, agent, func_tool, result)
573
- if agent.hooks
574
- else _coro.noop_coroutine()
575
- ),
576
- )
971
+ if rejected_message is not None:
972
+ # Input guardrail rejected the tool call
973
+ final_result = rejected_message
974
+ else:
975
+ # 2) Actually run the tool
976
+ real_result = await cls._execute_tool_with_hooks(
977
+ func_tool=func_tool,
978
+ tool_context=tool_context,
979
+ agent=agent,
980
+ hooks=hooks,
981
+ tool_call=tool_call,
982
+ )
983
+
984
+ # 3) Run output tool guardrails, if any
985
+ final_result = await cls._execute_output_guardrails(
986
+ func_tool=func_tool,
987
+ tool_context=tool_context,
988
+ agent=agent,
989
+ real_result=real_result,
990
+ tool_output_guardrail_results=tool_output_guardrail_results,
991
+ )
992
+
993
+ # 4) Tool end hooks (with final result, which may have been overridden)
994
+ await asyncio.gather(
995
+ hooks.on_tool_end(tool_context, agent, func_tool, final_result),
996
+ (
997
+ agent.hooks.on_tool_end(
998
+ tool_context, agent, func_tool, final_result
999
+ )
1000
+ if agent.hooks
1001
+ else _coro.noop_coroutine()
1002
+ ),
1003
+ )
1004
+ result = final_result
577
1005
  except Exception as e:
578
1006
  _error_tracing.attach_error_to_current_span(
579
1007
  SpanError(
@@ -596,19 +1024,21 @@ class RunImpl:
596
1024
 
597
1025
  results = await asyncio.gather(*tasks)
598
1026
 
599
- return [
1027
+ function_tool_results = [
600
1028
  FunctionToolResult(
601
1029
  tool=tool_run.function_tool,
602
1030
  output=result,
603
1031
  run_item=ToolCallOutputItem(
604
1032
  output=result,
605
- raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, str(result)),
1033
+ raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, result),
606
1034
  agent=agent,
607
1035
  ),
608
1036
  )
609
1037
  for tool_run, result in zip(tool_runs, results)
610
1038
  ]
611
1039
 
1040
+ return function_tool_results, tool_input_guardrail_results, tool_output_guardrail_results
1041
+
612
1042
  @classmethod
613
1043
  async def execute_local_shell_calls(
614
1044
  cls,
@@ -633,6 +1063,52 @@ class RunImpl:
633
1063
  )
634
1064
  return results
635
1065
 
1066
@classmethod
async def execute_shell_calls(
    cls,
    *,
    agent: Agent[TContext],
    calls: list[ToolRunShellCall],
    context_wrapper: RunContextWrapper[TContext],
    hooks: RunHooks[TContext],
    config: RunConfig,
) -> list[RunItem]:
    """Execute each shell tool call in order and collect the resulting run items.

    Calls are awaited one after another (not gathered), so the returned list
    has the same order as ``calls``.
    """
    return [
        await ShellAction.execute(
            agent=agent,
            call=shell_call,
            hooks=hooks,
            context_wrapper=context_wrapper,
            config=config,
        )
        for shell_call in calls
    ]
1088
+
1089
@classmethod
async def execute_apply_patch_calls(
    cls,
    *,
    agent: Agent[TContext],
    calls: list[ToolRunApplyPatchCall],
    context_wrapper: RunContextWrapper[TContext],
    hooks: RunHooks[TContext],
    config: RunConfig,
) -> list[RunItem]:
    """Execute each apply_patch tool call in order and collect the run items.

    Calls are awaited one after another (not gathered), so the returned list
    has the same order as ``calls``.
    """
    return [
        await ApplyPatchAction.execute(
            agent=agent,
            call=patch_call,
            hooks=hooks,
            context_wrapper=context_wrapper,
            config=config,
        )
        for patch_call in calls
    ]
1111
+
636
1112
  @classmethod
637
1113
  async def execute_computer_actions(
638
1114
  cls,
@@ -766,8 +1242,14 @@ class RunImpl:
766
1242
  input_filter = handoff.input_filter or (
767
1243
  run_config.handoff_input_filter if run_config else None
768
1244
  )
769
- if input_filter:
770
- logger.debug("Filtering inputs for handoff")
1245
+ handoff_nest_setting = handoff.nest_handoff_history
1246
+ should_nest_history = (
1247
+ handoff_nest_setting
1248
+ if handoff_nest_setting is not None
1249
+ else run_config.nest_handoff_history
1250
+ )
1251
+ handoff_input_data: HandoffInputData | None = None
1252
+ if input_filter or should_nest_history:
771
1253
  handoff_input_data = HandoffInputData(
772
1254
  input_history=tuple(original_input)
773
1255
  if isinstance(original_input, list)
@@ -776,6 +1258,17 @@ class RunImpl:
776
1258
  new_items=tuple(new_step_items),
777
1259
  run_context=context_wrapper,
778
1260
  )
1261
+
1262
+ if input_filter and handoff_input_data is not None:
1263
+ filter_name = getattr(input_filter, "__qualname__", repr(input_filter))
1264
+ from_agent = getattr(agent, "name", agent.__class__.__name__)
1265
+ to_agent = getattr(new_agent, "name", new_agent.__class__.__name__)
1266
+ logger.debug(
1267
+ "Filtering handoff inputs with %s for %s -> %s",
1268
+ filter_name,
1269
+ from_agent,
1270
+ to_agent,
1271
+ )
779
1272
  if not callable(input_filter):
780
1273
  _error_tracing.attach_error_to_span(
781
1274
  span_handoff,
@@ -805,6 +1298,33 @@ class RunImpl:
805
1298
  )
806
1299
  pre_step_items = list(filtered.pre_handoff_items)
807
1300
  new_step_items = list(filtered.new_items)
1301
+ # For custom input filters, use input_items if available, otherwise new_items
1302
+ if filtered.input_items is not None:
1303
+ session_step_items = list(filtered.new_items)
1304
+ new_step_items = list(filtered.input_items)
1305
+ else:
1306
+ session_step_items = None
1307
+ elif should_nest_history and handoff_input_data is not None:
1308
+ nested = nest_handoff_history(
1309
+ handoff_input_data,
1310
+ history_mapper=run_config.handoff_history_mapper,
1311
+ )
1312
+ original_input = (
1313
+ nested.input_history
1314
+ if isinstance(nested.input_history, str)
1315
+ else list(nested.input_history)
1316
+ )
1317
+ pre_step_items = list(nested.pre_handoff_items)
1318
+ # Keep full new_items for session history.
1319
+ session_step_items = list(nested.new_items)
1320
+ # Use input_items (filtered) for model input if available.
1321
+ if nested.input_items is not None:
1322
+ new_step_items = list(nested.input_items)
1323
+ else:
1324
+ new_step_items = session_step_items
1325
+ else:
1326
+ # No filtering or nesting - session_step_items not needed
1327
+ session_step_items = None
808
1328
 
809
1329
  return SingleStepResult(
810
1330
  original_input=original_input,
@@ -812,6 +1332,9 @@ class RunImpl:
812
1332
  pre_step_items=pre_step_items,
813
1333
  new_step_items=new_step_items,
814
1334
  next_step=NextStepHandoff(new_agent),
1335
+ tool_input_guardrail_results=[],
1336
+ tool_output_guardrail_results=[],
1337
+ session_step_items=session_step_items,
815
1338
  )
816
1339
 
817
1340
  @classmethod
@@ -860,9 +1383,13 @@ class RunImpl:
860
1383
  final_output: Any,
861
1384
  hooks: RunHooks[TContext],
862
1385
  context_wrapper: RunContextWrapper[TContext],
1386
+ tool_input_guardrail_results: list[ToolInputGuardrailResult],
1387
+ tool_output_guardrail_results: list[ToolOutputGuardrailResult],
863
1388
  ) -> SingleStepResult:
864
1389
  # Run the on_end hooks
865
- await cls.run_final_output_hooks(agent, hooks, context_wrapper, final_output)
1390
+ await cls.run_final_output_hooks(
1391
+ agent, hooks, context_wrapper, original_input, final_output
1392
+ )
866
1393
 
867
1394
  return SingleStepResult(
868
1395
  original_input=original_input,
@@ -870,6 +1397,8 @@ class RunImpl:
870
1397
  pre_step_items=pre_step_items,
871
1398
  new_step_items=new_step_items,
872
1399
  next_step=NextStepFinalOutput(final_output),
1400
+ tool_input_guardrail_results=tool_input_guardrail_results,
1401
+ tool_output_guardrail_results=tool_output_guardrail_results,
873
1402
  )
874
1403
 
875
1404
  @classmethod
@@ -878,11 +1407,17 @@ class RunImpl:
878
1407
  agent: Agent[TContext],
879
1408
  hooks: RunHooks[TContext],
880
1409
  context_wrapper: RunContextWrapper[TContext],
1410
+ original_input: str | list[TResponseInputItem],
881
1411
  final_output: Any,
882
1412
  ):
1413
+ agent_hook_context = AgentHookContext(
1414
+ context=context_wrapper.context,
1415
+ usage=context_wrapper.usage,
1416
+ turn_input=ItemHelpers.input_to_new_input_list(original_input),
1417
+ )
883
1418
  await asyncio.gather(
884
- hooks.on_agent_end(context_wrapper, agent, final_output),
885
- agent.hooks.on_end(context_wrapper, agent, final_output)
1419
+ hooks.on_agent_end(agent_hook_context, agent, final_output),
1420
+ agent.hooks.on_end(agent_hook_context, agent, final_output)
886
1421
  if agent.hooks
887
1422
  else _coro.noop_coroutine(),
888
1423
  )
@@ -934,6 +1469,8 @@ class RunImpl:
934
1469
  event = RunItemStreamEvent(item=item, name="reasoning_item_created")
935
1470
  elif isinstance(item, MCPApprovalRequestItem):
936
1471
  event = RunItemStreamEvent(item=item, name="mcp_approval_requested")
1472
+ elif isinstance(item, MCPApprovalResponseItem):
1473
+ event = RunItemStreamEvent(item=item, name="mcp_approval_response")
937
1474
  elif isinstance(item, MCPListToolsItem):
938
1475
  event = RunItemStreamEvent(item=item, name="mcp_list_tools")
939
1476
 
@@ -961,7 +1498,10 @@ class RunImpl:
961
1498
  context_wrapper: RunContextWrapper[TContext],
962
1499
  config: RunConfig,
963
1500
  ) -> ToolsToFinalOutputResult:
964
- """Returns (i, final_output)."""
1501
+ """Determine if tool results should produce a final output.
1502
+ Returns:
1503
+ ToolsToFinalOutputResult: Indicates whether final output is ready, and the output value.
1504
+ """
965
1505
  if not tool_results:
966
1506
  return _NOT_FINAL_OUTPUT
967
1507
 
@@ -1004,6 +1544,7 @@ class TraceCtxManager:
1004
1544
  group_id: str | None,
1005
1545
  metadata: dict[str, Any] | None,
1006
1546
  disabled: bool,
1547
+ tracing: TracingConfig | None = None,
1007
1548
  ):
1008
1549
  self.trace: Trace | None = None
1009
1550
  self.workflow_name = workflow_name
@@ -1011,6 +1552,7 @@ class TraceCtxManager:
1011
1552
  self.group_id = group_id
1012
1553
  self.metadata = metadata
1013
1554
  self.disabled = disabled
1555
+ self.tracing = tracing
1014
1556
 
1015
1557
  def __enter__(self) -> TraceCtxManager:
1016
1558
  current_trace = get_current_trace()
@@ -1020,6 +1562,7 @@ class TraceCtxManager:
1020
1562
  trace_id=self.trace_id,
1021
1563
  group_id=self.group_id,
1022
1564
  metadata=self.metadata,
1565
+ tracing=self.tracing,
1023
1566
  disabled=self.disabled,
1024
1567
  )
1025
1568
  self.trace.start(mark_as_current=True)
@@ -1043,10 +1586,11 @@ class ComputerAction:
1043
1586
  config: RunConfig,
1044
1587
  acknowledged_safety_checks: list[ComputerCallOutputAcknowledgedSafetyCheck] | None = None,
1045
1588
  ) -> RunItem:
1589
+ computer = await resolve_computer(tool=action.computer_tool, run_context=context_wrapper)
1046
1590
  output_func = (
1047
- cls._get_screenshot_async(action.computer_tool.computer, action.tool_call)
1048
- if isinstance(action.computer_tool.computer, AsyncComputer)
1049
- else cls._get_screenshot_sync(action.computer_tool.computer, action.tool_call)
1591
+ cls._get_screenshot_async(computer, action.tool_call)
1592
+ if isinstance(computer, AsyncComputer)
1593
+ else cls._get_screenshot_sync(computer, action.tool_call)
1050
1594
  )
1051
1595
 
1052
1596
  _, _, output = await asyncio.gather(
@@ -1180,13 +1724,576 @@ class LocalShellAction:
1180
1724
  ),
1181
1725
  )
1182
1726
 
1727
+ raw_payload: dict[str, Any] = {
1728
+ "type": "local_shell_call_output",
1729
+ "call_id": call.tool_call.call_id,
1730
+ "output": result,
1731
+ }
1732
+ return ToolCallOutputItem(
1733
+ agent=agent,
1734
+ output=result,
1735
+ raw_item=raw_payload,
1736
+ )
1737
+
1738
+
1739
class ShellAction:
    """Runs a single shell tool call and packages its output as a run item."""

    @staticmethod
    def _to_structured_entry(entry: Mapping[str, Any]) -> dict[str, Any]:
        """Convert one raw shell-output entry into the structured ``output`` form.

        Drops ``status``, ``provider_data``, ``exit_code`` and ``command`` from a
        copy of the entry and rewrites ``outcome`` into a mapping with a ``type``
        (plus ``exit_code`` unless the outcome is a timeout).
        """
        cleaned = dict(entry)
        entry_status = cleaned.pop("status", None)
        cleaned.pop("provider_data", None)
        exit_code = cleaned.pop("exit_code", None)
        cleaned.pop("command", None)

        outcome = cleaned.get("outcome")
        if isinstance(outcome, str):
            # String outcomes ("success"/"failure") carry no type; derive it
            # from the entry status.
            outcome_kind = "timeout" if entry_status == "timeout" else "exit"
            outcome_payload: dict[str, Any] = {"type": outcome_kind}
            if outcome_kind == "exit":
                outcome_payload["exit_code"] = _resolve_exit_code(exit_code, outcome)
            cleaned["outcome"] = outcome_payload
        elif isinstance(outcome, Mapping):
            outcome_payload = dict(outcome)
            nested_status = cast(Optional[str], outcome_payload.pop("status", None))
            if outcome_payload.get("type") != "timeout":
                # setdefault keeps an exit_code key that is already present.
                outcome_payload.setdefault(
                    "exit_code",
                    _resolve_exit_code(
                        exit_code,
                        nested_status if isinstance(nested_status, str) else None,
                    ),
                )
            cleaned["outcome"] = outcome_payload
        return cleaned

    @classmethod
    async def execute(
        cls,
        *,
        agent: Agent[TContext],
        call: ToolRunShellCall,
        hooks: RunHooks[TContext],
        context_wrapper: RunContextWrapper[TContext],
        config: RunConfig,
    ) -> RunItem:
        """Run the shell tool's executor for ``call`` and return a ToolCallOutputItem.

        Tool start/end hooks (run-level and agent-level) are invoked around the
        executor. Executor exceptions are caught, logged, and reported as a
        ``"failed"`` status with the formatted error as output text.
        """
        shell_tool = call.shell_tool
        await asyncio.gather(
            hooks.on_tool_start(context_wrapper, agent, shell_tool),
            (
                agent.hooks.on_tool_start(context_wrapper, agent, shell_tool)
                if agent.hooks
                else _coro.noop_coroutine()
            ),
        )

        shell_call = _coerce_shell_call(call.tool_call)
        request = ShellCommandRequest(ctx_wrapper=context_wrapper, data=shell_call)
        requested_limit = _normalize_max_output_length(shell_call.action.max_output_length)

        status: Literal["completed", "failed"] = "completed"
        output_text = ""
        shell_output_payload: list[dict[str, Any]] | None = None
        provider_meta: dict[str, Any] | None = None
        max_output_length: int | None = None

        try:
            pending = call.shell_tool.executor(request)
            result = await pending if inspect.isawaitable(pending) else pending

            if not isinstance(result, ShellResult):
                # Anything that is not a ShellResult is stringified as-is.
                output_text = str(result)
                if requested_limit is not None:
                    max_output_length = requested_limit
                    output_text = output_text[:max_output_length]
            else:
                normalized = [_normalize_shell_output(item) for item in result.output]
                result_limit = _normalize_max_output_length(result.max_output_length)
                # Effective limit is the tighter of the result's and the request's.
                limits = [
                    limit for limit in (result_limit, requested_limit) if limit is not None
                ]
                max_output_length = min(limits) if limits else None
                if max_output_length is not None:
                    normalized = _truncate_shell_outputs(normalized, max_output_length)
                output_text = _render_shell_outputs(normalized)
                if max_output_length is not None:
                    output_text = output_text[:max_output_length]
                shell_output_payload = [_serialize_shell_output(item) for item in normalized]
                provider_meta = dict(result.provider_data or {})
        except Exception as exc:
            status = "failed"
            output_text = _format_shell_error(exc)
            if requested_limit is not None:
                max_output_length = requested_limit
                output_text = output_text[:max_output_length]
            logger.error("Shell executor failed: %s", exc, exc_info=True)

        await asyncio.gather(
            hooks.on_tool_end(context_wrapper, agent, call.shell_tool, output_text),
            (
                agent.hooks.on_tool_end(context_wrapper, agent, call.shell_tool, output_text)
                if agent.hooks
                else _coro.noop_coroutine()
            ),
        )

        # Prefer the serialized per-command outputs; otherwise synthesize a
        # single entry from the plain output text.
        raw_entries: list[dict[str, Any]] | None = None
        if shell_output_payload:
            raw_entries = shell_output_payload
        elif output_text:
            raw_entries = [
                {
                    "stdout": output_text,
                    "stderr": "",
                    "status": status,
                    "outcome": "success" if status == "completed" else "failure",
                }
            ]

        structured_output: list[dict[str, Any]] = [
            cls._to_structured_entry(entry) for entry in (raw_entries or [])
        ]

        raw_item: dict[str, Any] = {
            "type": "shell_call_output",
            "call_id": shell_call.call_id,
            "output": structured_output,
            "status": status,
        }
        if max_output_length is not None:
            raw_item["max_output_length"] = max_output_length
        if raw_entries:
            raw_item["shell_output"] = raw_entries
        if provider_meta:
            raw_item["provider_data"] = provider_meta

        return ToolCallOutputItem(
            agent=agent,
            output=output_text,
            raw_item=cast(Any, raw_item),
        )
1879
+
1880
+
1881
class ApplyPatchAction:
    """Runs a single apply_patch tool call through the tool's editor."""

    @classmethod
    async def execute(
        cls,
        *,
        agent: Agent[TContext],
        call: ToolRunApplyPatchCall,
        hooks: RunHooks[TContext],
        context_wrapper: RunContextWrapper[TContext],
        config: RunConfig,
    ) -> RunItem:
        """Apply the requested file operation and return a ToolCallOutputItem.

        Tool start/end hooks (run-level and agent-level) are invoked around the
        editor call. Any exception from coercing the operation or from the
        editor is caught, logged, and reported as a ``"failed"`` status.
        """
        patch_tool = call.apply_patch_tool
        await asyncio.gather(
            hooks.on_tool_start(context_wrapper, agent, patch_tool),
            (
                agent.hooks.on_tool_start(context_wrapper, agent, patch_tool)
                if agent.hooks
                else _coro.noop_coroutine()
            ),
        )

        status: Literal["completed", "failed"] = "completed"
        output_text = ""

        try:
            operation = _coerce_apply_patch_operation(
                call.tool_call,
                context_wrapper=context_wrapper,
            )
            editor = patch_tool.editor
            if operation.type not in ("create_file", "update_file", "delete_file"):
                # pragma: no cover - validated in _coerce_apply_patch_operation
                raise ModelBehaviorError(f"Unsupported apply_patch operation: {operation.type}")
            # The editor method names match the operation type names.
            pending = getattr(editor, operation.type)(operation)

            outcome = _normalize_apply_patch_result(
                await pending if inspect.isawaitable(pending) else pending
            )
            if outcome:
                if outcome.status in {"completed", "failed"}:
                    status = outcome.status
                if outcome.output:
                    output_text = outcome.output
        except Exception as exc:
            status = "failed"
            output_text = _format_shell_error(exc)
            logger.error("Apply patch editor failed: %s", exc, exc_info=True)

        await asyncio.gather(
            hooks.on_tool_end(context_wrapper, agent, patch_tool, output_text),
            (
                agent.hooks.on_tool_end(context_wrapper, agent, patch_tool, output_text)
                if agent.hooks
                else _coro.noop_coroutine()
            ),
        )

        raw_item: dict[str, Any] = {
            "type": "apply_patch_call_output",
            "call_id": _extract_apply_patch_call_id(call.tool_call),
            "status": status,
        }
        # The "output" key is only included when there is text to report.
        if output_text:
            raw_item["output"] = output_text

        return ToolCallOutputItem(
            agent=agent,
            output=output_text,
            raw_item=cast(Any, raw_item),
        )
1954
+
1955
+
1956
def _first_exit_code_candidate(*candidates: Any) -> Any:
    """Pick an exit-code candidate without discarding a legitimate ``0``.

    Returns the first truthy candidate (the precedence the previous ``or``
    chain had); when none is truthy, returns the first candidate that is not
    ``None`` so that an explicit ``0`` survives. Returns ``None`` otherwise.
    """
    fallback: Any = None
    for candidate in candidates:
        if candidate:
            return candidate
        if fallback is None and candidate is not None:
            fallback = candidate
    return fallback


def _normalize_shell_output(entry: ShellCommandOutput | Mapping[str, Any]) -> ShellCommandOutput:
    """Coerce a shell output entry into a ``ShellCommandOutput``.

    ``ShellCommandOutput`` instances are returned unchanged. Mappings are read
    for ``stdout``, ``stderr``, ``command``, ``provider_data`` and ``outcome``;
    the exit code is taken from ``exit_code``/``exitCode`` (on the outcome
    mapping, or on the entry itself) or derived from a ``"success"`` /
    ``"failure"`` outcome string.
    """
    if isinstance(entry, ShellCommandOutput):
        return entry

    stdout = str(entry.get("stdout", "") or "")
    stderr = str(entry.get("stderr", "") or "")
    command_value = entry.get("command")
    provider_data_value = entry.get("provider_data")
    outcome_value = entry.get("outcome")

    outcome_type: Literal["exit", "timeout"] = "exit"
    exit_code_value: Any | None = None

    if isinstance(outcome_value, Mapping):
        type_value = outcome_value.get("type")
        if type_value == "timeout":
            outcome_type = "timeout"
        elif isinstance(type_value, str):
            outcome_type = "exit"
        # A plain `a or b` here turned an explicit exit code of 0 into None.
        exit_code_value = _first_exit_code_candidate(
            outcome_value.get("exit_code"),
            outcome_value.get("exitCode"),
        )
    else:
        status_str = str(entry.get("status", "completed") or "completed").lower()
        if status_str == "timeout":
            outcome_type = "timeout"
        if isinstance(outcome_value, str):
            if outcome_value == "failure":
                exit_code_value = 1
            elif outcome_value == "success":
                exit_code_value = 0
        # Keeps the 0 derived from "success" when the entry has no explicit code.
        exit_code_value = _first_exit_code_candidate(
            exit_code_value,
            entry.get("exit_code"),
            entry.get("exitCode"),
        )

    outcome = ShellCallOutcome(
        type=outcome_type,
        exit_code=_normalize_exit_code(exit_code_value),
    )

    return ShellCommandOutput(
        stdout=stdout,
        stderr=stderr,
        outcome=outcome,
        command=str(command_value) if command_value is not None else None,
        provider_data=cast(dict[str, Any], provider_data_value)
        if isinstance(provider_data_value, Mapping)
        else provider_data_value,
    )
2001
+
2002
+
2003
+ def _serialize_shell_output(output: ShellCommandOutput) -> dict[str, Any]:
2004
+ payload: dict[str, Any] = {
2005
+ "stdout": output.stdout,
2006
+ "stderr": output.stderr,
2007
+ "status": output.status,
2008
+ "outcome": {"type": output.outcome.type},
2009
+ }
2010
+ if output.outcome.type == "exit":
2011
+ payload["outcome"]["exit_code"] = output.outcome.exit_code
2012
+ if output.outcome.exit_code is not None:
2013
+ payload["exit_code"] = output.outcome.exit_code
2014
+ if output.command is not None:
2015
+ payload["command"] = output.command
2016
+ if output.provider_data:
2017
+ payload["provider_data"] = output.provider_data
2018
+ return payload
2019
+
2020
+
2021
def _resolve_exit_code(raw_exit_code: Any, outcome_status: str | None) -> int:
    """Return a concrete exit code, falling back to the outcome status.

    An exit code that normalizes to an int wins. Otherwise ``"failure"``
    (case-insensitive) maps to 1 and every other status, including
    ``"success"`` and ``None``, maps to 0.
    """
    explicit_code = _normalize_exit_code(raw_exit_code)
    if explicit_code is not None:
        return explicit_code
    return 1 if (outcome_status or "").lower() == "failure" else 0
2032
+
2033
+
2034
+ def _normalize_exit_code(value: Any) -> int | None:
2035
+ if value is None:
2036
+ return None
2037
+ try:
2038
+ return int(value)
2039
+ except (TypeError, ValueError):
2040
+ return None
2041
+
2042
+
2043
+ def _render_shell_outputs(outputs: Sequence[ShellCommandOutput]) -> str:
2044
+ if not outputs:
2045
+ return "(no output)"
2046
+
2047
+ rendered_chunks: list[str] = []
2048
+ for result in outputs:
2049
+ chunk_lines: list[str] = []
2050
+ if result.command:
2051
+ chunk_lines.append(f"$ {result.command}")
2052
+
2053
+ stdout = result.stdout.rstrip("\n")
2054
+ stderr = result.stderr.rstrip("\n")
2055
+
2056
+ if stdout:
2057
+ chunk_lines.append(stdout)
2058
+ if stderr:
2059
+ if stdout:
2060
+ chunk_lines.append("")
2061
+ chunk_lines.append("stderr:")
2062
+ chunk_lines.append(stderr)
2063
+
2064
+ if result.exit_code not in (None, 0):
2065
+ chunk_lines.append(f"exit code: {result.exit_code}")
2066
+ if result.status == "timeout":
2067
+ chunk_lines.append("status: timeout")
2068
+
2069
+ chunk = "\n".join(chunk_lines).strip()
2070
+ rendered_chunks.append(chunk if chunk else "(no output)")
2071
+
2072
+ return "\n\n".join(rendered_chunks)
2073
+
2074
+
2075
def _truncate_shell_outputs(
    outputs: Sequence[ShellCommandOutput], max_length: int
) -> list[ShellCommandOutput]:
    """Trim stdout/stderr across all outputs to a shared character budget.

    The budget is consumed in order: each output's stdout first, then its
    stderr. Once it is exhausted, remaining streams become empty strings.
    ``outcome``, ``command`` and ``provider_data`` are carried over unchanged.
    A non-positive ``max_length`` empties every stream.
    """

    def _rebuild(source: ShellCommandOutput, stdout: str, stderr: str) -> ShellCommandOutput:
        return ShellCommandOutput(
            stdout=stdout,
            stderr=stderr,
            outcome=source.outcome,
            command=source.command,
            provider_data=source.provider_data,
        )

    if max_length <= 0:
        return [_rebuild(item, "", "") for item in outputs]

    budget = max_length
    clipped: list[ShellCommandOutput] = []
    for item in outputs:
        kept_stdout = ""
        kept_stderr = ""
        if budget > 0 and item.stdout:
            kept_stdout = item.stdout[:budget]
            budget -= len(kept_stdout)
        if budget > 0 and item.stderr:
            kept_stderr = item.stderr[:budget]
            budget -= len(kept_stderr)
        clipped.append(_rebuild(item, kept_stdout, kept_stderr))

    return clipped
2112
+
2113
+
2114
+ def _normalize_max_output_length(value: int | None) -> int | None:
2115
+ if value is None:
2116
+ return None
2117
+ return max(0, value)
2118
+
2119
+
2120
+ def _format_shell_error(error: Exception | BaseException | Any) -> str:
2121
+ if isinstance(error, Exception):
2122
+ message = str(error)
2123
+ return message or error.__class__.__name__
2124
+ try:
2125
+ return str(error)
2126
+ except Exception: # pragma: no cover - fallback only
2127
+ return repr(error)
2128
+
2129
+
2130
+ def _get_mapping_or_attr(target: Any, key: str) -> Any:
2131
+ if isinstance(target, Mapping):
2132
+ return target.get(key)
2133
+ return getattr(target, key, None)
2134
+
2135
+
2136
def _extract_shell_call_id(tool_call: Any) -> str:
    """Return the shell call's id as a string.

    Reads ``call_id`` and falls back to ``callId`` when the former is missing
    or falsy.

    Raises:
        ModelBehaviorError: If neither key yields a truthy value.
    """
    call_id = _get_mapping_or_attr(tool_call, "call_id") or _get_mapping_or_attr(
        tool_call, "callId"
    )
    if not call_id:
        raise ModelBehaviorError("Shell call is missing call_id.")
    return str(call_id)
2143
+
2144
+
2145
def _coerce_shell_call(tool_call: Any) -> ShellCallData:
    """Build a ``ShellCallData`` from a raw shell tool call (mapping or object).

    Reads ``call_id``, the ``action`` payload (``commands``, a timeout under
    ``timeout_ms``/``timeoutMs``/``timeout``, and ``max_output_length`` /
    ``maxOutputLength``) and the optional ``status``.

    Raises:
        ModelBehaviorError: If the call id or action payload is missing, or if
            ``commands`` is not a non-string sequence with at least one
            non-``None`` entry.
    """
    call_id = _extract_shell_call_id(tool_call)
    action_payload = _get_mapping_or_attr(tool_call, "action")
    if action_payload is None:
        raise ModelBehaviorError("Shell call is missing an action payload.")

    commands_value = _get_mapping_or_attr(action_payload, "commands")
    # str/bytes are themselves Sequences; accepting them would split a single
    # command into one "command" per character (or byte value).
    if isinstance(commands_value, (str, bytes, bytearray)) or not isinstance(
        commands_value, Sequence
    ):
        raise ModelBehaviorError("Shell call action is missing commands.")
    commands: list[str] = [str(entry) for entry in commands_value if entry is not None]
    if not commands:
        raise ModelBehaviorError("Shell call action must include at least one command.")

    timeout_value = (
        _get_mapping_or_attr(action_payload, "timeout_ms")
        or _get_mapping_or_attr(action_payload, "timeoutMs")
        or _get_mapping_or_attr(action_payload, "timeout")
    )
    timeout_ms = int(timeout_value) if isinstance(timeout_value, (int, float)) else None

    max_length_value = _get_mapping_or_attr(action_payload, "max_output_length")
    if max_length_value is None:
        max_length_value = _get_mapping_or_attr(action_payload, "maxOutputLength")
    max_output_length = (
        int(max_length_value) if isinstance(max_length_value, (int, float)) else None
    )

    action = ShellActionRequest(
        commands=commands,
        timeout_ms=timeout_ms,
        max_output_length=max_output_length,
    )

    # Only the two known status strings are kept; anything else becomes None.
    status_value = _get_mapping_or_attr(tool_call, "status")
    status_literal: Literal["in_progress", "completed"] | None = None
    if isinstance(status_value, str):
        lowered = status_value.lower()
        if lowered in {"in_progress", "completed"}:
            status_literal = cast(Literal["in_progress", "completed"], lowered)

    return ShellCallData(call_id=call_id, action=action, status=status_literal, raw=tool_call)
2190
+
2191
+
2192
+ def _parse_apply_patch_custom_input(input_json: str) -> dict[str, Any]:
2193
+ try:
2194
+ parsed = json.loads(input_json or "{}")
2195
+ except json.JSONDecodeError as exc:
2196
+ raise ModelBehaviorError(f"Invalid apply_patch input JSON: {exc}") from exc
2197
+ if not isinstance(parsed, Mapping):
2198
+ raise ModelBehaviorError("Apply patch input must be a JSON object.")
2199
+ return dict(parsed)
2200
+
2201
+
2202
+ def _parse_apply_patch_function_args(arguments: str) -> dict[str, Any]:
2203
+ try:
2204
+ parsed = json.loads(arguments or "{}")
2205
+ except json.JSONDecodeError as exc:
2206
+ raise ModelBehaviorError(f"Invalid apply_patch arguments JSON: {exc}") from exc
2207
+ if not isinstance(parsed, Mapping):
2208
+ raise ModelBehaviorError("Apply patch arguments must be a JSON object.")
2209
+ return dict(parsed)
2210
+
2211
+
2212
def _extract_apply_patch_call_id(tool_call: Any) -> str:
    """Return the apply_patch call's id as a string.

    Reads ``call_id`` and falls back to ``callId`` when the former is missing
    or falsy.

    Raises:
        ModelBehaviorError: If neither key yields a truthy value.
    """
    call_id = _get_mapping_or_attr(tool_call, "call_id") or _get_mapping_or_attr(
        tool_call, "callId"
    )
    if not call_id:
        raise ModelBehaviorError("Apply patch call is missing call_id.")
    return str(call_id)
2219
+
2220
+
2221
def _coerce_apply_patch_operation(
    tool_call: Any, *, context_wrapper: RunContextWrapper[Any]
) -> ApplyPatchOperation:
    """Validate a raw apply_patch tool call and build an ``ApplyPatchOperation``.

    The ``operation`` payload must carry a known ``type``, a non-empty string
    ``path`` and, for ``create_file``/``update_file``, a non-empty string
    ``diff``. ``delete_file`` operations always get ``diff=None``.

    Raises:
        ModelBehaviorError: If any of those requirements is not met.
    """
    operation_payload = _get_mapping_or_attr(tool_call, "operation")
    if operation_payload is None:
        raise ModelBehaviorError("Apply patch call is missing an operation payload.")

    type_name = str(_get_mapping_or_attr(operation_payload, "type"))
    if type_name not in {"create_file", "update_file", "delete_file"}:
        raise ModelBehaviorError(f"Unknown apply_patch operation: {type_name}")
    operation_type = cast(Literal["create_file", "update_file", "delete_file"], type_name)

    target_path = _get_mapping_or_attr(operation_payload, "path")
    if not (isinstance(target_path, str) and target_path):
        raise ModelBehaviorError("Apply patch operation is missing a valid path.")

    raw_diff = _get_mapping_or_attr(operation_payload, "diff")
    diff: str | None = None
    if operation_type != "delete_file":
        # create_file and update_file both require a diff payload.
        if not (isinstance(raw_diff, str) and raw_diff):
            raise ModelBehaviorError(
                f"Apply patch operation {operation_type} is missing the required diff payload."
            )
        diff = raw_diff

    return ApplyPatchOperation(
        type=operation_type,
        path=str(target_path),
        diff=diff,
        ctx_wrapper=context_wrapper,
    )
2253
+
2254
+
2255
+ def _normalize_apply_patch_result(
2256
+ result: ApplyPatchResult | Mapping[str, Any] | str | None,
2257
+ ) -> ApplyPatchResult | None:
2258
+ if result is None:
2259
+ return None
2260
+ if isinstance(result, ApplyPatchResult):
2261
+ return result
2262
+ if isinstance(result, Mapping):
2263
+ status = result.get("status")
2264
+ output = result.get("output")
2265
+ normalized_status = status if status in {"completed", "failed"} else None
2266
+ normalized_output = str(output) if output is not None else None
2267
+ return ApplyPatchResult(status=normalized_status, output=normalized_output)
2268
+ if isinstance(result, str):
2269
+ return ApplyPatchResult(output=result)
2270
+ return ApplyPatchResult(output=str(result))
2271
+
2272
+
2273
+ def _is_apply_patch_name(name: str | None, tool: ApplyPatchTool | None) -> bool:
2274
+ if not name:
2275
+ return False
2276
+ candidate = name.strip().lower()
2277
+ if candidate.startswith("apply_patch"):
2278
+ return True
2279
+ if tool and candidate == tool.name.strip().lower():
2280
+ return True
2281
+ return False
2282
+
2283
+
2284
def _build_litellm_json_tool_call(output: ResponseFunctionToolCall) -> FunctionTool:
    """Create a ``FunctionTool`` named after ``output`` that echoes its input.

    The tool's invoke handler returns string input parsed as JSON and any
    other input unchanged. The description is set to the tool name and the
    parameter schema is empty.
    """

    async def on_invoke_tool(_ctx: ToolContext[Any], value: Any) -> Any:
        if not isinstance(value, str):
            return value

        import json

        return json.loads(value)

    return FunctionTool(
        name=output.name,
        description=output.name,
        params_json_schema={},
        on_invoke_tool=on_invoke_tool,
        strict_json_schema=True,
        is_enabled=True,
    )