openai-agents 0.2.6__py3-none-any.whl → 0.6.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +105 -4
- agents/_debug.py +15 -4
- agents/_run_impl.py +1203 -96
- agents/agent.py +294 -21
- agents/apply_diff.py +329 -0
- agents/editor.py +47 -0
- agents/exceptions.py +35 -0
- agents/extensions/experimental/__init__.py +6 -0
- agents/extensions/experimental/codex/__init__.py +92 -0
- agents/extensions/experimental/codex/codex.py +89 -0
- agents/extensions/experimental/codex/codex_options.py +35 -0
- agents/extensions/experimental/codex/codex_tool.py +1142 -0
- agents/extensions/experimental/codex/events.py +162 -0
- agents/extensions/experimental/codex/exec.py +263 -0
- agents/extensions/experimental/codex/items.py +245 -0
- agents/extensions/experimental/codex/output_schema_file.py +50 -0
- agents/extensions/experimental/codex/payloads.py +31 -0
- agents/extensions/experimental/codex/thread.py +214 -0
- agents/extensions/experimental/codex/thread_options.py +54 -0
- agents/extensions/experimental/codex/turn_options.py +36 -0
- agents/extensions/handoff_filters.py +13 -1
- agents/extensions/memory/__init__.py +120 -0
- agents/extensions/memory/advanced_sqlite_session.py +1285 -0
- agents/extensions/memory/async_sqlite_session.py +239 -0
- agents/extensions/memory/dapr_session.py +423 -0
- agents/extensions/memory/encrypt_session.py +185 -0
- agents/extensions/memory/redis_session.py +261 -0
- agents/extensions/memory/sqlalchemy_session.py +334 -0
- agents/extensions/models/litellm_model.py +449 -36
- agents/extensions/models/litellm_provider.py +3 -1
- agents/function_schema.py +47 -5
- agents/guardrail.py +16 -2
- agents/{handoffs.py → handoffs/__init__.py} +89 -47
- agents/handoffs/history.py +268 -0
- agents/items.py +238 -13
- agents/lifecycle.py +75 -14
- agents/mcp/server.py +280 -37
- agents/mcp/util.py +24 -3
- agents/memory/__init__.py +22 -2
- agents/memory/openai_conversations_session.py +91 -0
- agents/memory/openai_responses_compaction_session.py +249 -0
- agents/memory/session.py +19 -261
- agents/memory/sqlite_session.py +275 -0
- agents/memory/util.py +20 -0
- agents/model_settings.py +18 -3
- agents/models/__init__.py +13 -0
- agents/models/chatcmpl_converter.py +303 -50
- agents/models/chatcmpl_helpers.py +63 -0
- agents/models/chatcmpl_stream_handler.py +290 -68
- agents/models/default_models.py +58 -0
- agents/models/interface.py +4 -0
- agents/models/openai_chatcompletions.py +103 -48
- agents/models/openai_provider.py +10 -4
- agents/models/openai_responses.py +167 -46
- agents/realtime/__init__.py +4 -0
- agents/realtime/_util.py +14 -3
- agents/realtime/agent.py +7 -0
- agents/realtime/audio_formats.py +53 -0
- agents/realtime/config.py +78 -10
- agents/realtime/events.py +18 -0
- agents/realtime/handoffs.py +2 -2
- agents/realtime/items.py +17 -1
- agents/realtime/model.py +13 -0
- agents/realtime/model_events.py +12 -0
- agents/realtime/model_inputs.py +18 -1
- agents/realtime/openai_realtime.py +700 -151
- agents/realtime/session.py +309 -32
- agents/repl.py +7 -3
- agents/result.py +197 -38
- agents/run.py +1053 -178
- agents/run_context.py +13 -2
- agents/stream_events.py +1 -0
- agents/strict_schema.py +14 -0
- agents/tool.py +413 -15
- agents/tool_context.py +22 -1
- agents/tool_guardrails.py +279 -0
- agents/tracing/__init__.py +2 -0
- agents/tracing/config.py +9 -0
- agents/tracing/create.py +4 -0
- agents/tracing/processor_interface.py +84 -11
- agents/tracing/processors.py +65 -54
- agents/tracing/provider.py +64 -7
- agents/tracing/spans.py +105 -0
- agents/tracing/traces.py +116 -16
- agents/usage.py +134 -12
- agents/util/_json.py +19 -1
- agents/util/_transforms.py +12 -2
- agents/voice/input.py +5 -4
- agents/voice/models/openai_stt.py +17 -9
- agents/voice/pipeline.py +2 -0
- agents/voice/pipeline_config.py +4 -0
- {openai_agents-0.2.6.dist-info → openai_agents-0.6.8.dist-info}/METADATA +44 -19
- openai_agents-0.6.8.dist-info/RECORD +134 -0
- {openai_agents-0.2.6.dist-info → openai_agents-0.6.8.dist-info}/WHEEL +1 -1
- openai_agents-0.2.6.dist-info/RECORD +0 -103
- {openai_agents-0.2.6.dist-info → openai_agents-0.6.8.dist-info}/licenses/LICENSE +0 -0
agents/_run_impl.py
CHANGED
|
@@ -3,12 +3,14 @@ from __future__ import annotations
|
|
|
3
3
|
import asyncio
|
|
4
4
|
import dataclasses
|
|
5
5
|
import inspect
|
|
6
|
-
|
|
6
|
+
import json
|
|
7
|
+
from collections.abc import Awaitable, Mapping, Sequence
|
|
7
8
|
from dataclasses import dataclass, field
|
|
8
|
-
from typing import TYPE_CHECKING, Any, cast
|
|
9
|
+
from typing import TYPE_CHECKING, Any, Literal, Optional, cast
|
|
9
10
|
|
|
10
11
|
from openai.types.responses import (
|
|
11
12
|
ResponseComputerToolCall,
|
|
13
|
+
ResponseCustomToolCall,
|
|
12
14
|
ResponseFileSearchToolCall,
|
|
13
15
|
ResponseFunctionToolCall,
|
|
14
16
|
ResponseFunctionWebSearch,
|
|
@@ -44,10 +46,18 @@ from openai.types.responses.response_reasoning_item import ResponseReasoningItem
|
|
|
44
46
|
from .agent import Agent, ToolsToFinalOutputResult
|
|
45
47
|
from .agent_output import AgentOutputSchemaBase
|
|
46
48
|
from .computer import AsyncComputer, Computer
|
|
47
|
-
from .
|
|
49
|
+
from .editor import ApplyPatchOperation, ApplyPatchResult
|
|
50
|
+
from .exceptions import (
|
|
51
|
+
AgentsException,
|
|
52
|
+
ModelBehaviorError,
|
|
53
|
+
ToolInputGuardrailTripwireTriggered,
|
|
54
|
+
ToolOutputGuardrailTripwireTriggered,
|
|
55
|
+
UserError,
|
|
56
|
+
)
|
|
48
57
|
from .guardrail import InputGuardrail, InputGuardrailResult, OutputGuardrail, OutputGuardrailResult
|
|
49
|
-
from .handoffs import Handoff, HandoffInputData
|
|
58
|
+
from .handoffs import Handoff, HandoffInputData, nest_handoff_history
|
|
50
59
|
from .items import (
|
|
60
|
+
CompactionItem,
|
|
51
61
|
HandoffCallItem,
|
|
52
62
|
HandoffOutputItem,
|
|
53
63
|
ItemHelpers,
|
|
@@ -66,9 +76,10 @@ from .lifecycle import RunHooks
|
|
|
66
76
|
from .logger import logger
|
|
67
77
|
from .model_settings import ModelSettings
|
|
68
78
|
from .models.interface import ModelTracing
|
|
69
|
-
from .run_context import RunContextWrapper, TContext
|
|
79
|
+
from .run_context import AgentHookContext, RunContextWrapper, TContext
|
|
70
80
|
from .stream_events import RunItemStreamEvent, StreamEvent
|
|
71
81
|
from .tool import (
|
|
82
|
+
ApplyPatchTool,
|
|
72
83
|
ComputerTool,
|
|
73
84
|
ComputerToolSafetyCheckData,
|
|
74
85
|
FunctionTool,
|
|
@@ -77,12 +88,27 @@ from .tool import (
|
|
|
77
88
|
LocalShellCommandRequest,
|
|
78
89
|
LocalShellTool,
|
|
79
90
|
MCPToolApprovalRequest,
|
|
91
|
+
ShellActionRequest,
|
|
92
|
+
ShellCallData,
|
|
93
|
+
ShellCallOutcome,
|
|
94
|
+
ShellCommandOutput,
|
|
95
|
+
ShellCommandRequest,
|
|
96
|
+
ShellResult,
|
|
97
|
+
ShellTool,
|
|
80
98
|
Tool,
|
|
99
|
+
resolve_computer,
|
|
81
100
|
)
|
|
82
101
|
from .tool_context import ToolContext
|
|
102
|
+
from .tool_guardrails import (
|
|
103
|
+
ToolInputGuardrailData,
|
|
104
|
+
ToolInputGuardrailResult,
|
|
105
|
+
ToolOutputGuardrailData,
|
|
106
|
+
ToolOutputGuardrailResult,
|
|
107
|
+
)
|
|
83
108
|
from .tracing import (
|
|
84
109
|
SpanError,
|
|
85
110
|
Trace,
|
|
111
|
+
TracingConfig,
|
|
86
112
|
function_span,
|
|
87
113
|
get_current_trace,
|
|
88
114
|
guardrail_span,
|
|
@@ -136,7 +162,7 @@ class ToolRunFunction:
|
|
|
136
162
|
@dataclass
|
|
137
163
|
class ToolRunComputerAction:
|
|
138
164
|
tool_call: ResponseComputerToolCall
|
|
139
|
-
computer_tool: ComputerTool
|
|
165
|
+
computer_tool: ComputerTool[Any]
|
|
140
166
|
|
|
141
167
|
|
|
142
168
|
@dataclass
|
|
@@ -151,6 +177,18 @@ class ToolRunLocalShellCall:
|
|
|
151
177
|
local_shell_tool: LocalShellTool
|
|
152
178
|
|
|
153
179
|
|
|
180
|
+
@dataclass
|
|
181
|
+
class ToolRunShellCall:
|
|
182
|
+
tool_call: Any
|
|
183
|
+
shell_tool: ShellTool
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
@dataclass
|
|
187
|
+
class ToolRunApplyPatchCall:
|
|
188
|
+
tool_call: Any
|
|
189
|
+
apply_patch_tool: ApplyPatchTool
|
|
190
|
+
|
|
191
|
+
|
|
154
192
|
@dataclass
|
|
155
193
|
class ProcessedResponse:
|
|
156
194
|
new_items: list[RunItem]
|
|
@@ -158,6 +196,8 @@ class ProcessedResponse:
|
|
|
158
196
|
functions: list[ToolRunFunction]
|
|
159
197
|
computer_actions: list[ToolRunComputerAction]
|
|
160
198
|
local_shell_calls: list[ToolRunLocalShellCall]
|
|
199
|
+
shell_calls: list[ToolRunShellCall]
|
|
200
|
+
apply_patch_calls: list[ToolRunApplyPatchCall]
|
|
161
201
|
tools_used: list[str] # Names of all tools used, including hosted tools
|
|
162
202
|
mcp_approval_requests: list[ToolRunMCPApprovalRequest] # Only requests with callbacks
|
|
163
203
|
|
|
@@ -170,6 +210,8 @@ class ProcessedResponse:
|
|
|
170
210
|
self.functions,
|
|
171
211
|
self.computer_actions,
|
|
172
212
|
self.local_shell_calls,
|
|
213
|
+
self.shell_calls,
|
|
214
|
+
self.apply_patch_calls,
|
|
173
215
|
self.mcp_approval_requests,
|
|
174
216
|
]
|
|
175
217
|
)
|
|
@@ -203,16 +245,30 @@ class SingleStepResult:
|
|
|
203
245
|
"""Items generated before the current step."""
|
|
204
246
|
|
|
205
247
|
new_step_items: list[RunItem]
|
|
206
|
-
"""Items generated during this current step.
|
|
248
|
+
"""Items generated during this current step. May be filtered during handoffs to avoid
|
|
249
|
+
duplication in model input."""
|
|
207
250
|
|
|
208
251
|
next_step: NextStepHandoff | NextStepFinalOutput | NextStepRunAgain
|
|
209
252
|
"""The next step to take."""
|
|
210
253
|
|
|
254
|
+
tool_input_guardrail_results: list[ToolInputGuardrailResult]
|
|
255
|
+
"""Tool input guardrail results from this step."""
|
|
256
|
+
|
|
257
|
+
tool_output_guardrail_results: list[ToolOutputGuardrailResult]
|
|
258
|
+
"""Tool output guardrail results from this step."""
|
|
259
|
+
|
|
260
|
+
session_step_items: list[RunItem] | None = None
|
|
261
|
+
"""Full unfiltered items for session history. When set, these are used instead of
|
|
262
|
+
new_step_items for session saving and generated_items property."""
|
|
263
|
+
|
|
211
264
|
@property
|
|
212
265
|
def generated_items(self) -> list[RunItem]:
|
|
213
266
|
"""Items generated during the agent run (i.e. everything generated after
|
|
214
|
-
`original_input`)."""
|
|
215
|
-
|
|
267
|
+
`original_input`). Uses session_step_items when available for full observability."""
|
|
268
|
+
items = (
|
|
269
|
+
self.session_step_items if self.session_step_items is not None else self.new_step_items
|
|
270
|
+
)
|
|
271
|
+
return self.pre_step_items + items
|
|
216
272
|
|
|
217
273
|
|
|
218
274
|
def get_model_tracing_impl(
|
|
@@ -249,8 +305,15 @@ class RunImpl:
|
|
|
249
305
|
new_step_items: list[RunItem] = []
|
|
250
306
|
new_step_items.extend(processed_response.new_items)
|
|
251
307
|
|
|
252
|
-
# First,
|
|
253
|
-
|
|
308
|
+
# First, run function tools, computer actions, shell calls, apply_patch calls,
|
|
309
|
+
# and legacy local shell calls.
|
|
310
|
+
(
|
|
311
|
+
(function_results, tool_input_guardrail_results, tool_output_guardrail_results),
|
|
312
|
+
computer_results,
|
|
313
|
+
shell_results,
|
|
314
|
+
apply_patch_results,
|
|
315
|
+
local_shell_results,
|
|
316
|
+
) = await asyncio.gather(
|
|
254
317
|
cls.execute_function_tool_calls(
|
|
255
318
|
agent=agent,
|
|
256
319
|
tool_runs=processed_response.functions,
|
|
@@ -265,9 +328,33 @@ class RunImpl:
|
|
|
265
328
|
context_wrapper=context_wrapper,
|
|
266
329
|
config=run_config,
|
|
267
330
|
),
|
|
331
|
+
cls.execute_shell_calls(
|
|
332
|
+
agent=agent,
|
|
333
|
+
calls=processed_response.shell_calls,
|
|
334
|
+
hooks=hooks,
|
|
335
|
+
context_wrapper=context_wrapper,
|
|
336
|
+
config=run_config,
|
|
337
|
+
),
|
|
338
|
+
cls.execute_apply_patch_calls(
|
|
339
|
+
agent=agent,
|
|
340
|
+
calls=processed_response.apply_patch_calls,
|
|
341
|
+
hooks=hooks,
|
|
342
|
+
context_wrapper=context_wrapper,
|
|
343
|
+
config=run_config,
|
|
344
|
+
),
|
|
345
|
+
cls.execute_local_shell_calls(
|
|
346
|
+
agent=agent,
|
|
347
|
+
calls=processed_response.local_shell_calls,
|
|
348
|
+
hooks=hooks,
|
|
349
|
+
context_wrapper=context_wrapper,
|
|
350
|
+
config=run_config,
|
|
351
|
+
),
|
|
268
352
|
)
|
|
269
353
|
new_step_items.extend([result.run_item for result in function_results])
|
|
270
354
|
new_step_items.extend(computer_results)
|
|
355
|
+
new_step_items.extend(shell_results)
|
|
356
|
+
new_step_items.extend(apply_patch_results)
|
|
357
|
+
new_step_items.extend(local_shell_results)
|
|
271
358
|
|
|
272
359
|
# Next, run the MCP approval requests
|
|
273
360
|
if processed_response.mcp_approval_requests:
|
|
@@ -320,6 +407,8 @@ class RunImpl:
|
|
|
320
407
|
final_output=check_tool_use.final_output,
|
|
321
408
|
hooks=hooks,
|
|
322
409
|
context_wrapper=context_wrapper,
|
|
410
|
+
tool_input_guardrail_results=tool_input_guardrail_results,
|
|
411
|
+
tool_output_guardrail_results=tool_output_guardrail_results,
|
|
323
412
|
)
|
|
324
413
|
|
|
325
414
|
# Now we can check if the model also produced a final output
|
|
@@ -330,43 +419,46 @@ class RunImpl:
|
|
|
330
419
|
ItemHelpers.extract_last_text(message_items[-1].raw_item) if message_items else None
|
|
331
420
|
)
|
|
332
421
|
|
|
333
|
-
#
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
not output_schema or output_schema.is_plain_text()
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
)
|
|
422
|
+
# Generate final output only when there are no pending tool calls or approval requests.
|
|
423
|
+
if not processed_response.has_tools_or_approvals_to_run():
|
|
424
|
+
if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
|
|
425
|
+
final_output = output_schema.validate_json(potential_final_output_text)
|
|
426
|
+
return await cls.execute_final_output(
|
|
427
|
+
agent=agent,
|
|
428
|
+
original_input=original_input,
|
|
429
|
+
new_response=new_response,
|
|
430
|
+
pre_step_items=pre_step_items,
|
|
431
|
+
new_step_items=new_step_items,
|
|
432
|
+
final_output=final_output,
|
|
433
|
+
hooks=hooks,
|
|
434
|
+
context_wrapper=context_wrapper,
|
|
435
|
+
tool_input_guardrail_results=tool_input_guardrail_results,
|
|
436
|
+
tool_output_guardrail_results=tool_output_guardrail_results,
|
|
437
|
+
)
|
|
438
|
+
elif not output_schema or output_schema.is_plain_text():
|
|
439
|
+
return await cls.execute_final_output(
|
|
440
|
+
agent=agent,
|
|
441
|
+
original_input=original_input,
|
|
442
|
+
new_response=new_response,
|
|
443
|
+
pre_step_items=pre_step_items,
|
|
444
|
+
new_step_items=new_step_items,
|
|
445
|
+
final_output=potential_final_output_text or "",
|
|
446
|
+
hooks=hooks,
|
|
447
|
+
context_wrapper=context_wrapper,
|
|
448
|
+
tool_input_guardrail_results=tool_input_guardrail_results,
|
|
449
|
+
tool_output_guardrail_results=tool_output_guardrail_results,
|
|
450
|
+
)
|
|
451
|
+
|
|
452
|
+
# If there's no final output, we can just run again
|
|
453
|
+
return SingleStepResult(
|
|
454
|
+
original_input=original_input,
|
|
455
|
+
model_response=new_response,
|
|
456
|
+
pre_step_items=pre_step_items,
|
|
457
|
+
new_step_items=new_step_items,
|
|
458
|
+
next_step=NextStepRunAgain(),
|
|
459
|
+
tool_input_guardrail_results=tool_input_guardrail_results,
|
|
460
|
+
tool_output_guardrail_results=tool_output_guardrail_results,
|
|
461
|
+
)
|
|
370
462
|
|
|
371
463
|
@classmethod
|
|
372
464
|
def maybe_reset_tool_choice(
|
|
@@ -380,6 +472,22 @@ class RunImpl:
|
|
|
380
472
|
|
|
381
473
|
return model_settings
|
|
382
474
|
|
|
475
|
+
@classmethod
|
|
476
|
+
async def initialize_computer_tools(
|
|
477
|
+
cls,
|
|
478
|
+
*,
|
|
479
|
+
tools: list[Tool],
|
|
480
|
+
context_wrapper: RunContextWrapper[TContext],
|
|
481
|
+
) -> None:
|
|
482
|
+
"""Resolve computer tools ahead of model invocation so each run gets its own instance."""
|
|
483
|
+
computer_tools = [tool for tool in tools if isinstance(tool, ComputerTool)]
|
|
484
|
+
if not computer_tools:
|
|
485
|
+
return
|
|
486
|
+
|
|
487
|
+
await asyncio.gather(
|
|
488
|
+
*(resolve_computer(tool=tool, run_context=context_wrapper) for tool in computer_tools)
|
|
489
|
+
)
|
|
490
|
+
|
|
383
491
|
@classmethod
|
|
384
492
|
def process_model_response(
|
|
385
493
|
cls,
|
|
@@ -396,6 +504,8 @@ class RunImpl:
|
|
|
396
504
|
functions = []
|
|
397
505
|
computer_actions = []
|
|
398
506
|
local_shell_calls = []
|
|
507
|
+
shell_calls = []
|
|
508
|
+
apply_patch_calls = []
|
|
399
509
|
mcp_approval_requests = []
|
|
400
510
|
tools_used: list[str] = []
|
|
401
511
|
handoff_map = {handoff.tool_name: handoff for handoff in handoffs}
|
|
@@ -404,6 +514,10 @@ class RunImpl:
|
|
|
404
514
|
local_shell_tool = next(
|
|
405
515
|
(tool for tool in all_tools if isinstance(tool, LocalShellTool)), None
|
|
406
516
|
)
|
|
517
|
+
shell_tool = next((tool for tool in all_tools if isinstance(tool, ShellTool)), None)
|
|
518
|
+
apply_patch_tool = next(
|
|
519
|
+
(tool for tool in all_tools if isinstance(tool, ApplyPatchTool)), None
|
|
520
|
+
)
|
|
407
521
|
hosted_mcp_server_map = {
|
|
408
522
|
tool.tool_config["server_label"]: tool
|
|
409
523
|
for tool in all_tools
|
|
@@ -411,6 +525,59 @@ class RunImpl:
|
|
|
411
525
|
}
|
|
412
526
|
|
|
413
527
|
for output in response.output:
|
|
528
|
+
output_type = _get_mapping_or_attr(output, "type")
|
|
529
|
+
logger.debug(
|
|
530
|
+
"Processing output item type=%s class=%s",
|
|
531
|
+
output_type,
|
|
532
|
+
output.__class__.__name__ if hasattr(output, "__class__") else type(output),
|
|
533
|
+
)
|
|
534
|
+
if output_type == "shell_call":
|
|
535
|
+
items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent))
|
|
536
|
+
if not shell_tool:
|
|
537
|
+
tools_used.append("shell")
|
|
538
|
+
_error_tracing.attach_error_to_current_span(
|
|
539
|
+
SpanError(
|
|
540
|
+
message="Shell tool not found",
|
|
541
|
+
data={},
|
|
542
|
+
)
|
|
543
|
+
)
|
|
544
|
+
raise ModelBehaviorError("Model produced shell call without a shell tool.")
|
|
545
|
+
tools_used.append(shell_tool.name)
|
|
546
|
+
call_identifier = _get_mapping_or_attr(output, "call_id") or _get_mapping_or_attr(
|
|
547
|
+
output, "callId"
|
|
548
|
+
)
|
|
549
|
+
logger.debug("Queuing shell_call %s", call_identifier)
|
|
550
|
+
shell_calls.append(ToolRunShellCall(tool_call=output, shell_tool=shell_tool))
|
|
551
|
+
continue
|
|
552
|
+
if output_type == "compaction":
|
|
553
|
+
items.append(CompactionItem(raw_item=cast(TResponseInputItem, output), agent=agent))
|
|
554
|
+
continue
|
|
555
|
+
if output_type == "apply_patch_call":
|
|
556
|
+
items.append(ToolCallItem(raw_item=cast(Any, output), agent=agent))
|
|
557
|
+
if apply_patch_tool:
|
|
558
|
+
tools_used.append(apply_patch_tool.name)
|
|
559
|
+
call_identifier = _get_mapping_or_attr(output, "call_id")
|
|
560
|
+
if not call_identifier:
|
|
561
|
+
call_identifier = _get_mapping_or_attr(output, "callId")
|
|
562
|
+
logger.debug("Queuing apply_patch_call %s", call_identifier)
|
|
563
|
+
apply_patch_calls.append(
|
|
564
|
+
ToolRunApplyPatchCall(
|
|
565
|
+
tool_call=output,
|
|
566
|
+
apply_patch_tool=apply_patch_tool,
|
|
567
|
+
)
|
|
568
|
+
)
|
|
569
|
+
else:
|
|
570
|
+
tools_used.append("apply_patch")
|
|
571
|
+
_error_tracing.attach_error_to_current_span(
|
|
572
|
+
SpanError(
|
|
573
|
+
message="Apply patch tool not found",
|
|
574
|
+
data={},
|
|
575
|
+
)
|
|
576
|
+
)
|
|
577
|
+
raise ModelBehaviorError(
|
|
578
|
+
"Model produced apply_patch call without an apply_patch tool."
|
|
579
|
+
)
|
|
580
|
+
continue
|
|
414
581
|
if isinstance(output, ResponseOutputMessage):
|
|
415
582
|
items.append(MessageOutputItem(raw_item=output, agent=agent))
|
|
416
583
|
elif isinstance(output, ResponseFileSearchToolCall):
|
|
@@ -473,20 +640,84 @@ class RunImpl:
|
|
|
473
640
|
tools_used.append("code_interpreter")
|
|
474
641
|
elif isinstance(output, LocalShellCall):
|
|
475
642
|
items.append(ToolCallItem(raw_item=output, agent=agent))
|
|
476
|
-
|
|
477
|
-
|
|
643
|
+
if shell_tool:
|
|
644
|
+
tools_used.append(shell_tool.name)
|
|
645
|
+
shell_calls.append(ToolRunShellCall(tool_call=output, shell_tool=shell_tool))
|
|
646
|
+
else:
|
|
647
|
+
tools_used.append("local_shell")
|
|
648
|
+
if not local_shell_tool:
|
|
649
|
+
_error_tracing.attach_error_to_current_span(
|
|
650
|
+
SpanError(
|
|
651
|
+
message="Local shell tool not found",
|
|
652
|
+
data={},
|
|
653
|
+
)
|
|
654
|
+
)
|
|
655
|
+
raise ModelBehaviorError(
|
|
656
|
+
"Model produced local shell call without a local shell tool."
|
|
657
|
+
)
|
|
658
|
+
local_shell_calls.append(
|
|
659
|
+
ToolRunLocalShellCall(tool_call=output, local_shell_tool=local_shell_tool)
|
|
660
|
+
)
|
|
661
|
+
elif isinstance(output, ResponseCustomToolCall) and _is_apply_patch_name(
|
|
662
|
+
output.name, apply_patch_tool
|
|
663
|
+
):
|
|
664
|
+
parsed_operation = _parse_apply_patch_custom_input(output.input)
|
|
665
|
+
pseudo_call = {
|
|
666
|
+
"type": "apply_patch_call",
|
|
667
|
+
"call_id": output.call_id,
|
|
668
|
+
"operation": parsed_operation,
|
|
669
|
+
}
|
|
670
|
+
items.append(ToolCallItem(raw_item=cast(Any, pseudo_call), agent=agent))
|
|
671
|
+
if apply_patch_tool:
|
|
672
|
+
tools_used.append(apply_patch_tool.name)
|
|
673
|
+
apply_patch_calls.append(
|
|
674
|
+
ToolRunApplyPatchCall(
|
|
675
|
+
tool_call=pseudo_call,
|
|
676
|
+
apply_patch_tool=apply_patch_tool,
|
|
677
|
+
)
|
|
678
|
+
)
|
|
679
|
+
else:
|
|
680
|
+
tools_used.append("apply_patch")
|
|
478
681
|
_error_tracing.attach_error_to_current_span(
|
|
479
682
|
SpanError(
|
|
480
|
-
message="
|
|
683
|
+
message="Apply patch tool not found",
|
|
481
684
|
data={},
|
|
482
685
|
)
|
|
483
686
|
)
|
|
484
687
|
raise ModelBehaviorError(
|
|
485
|
-
"Model produced
|
|
688
|
+
"Model produced apply_patch call without an apply_patch tool."
|
|
486
689
|
)
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
)
|
|
690
|
+
elif (
|
|
691
|
+
isinstance(output, ResponseFunctionToolCall)
|
|
692
|
+
and _is_apply_patch_name(output.name, apply_patch_tool)
|
|
693
|
+
and output.name not in function_map
|
|
694
|
+
):
|
|
695
|
+
parsed_operation = _parse_apply_patch_function_args(output.arguments)
|
|
696
|
+
pseudo_call = {
|
|
697
|
+
"type": "apply_patch_call",
|
|
698
|
+
"call_id": output.call_id,
|
|
699
|
+
"operation": parsed_operation,
|
|
700
|
+
}
|
|
701
|
+
items.append(ToolCallItem(raw_item=cast(Any, pseudo_call), agent=agent))
|
|
702
|
+
if apply_patch_tool:
|
|
703
|
+
tools_used.append(apply_patch_tool.name)
|
|
704
|
+
apply_patch_calls.append(
|
|
705
|
+
ToolRunApplyPatchCall(
|
|
706
|
+
tool_call=pseudo_call, apply_patch_tool=apply_patch_tool
|
|
707
|
+
)
|
|
708
|
+
)
|
|
709
|
+
else:
|
|
710
|
+
tools_used.append("apply_patch")
|
|
711
|
+
_error_tracing.attach_error_to_current_span(
|
|
712
|
+
SpanError(
|
|
713
|
+
message="Apply patch tool not found",
|
|
714
|
+
data={},
|
|
715
|
+
)
|
|
716
|
+
)
|
|
717
|
+
raise ModelBehaviorError(
|
|
718
|
+
"Model produced apply_patch call without an apply_patch tool."
|
|
719
|
+
)
|
|
720
|
+
continue
|
|
490
721
|
|
|
491
722
|
elif not isinstance(output, ResponseFunctionToolCall):
|
|
492
723
|
logger.warning(f"Unexpected output type, ignoring: {type(output)}")
|
|
@@ -509,13 +740,29 @@ class RunImpl:
|
|
|
509
740
|
# Regular function tool call
|
|
510
741
|
else:
|
|
511
742
|
if output.name not in function_map:
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
743
|
+
if output_schema is not None and output.name == "json_tool_call":
|
|
744
|
+
# LiteLLM could generate non-existent tool calls for structured outputs
|
|
745
|
+
items.append(ToolCallItem(raw_item=output, agent=agent))
|
|
746
|
+
functions.append(
|
|
747
|
+
ToolRunFunction(
|
|
748
|
+
tool_call=output,
|
|
749
|
+
# this tool does not exist in function_map, so generate ad-hoc one,
|
|
750
|
+
# which just parses the input if it's a string, and returns the
|
|
751
|
+
# value otherwise
|
|
752
|
+
function_tool=_build_litellm_json_tool_call(output),
|
|
753
|
+
)
|
|
516
754
|
)
|
|
517
|
-
|
|
518
|
-
|
|
755
|
+
continue
|
|
756
|
+
else:
|
|
757
|
+
_error_tracing.attach_error_to_current_span(
|
|
758
|
+
SpanError(
|
|
759
|
+
message="Tool not found",
|
|
760
|
+
data={"tool_name": output.name},
|
|
761
|
+
)
|
|
762
|
+
)
|
|
763
|
+
error = f"Tool {output.name} not found in agent {agent.name}"
|
|
764
|
+
raise ModelBehaviorError(error)
|
|
765
|
+
|
|
519
766
|
items.append(ToolCallItem(raw_item=output, agent=agent))
|
|
520
767
|
functions.append(
|
|
521
768
|
ToolRunFunction(
|
|
@@ -530,10 +777,161 @@ class RunImpl:
|
|
|
530
777
|
functions=functions,
|
|
531
778
|
computer_actions=computer_actions,
|
|
532
779
|
local_shell_calls=local_shell_calls,
|
|
780
|
+
shell_calls=shell_calls,
|
|
781
|
+
apply_patch_calls=apply_patch_calls,
|
|
533
782
|
tools_used=tools_used,
|
|
534
783
|
mcp_approval_requests=mcp_approval_requests,
|
|
535
784
|
)
|
|
536
785
|
|
|
786
|
+
@classmethod
|
|
787
|
+
async def _execute_input_guardrails(
|
|
788
|
+
cls,
|
|
789
|
+
*,
|
|
790
|
+
func_tool: FunctionTool,
|
|
791
|
+
tool_context: ToolContext[TContext],
|
|
792
|
+
agent: Agent[TContext],
|
|
793
|
+
tool_input_guardrail_results: list[ToolInputGuardrailResult],
|
|
794
|
+
) -> str | None:
|
|
795
|
+
"""Execute input guardrails for a tool.
|
|
796
|
+
|
|
797
|
+
Args:
|
|
798
|
+
func_tool: The function tool being executed.
|
|
799
|
+
tool_context: The tool execution context.
|
|
800
|
+
agent: The agent executing the tool.
|
|
801
|
+
tool_input_guardrail_results: List to append guardrail results to.
|
|
802
|
+
|
|
803
|
+
Returns:
|
|
804
|
+
None if tool execution should proceed, or a message string if execution should be
|
|
805
|
+
skipped.
|
|
806
|
+
|
|
807
|
+
Raises:
|
|
808
|
+
ToolInputGuardrailTripwireTriggered: If a guardrail triggers an exception.
|
|
809
|
+
"""
|
|
810
|
+
if not func_tool.tool_input_guardrails:
|
|
811
|
+
return None
|
|
812
|
+
|
|
813
|
+
for guardrail in func_tool.tool_input_guardrails:
|
|
814
|
+
gr_out = await guardrail.run(
|
|
815
|
+
ToolInputGuardrailData(
|
|
816
|
+
context=tool_context,
|
|
817
|
+
agent=agent,
|
|
818
|
+
)
|
|
819
|
+
)
|
|
820
|
+
|
|
821
|
+
# Store the guardrail result
|
|
822
|
+
tool_input_guardrail_results.append(
|
|
823
|
+
ToolInputGuardrailResult(
|
|
824
|
+
guardrail=guardrail,
|
|
825
|
+
output=gr_out,
|
|
826
|
+
)
|
|
827
|
+
)
|
|
828
|
+
|
|
829
|
+
# Handle different behavior types
|
|
830
|
+
if gr_out.behavior["type"] == "raise_exception":
|
|
831
|
+
raise ToolInputGuardrailTripwireTriggered(guardrail=guardrail, output=gr_out)
|
|
832
|
+
elif gr_out.behavior["type"] == "reject_content":
|
|
833
|
+
# Set final_result to the message and skip tool execution
|
|
834
|
+
return gr_out.behavior["message"]
|
|
835
|
+
elif gr_out.behavior["type"] == "allow":
|
|
836
|
+
# Continue to next guardrail or tool execution
|
|
837
|
+
continue
|
|
838
|
+
|
|
839
|
+
return None
|
|
840
|
+
|
|
841
|
+
@classmethod
|
|
842
|
+
async def _execute_output_guardrails(
|
|
843
|
+
cls,
|
|
844
|
+
*,
|
|
845
|
+
func_tool: FunctionTool,
|
|
846
|
+
tool_context: ToolContext[TContext],
|
|
847
|
+
agent: Agent[TContext],
|
|
848
|
+
real_result: Any,
|
|
849
|
+
tool_output_guardrail_results: list[ToolOutputGuardrailResult],
|
|
850
|
+
) -> Any:
|
|
851
|
+
"""Execute output guardrails for a tool.
|
|
852
|
+
|
|
853
|
+
Args:
|
|
854
|
+
func_tool: The function tool being executed.
|
|
855
|
+
tool_context: The tool execution context.
|
|
856
|
+
agent: The agent executing the tool.
|
|
857
|
+
real_result: The actual result from the tool execution.
|
|
858
|
+
tool_output_guardrail_results: List to append guardrail results to.
|
|
859
|
+
|
|
860
|
+
Returns:
|
|
861
|
+
The final result after guardrail processing (may be modified).
|
|
862
|
+
|
|
863
|
+
Raises:
|
|
864
|
+
ToolOutputGuardrailTripwireTriggered: If a guardrail triggers an exception.
|
|
865
|
+
"""
|
|
866
|
+
if not func_tool.tool_output_guardrails:
|
|
867
|
+
return real_result
|
|
868
|
+
|
|
869
|
+
final_result = real_result
|
|
870
|
+
for output_guardrail in func_tool.tool_output_guardrails:
|
|
871
|
+
gr_out = await output_guardrail.run(
|
|
872
|
+
ToolOutputGuardrailData(
|
|
873
|
+
context=tool_context,
|
|
874
|
+
agent=agent,
|
|
875
|
+
output=real_result,
|
|
876
|
+
)
|
|
877
|
+
)
|
|
878
|
+
|
|
879
|
+
# Store the guardrail result
|
|
880
|
+
tool_output_guardrail_results.append(
|
|
881
|
+
ToolOutputGuardrailResult(
|
|
882
|
+
guardrail=output_guardrail,
|
|
883
|
+
output=gr_out,
|
|
884
|
+
)
|
|
885
|
+
)
|
|
886
|
+
|
|
887
|
+
# Handle different behavior types
|
|
888
|
+
if gr_out.behavior["type"] == "raise_exception":
|
|
889
|
+
raise ToolOutputGuardrailTripwireTriggered(
|
|
890
|
+
guardrail=output_guardrail, output=gr_out
|
|
891
|
+
)
|
|
892
|
+
elif gr_out.behavior["type"] == "reject_content":
|
|
893
|
+
# Override the result with the guardrail message
|
|
894
|
+
final_result = gr_out.behavior["message"]
|
|
895
|
+
break
|
|
896
|
+
elif gr_out.behavior["type"] == "allow":
|
|
897
|
+
# Continue to next guardrail
|
|
898
|
+
continue
|
|
899
|
+
|
|
900
|
+
return final_result
|
|
901
|
+
|
|
902
|
+
@classmethod
|
|
903
|
+
async def _execute_tool_with_hooks(
|
|
904
|
+
cls,
|
|
905
|
+
*,
|
|
906
|
+
func_tool: FunctionTool,
|
|
907
|
+
tool_context: ToolContext[TContext],
|
|
908
|
+
agent: Agent[TContext],
|
|
909
|
+
hooks: RunHooks[TContext],
|
|
910
|
+
tool_call: ResponseFunctionToolCall,
|
|
911
|
+
) -> Any:
|
|
912
|
+
"""Execute the core tool function with before/after hooks.
|
|
913
|
+
|
|
914
|
+
Args:
|
|
915
|
+
func_tool: The function tool being executed.
|
|
916
|
+
tool_context: The tool execution context.
|
|
917
|
+
agent: The agent executing the tool.
|
|
918
|
+
hooks: The run hooks to execute.
|
|
919
|
+
tool_call: The tool call details.
|
|
920
|
+
|
|
921
|
+
Returns:
|
|
922
|
+
The result from the tool execution.
|
|
923
|
+
"""
|
|
924
|
+
await asyncio.gather(
|
|
925
|
+
hooks.on_tool_start(tool_context, agent, func_tool),
|
|
926
|
+
(
|
|
927
|
+
agent.hooks.on_tool_start(tool_context, agent, func_tool)
|
|
928
|
+
if agent.hooks
|
|
929
|
+
else _coro.noop_coroutine()
|
|
930
|
+
),
|
|
931
|
+
)
|
|
932
|
+
|
|
933
|
+
return await func_tool.on_invoke_tool(tool_context, tool_call.arguments)
|
|
934
|
+
|
|
537
935
|
@classmethod
|
|
538
936
|
async def execute_function_tool_calls(
|
|
539
937
|
cls,
|
|
@@ -543,7 +941,13 @@ class RunImpl:
|
|
|
543
941
|
hooks: RunHooks[TContext],
|
|
544
942
|
context_wrapper: RunContextWrapper[TContext],
|
|
545
943
|
config: RunConfig,
|
|
546
|
-
) ->
|
|
944
|
+
) -> tuple[
|
|
945
|
+
list[FunctionToolResult], list[ToolInputGuardrailResult], list[ToolOutputGuardrailResult]
|
|
946
|
+
]:
|
|
947
|
+
# Collect guardrail results
|
|
948
|
+
tool_input_guardrail_results: list[ToolInputGuardrailResult] = []
|
|
949
|
+
tool_output_guardrail_results: list[ToolOutputGuardrailResult] = []
|
|
950
|
+
|
|
547
951
|
async def run_single_tool(
|
|
548
952
|
func_tool: FunctionTool, tool_call: ResponseFunctionToolCall
|
|
549
953
|
) -> Any:
|
|
@@ -556,24 +960,48 @@ class RunImpl:
|
|
|
556
960
|
if config.trace_include_sensitive_data:
|
|
557
961
|
span_fn.span_data.input = tool_call.arguments
|
|
558
962
|
try:
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
564
|
-
|
|
565
|
-
),
|
|
566
|
-
func_tool.on_invoke_tool(tool_context, tool_call.arguments),
|
|
963
|
+
# 1) Run input tool guardrails, if any
|
|
964
|
+
rejected_message = await cls._execute_input_guardrails(
|
|
965
|
+
func_tool=func_tool,
|
|
966
|
+
tool_context=tool_context,
|
|
967
|
+
agent=agent,
|
|
968
|
+
tool_input_guardrail_results=tool_input_guardrail_results,
|
|
567
969
|
)
|
|
568
970
|
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
971
|
+
if rejected_message is not None:
|
|
972
|
+
# Input guardrail rejected the tool call
|
|
973
|
+
final_result = rejected_message
|
|
974
|
+
else:
|
|
975
|
+
# 2) Actually run the tool
|
|
976
|
+
real_result = await cls._execute_tool_with_hooks(
|
|
977
|
+
func_tool=func_tool,
|
|
978
|
+
tool_context=tool_context,
|
|
979
|
+
agent=agent,
|
|
980
|
+
hooks=hooks,
|
|
981
|
+
tool_call=tool_call,
|
|
982
|
+
)
|
|
983
|
+
|
|
984
|
+
# 3) Run output tool guardrails, if any
|
|
985
|
+
final_result = await cls._execute_output_guardrails(
|
|
986
|
+
func_tool=func_tool,
|
|
987
|
+
tool_context=tool_context,
|
|
988
|
+
agent=agent,
|
|
989
|
+
real_result=real_result,
|
|
990
|
+
tool_output_guardrail_results=tool_output_guardrail_results,
|
|
991
|
+
)
|
|
992
|
+
|
|
993
|
+
# 4) Tool end hooks (with final result, which may have been overridden)
|
|
994
|
+
await asyncio.gather(
|
|
995
|
+
hooks.on_tool_end(tool_context, agent, func_tool, final_result),
|
|
996
|
+
(
|
|
997
|
+
agent.hooks.on_tool_end(
|
|
998
|
+
tool_context, agent, func_tool, final_result
|
|
999
|
+
)
|
|
1000
|
+
if agent.hooks
|
|
1001
|
+
else _coro.noop_coroutine()
|
|
1002
|
+
),
|
|
1003
|
+
)
|
|
1004
|
+
result = final_result
|
|
577
1005
|
except Exception as e:
|
|
578
1006
|
_error_tracing.attach_error_to_current_span(
|
|
579
1007
|
SpanError(
|
|
@@ -596,19 +1024,21 @@ class RunImpl:
|
|
|
596
1024
|
|
|
597
1025
|
results = await asyncio.gather(*tasks)
|
|
598
1026
|
|
|
599
|
-
|
|
1027
|
+
function_tool_results = [
|
|
600
1028
|
FunctionToolResult(
|
|
601
1029
|
tool=tool_run.function_tool,
|
|
602
1030
|
output=result,
|
|
603
1031
|
run_item=ToolCallOutputItem(
|
|
604
1032
|
output=result,
|
|
605
|
-
raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call,
|
|
1033
|
+
raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, result),
|
|
606
1034
|
agent=agent,
|
|
607
1035
|
),
|
|
608
1036
|
)
|
|
609
1037
|
for tool_run, result in zip(tool_runs, results)
|
|
610
1038
|
]
|
|
611
1039
|
|
|
1040
|
+
return function_tool_results, tool_input_guardrail_results, tool_output_guardrail_results
|
|
1041
|
+
|
|
612
1042
|
@classmethod
|
|
613
1043
|
async def execute_local_shell_calls(
|
|
614
1044
|
cls,
|
|
@@ -633,6 +1063,52 @@ class RunImpl:
|
|
|
633
1063
|
)
|
|
634
1064
|
return results
|
|
635
1065
|
|
|
1066
|
+
@classmethod
|
|
1067
|
+
async def execute_shell_calls(
|
|
1068
|
+
cls,
|
|
1069
|
+
*,
|
|
1070
|
+
agent: Agent[TContext],
|
|
1071
|
+
calls: list[ToolRunShellCall],
|
|
1072
|
+
context_wrapper: RunContextWrapper[TContext],
|
|
1073
|
+
hooks: RunHooks[TContext],
|
|
1074
|
+
config: RunConfig,
|
|
1075
|
+
) -> list[RunItem]:
|
|
1076
|
+
results: list[RunItem] = []
|
|
1077
|
+
for call in calls:
|
|
1078
|
+
results.append(
|
|
1079
|
+
await ShellAction.execute(
|
|
1080
|
+
agent=agent,
|
|
1081
|
+
call=call,
|
|
1082
|
+
hooks=hooks,
|
|
1083
|
+
context_wrapper=context_wrapper,
|
|
1084
|
+
config=config,
|
|
1085
|
+
)
|
|
1086
|
+
)
|
|
1087
|
+
return results
|
|
1088
|
+
|
|
1089
|
+
@classmethod
|
|
1090
|
+
async def execute_apply_patch_calls(
|
|
1091
|
+
cls,
|
|
1092
|
+
*,
|
|
1093
|
+
agent: Agent[TContext],
|
|
1094
|
+
calls: list[ToolRunApplyPatchCall],
|
|
1095
|
+
context_wrapper: RunContextWrapper[TContext],
|
|
1096
|
+
hooks: RunHooks[TContext],
|
|
1097
|
+
config: RunConfig,
|
|
1098
|
+
) -> list[RunItem]:
|
|
1099
|
+
results: list[RunItem] = []
|
|
1100
|
+
for call in calls:
|
|
1101
|
+
results.append(
|
|
1102
|
+
await ApplyPatchAction.execute(
|
|
1103
|
+
agent=agent,
|
|
1104
|
+
call=call,
|
|
1105
|
+
hooks=hooks,
|
|
1106
|
+
context_wrapper=context_wrapper,
|
|
1107
|
+
config=config,
|
|
1108
|
+
)
|
|
1109
|
+
)
|
|
1110
|
+
return results
|
|
1111
|
+
|
|
636
1112
|
@classmethod
|
|
637
1113
|
async def execute_computer_actions(
|
|
638
1114
|
cls,
|
|
@@ -766,8 +1242,14 @@ class RunImpl:
|
|
|
766
1242
|
input_filter = handoff.input_filter or (
|
|
767
1243
|
run_config.handoff_input_filter if run_config else None
|
|
768
1244
|
)
|
|
769
|
-
|
|
770
|
-
|
|
1245
|
+
handoff_nest_setting = handoff.nest_handoff_history
|
|
1246
|
+
should_nest_history = (
|
|
1247
|
+
handoff_nest_setting
|
|
1248
|
+
if handoff_nest_setting is not None
|
|
1249
|
+
else run_config.nest_handoff_history
|
|
1250
|
+
)
|
|
1251
|
+
handoff_input_data: HandoffInputData | None = None
|
|
1252
|
+
if input_filter or should_nest_history:
|
|
771
1253
|
handoff_input_data = HandoffInputData(
|
|
772
1254
|
input_history=tuple(original_input)
|
|
773
1255
|
if isinstance(original_input, list)
|
|
@@ -776,6 +1258,17 @@ class RunImpl:
|
|
|
776
1258
|
new_items=tuple(new_step_items),
|
|
777
1259
|
run_context=context_wrapper,
|
|
778
1260
|
)
|
|
1261
|
+
|
|
1262
|
+
if input_filter and handoff_input_data is not None:
|
|
1263
|
+
filter_name = getattr(input_filter, "__qualname__", repr(input_filter))
|
|
1264
|
+
from_agent = getattr(agent, "name", agent.__class__.__name__)
|
|
1265
|
+
to_agent = getattr(new_agent, "name", new_agent.__class__.__name__)
|
|
1266
|
+
logger.debug(
|
|
1267
|
+
"Filtering handoff inputs with %s for %s -> %s",
|
|
1268
|
+
filter_name,
|
|
1269
|
+
from_agent,
|
|
1270
|
+
to_agent,
|
|
1271
|
+
)
|
|
779
1272
|
if not callable(input_filter):
|
|
780
1273
|
_error_tracing.attach_error_to_span(
|
|
781
1274
|
span_handoff,
|
|
@@ -805,6 +1298,33 @@ class RunImpl:
|
|
|
805
1298
|
)
|
|
806
1299
|
pre_step_items = list(filtered.pre_handoff_items)
|
|
807
1300
|
new_step_items = list(filtered.new_items)
|
|
1301
|
+
# For custom input filters, use input_items if available, otherwise new_items
|
|
1302
|
+
if filtered.input_items is not None:
|
|
1303
|
+
session_step_items = list(filtered.new_items)
|
|
1304
|
+
new_step_items = list(filtered.input_items)
|
|
1305
|
+
else:
|
|
1306
|
+
session_step_items = None
|
|
1307
|
+
elif should_nest_history and handoff_input_data is not None:
|
|
1308
|
+
nested = nest_handoff_history(
|
|
1309
|
+
handoff_input_data,
|
|
1310
|
+
history_mapper=run_config.handoff_history_mapper,
|
|
1311
|
+
)
|
|
1312
|
+
original_input = (
|
|
1313
|
+
nested.input_history
|
|
1314
|
+
if isinstance(nested.input_history, str)
|
|
1315
|
+
else list(nested.input_history)
|
|
1316
|
+
)
|
|
1317
|
+
pre_step_items = list(nested.pre_handoff_items)
|
|
1318
|
+
# Keep full new_items for session history.
|
|
1319
|
+
session_step_items = list(nested.new_items)
|
|
1320
|
+
# Use input_items (filtered) for model input if available.
|
|
1321
|
+
if nested.input_items is not None:
|
|
1322
|
+
new_step_items = list(nested.input_items)
|
|
1323
|
+
else:
|
|
1324
|
+
new_step_items = session_step_items
|
|
1325
|
+
else:
|
|
1326
|
+
# No filtering or nesting - session_step_items not needed
|
|
1327
|
+
session_step_items = None
|
|
808
1328
|
|
|
809
1329
|
return SingleStepResult(
|
|
810
1330
|
original_input=original_input,
|
|
@@ -812,6 +1332,9 @@ class RunImpl:
|
|
|
812
1332
|
pre_step_items=pre_step_items,
|
|
813
1333
|
new_step_items=new_step_items,
|
|
814
1334
|
next_step=NextStepHandoff(new_agent),
|
|
1335
|
+
tool_input_guardrail_results=[],
|
|
1336
|
+
tool_output_guardrail_results=[],
|
|
1337
|
+
session_step_items=session_step_items,
|
|
815
1338
|
)
|
|
816
1339
|
|
|
817
1340
|
@classmethod
|
|
@@ -860,9 +1383,13 @@ class RunImpl:
|
|
|
860
1383
|
final_output: Any,
|
|
861
1384
|
hooks: RunHooks[TContext],
|
|
862
1385
|
context_wrapper: RunContextWrapper[TContext],
|
|
1386
|
+
tool_input_guardrail_results: list[ToolInputGuardrailResult],
|
|
1387
|
+
tool_output_guardrail_results: list[ToolOutputGuardrailResult],
|
|
863
1388
|
) -> SingleStepResult:
|
|
864
1389
|
# Run the on_end hooks
|
|
865
|
-
await cls.run_final_output_hooks(
|
|
1390
|
+
await cls.run_final_output_hooks(
|
|
1391
|
+
agent, hooks, context_wrapper, original_input, final_output
|
|
1392
|
+
)
|
|
866
1393
|
|
|
867
1394
|
return SingleStepResult(
|
|
868
1395
|
original_input=original_input,
|
|
@@ -870,6 +1397,8 @@ class RunImpl:
|
|
|
870
1397
|
pre_step_items=pre_step_items,
|
|
871
1398
|
new_step_items=new_step_items,
|
|
872
1399
|
next_step=NextStepFinalOutput(final_output),
|
|
1400
|
+
tool_input_guardrail_results=tool_input_guardrail_results,
|
|
1401
|
+
tool_output_guardrail_results=tool_output_guardrail_results,
|
|
873
1402
|
)
|
|
874
1403
|
|
|
875
1404
|
@classmethod
|
|
@@ -878,11 +1407,17 @@ class RunImpl:
|
|
|
878
1407
|
agent: Agent[TContext],
|
|
879
1408
|
hooks: RunHooks[TContext],
|
|
880
1409
|
context_wrapper: RunContextWrapper[TContext],
|
|
1410
|
+
original_input: str | list[TResponseInputItem],
|
|
881
1411
|
final_output: Any,
|
|
882
1412
|
):
|
|
1413
|
+
agent_hook_context = AgentHookContext(
|
|
1414
|
+
context=context_wrapper.context,
|
|
1415
|
+
usage=context_wrapper.usage,
|
|
1416
|
+
turn_input=ItemHelpers.input_to_new_input_list(original_input),
|
|
1417
|
+
)
|
|
883
1418
|
await asyncio.gather(
|
|
884
|
-
hooks.on_agent_end(
|
|
885
|
-
agent.hooks.on_end(
|
|
1419
|
+
hooks.on_agent_end(agent_hook_context, agent, final_output),
|
|
1420
|
+
agent.hooks.on_end(agent_hook_context, agent, final_output)
|
|
886
1421
|
if agent.hooks
|
|
887
1422
|
else _coro.noop_coroutine(),
|
|
888
1423
|
)
|
|
@@ -934,6 +1469,8 @@ class RunImpl:
|
|
|
934
1469
|
event = RunItemStreamEvent(item=item, name="reasoning_item_created")
|
|
935
1470
|
elif isinstance(item, MCPApprovalRequestItem):
|
|
936
1471
|
event = RunItemStreamEvent(item=item, name="mcp_approval_requested")
|
|
1472
|
+
elif isinstance(item, MCPApprovalResponseItem):
|
|
1473
|
+
event = RunItemStreamEvent(item=item, name="mcp_approval_response")
|
|
937
1474
|
elif isinstance(item, MCPListToolsItem):
|
|
938
1475
|
event = RunItemStreamEvent(item=item, name="mcp_list_tools")
|
|
939
1476
|
|
|
@@ -961,7 +1498,10 @@ class RunImpl:
|
|
|
961
1498
|
context_wrapper: RunContextWrapper[TContext],
|
|
962
1499
|
config: RunConfig,
|
|
963
1500
|
) -> ToolsToFinalOutputResult:
|
|
964
|
-
"""
|
|
1501
|
+
"""Determine if tool results should produce a final output.
|
|
1502
|
+
Returns:
|
|
1503
|
+
ToolsToFinalOutputResult: Indicates whether final output is ready, and the output value.
|
|
1504
|
+
"""
|
|
965
1505
|
if not tool_results:
|
|
966
1506
|
return _NOT_FINAL_OUTPUT
|
|
967
1507
|
|
|
@@ -1004,6 +1544,7 @@ class TraceCtxManager:
|
|
|
1004
1544
|
group_id: str | None,
|
|
1005
1545
|
metadata: dict[str, Any] | None,
|
|
1006
1546
|
disabled: bool,
|
|
1547
|
+
tracing: TracingConfig | None = None,
|
|
1007
1548
|
):
|
|
1008
1549
|
self.trace: Trace | None = None
|
|
1009
1550
|
self.workflow_name = workflow_name
|
|
@@ -1011,6 +1552,7 @@ class TraceCtxManager:
|
|
|
1011
1552
|
self.group_id = group_id
|
|
1012
1553
|
self.metadata = metadata
|
|
1013
1554
|
self.disabled = disabled
|
|
1555
|
+
self.tracing = tracing
|
|
1014
1556
|
|
|
1015
1557
|
def __enter__(self) -> TraceCtxManager:
|
|
1016
1558
|
current_trace = get_current_trace()
|
|
@@ -1020,6 +1562,7 @@ class TraceCtxManager:
|
|
|
1020
1562
|
trace_id=self.trace_id,
|
|
1021
1563
|
group_id=self.group_id,
|
|
1022
1564
|
metadata=self.metadata,
|
|
1565
|
+
tracing=self.tracing,
|
|
1023
1566
|
disabled=self.disabled,
|
|
1024
1567
|
)
|
|
1025
1568
|
self.trace.start(mark_as_current=True)
|
|
@@ -1043,10 +1586,11 @@ class ComputerAction:
|
|
|
1043
1586
|
config: RunConfig,
|
|
1044
1587
|
acknowledged_safety_checks: list[ComputerCallOutputAcknowledgedSafetyCheck] | None = None,
|
|
1045
1588
|
) -> RunItem:
|
|
1589
|
+
computer = await resolve_computer(tool=action.computer_tool, run_context=context_wrapper)
|
|
1046
1590
|
output_func = (
|
|
1047
|
-
cls._get_screenshot_async(
|
|
1048
|
-
if isinstance(
|
|
1049
|
-
else cls._get_screenshot_sync(
|
|
1591
|
+
cls._get_screenshot_async(computer, action.tool_call)
|
|
1592
|
+
if isinstance(computer, AsyncComputer)
|
|
1593
|
+
else cls._get_screenshot_sync(computer, action.tool_call)
|
|
1050
1594
|
)
|
|
1051
1595
|
|
|
1052
1596
|
_, _, output = await asyncio.gather(
|
|
@@ -1180,13 +1724,576 @@ class LocalShellAction:
|
|
|
1180
1724
|
),
|
|
1181
1725
|
)
|
|
1182
1726
|
|
|
1727
|
+
raw_payload: dict[str, Any] = {
|
|
1728
|
+
"type": "local_shell_call_output",
|
|
1729
|
+
"call_id": call.tool_call.call_id,
|
|
1730
|
+
"output": result,
|
|
1731
|
+
}
|
|
1732
|
+
return ToolCallOutputItem(
|
|
1733
|
+
agent=agent,
|
|
1734
|
+
output=result,
|
|
1735
|
+
raw_item=raw_payload,
|
|
1736
|
+
)
|
|
1737
|
+
|
|
1738
|
+
|
|
1739
|
+
class ShellAction:
|
|
1740
|
+
@classmethod
|
|
1741
|
+
async def execute(
|
|
1742
|
+
cls,
|
|
1743
|
+
*,
|
|
1744
|
+
agent: Agent[TContext],
|
|
1745
|
+
call: ToolRunShellCall,
|
|
1746
|
+
hooks: RunHooks[TContext],
|
|
1747
|
+
context_wrapper: RunContextWrapper[TContext],
|
|
1748
|
+
config: RunConfig,
|
|
1749
|
+
) -> RunItem:
|
|
1750
|
+
await asyncio.gather(
|
|
1751
|
+
hooks.on_tool_start(context_wrapper, agent, call.shell_tool),
|
|
1752
|
+
(
|
|
1753
|
+
agent.hooks.on_tool_start(context_wrapper, agent, call.shell_tool)
|
|
1754
|
+
if agent.hooks
|
|
1755
|
+
else _coro.noop_coroutine()
|
|
1756
|
+
),
|
|
1757
|
+
)
|
|
1758
|
+
|
|
1759
|
+
shell_call = _coerce_shell_call(call.tool_call)
|
|
1760
|
+
request = ShellCommandRequest(ctx_wrapper=context_wrapper, data=shell_call)
|
|
1761
|
+
status: Literal["completed", "failed"] = "completed"
|
|
1762
|
+
output_text = ""
|
|
1763
|
+
shell_output_payload: list[dict[str, Any]] | None = None
|
|
1764
|
+
provider_meta: dict[str, Any] | None = None
|
|
1765
|
+
max_output_length: int | None = None
|
|
1766
|
+
requested_max_output_length = _normalize_max_output_length(
|
|
1767
|
+
shell_call.action.max_output_length
|
|
1768
|
+
)
|
|
1769
|
+
|
|
1770
|
+
try:
|
|
1771
|
+
executor_result = call.shell_tool.executor(request)
|
|
1772
|
+
result = (
|
|
1773
|
+
await executor_result if inspect.isawaitable(executor_result) else executor_result
|
|
1774
|
+
)
|
|
1775
|
+
|
|
1776
|
+
if isinstance(result, ShellResult):
|
|
1777
|
+
normalized = [_normalize_shell_output(entry) for entry in result.output]
|
|
1778
|
+
result_max_output_length = _normalize_max_output_length(result.max_output_length)
|
|
1779
|
+
if result_max_output_length is None:
|
|
1780
|
+
max_output_length = requested_max_output_length
|
|
1781
|
+
elif requested_max_output_length is None:
|
|
1782
|
+
max_output_length = result_max_output_length
|
|
1783
|
+
else:
|
|
1784
|
+
max_output_length = min(result_max_output_length, requested_max_output_length)
|
|
1785
|
+
if max_output_length is not None:
|
|
1786
|
+
normalized = _truncate_shell_outputs(normalized, max_output_length)
|
|
1787
|
+
output_text = _render_shell_outputs(normalized)
|
|
1788
|
+
if max_output_length is not None:
|
|
1789
|
+
output_text = output_text[:max_output_length]
|
|
1790
|
+
shell_output_payload = [_serialize_shell_output(entry) for entry in normalized]
|
|
1791
|
+
provider_meta = dict(result.provider_data or {})
|
|
1792
|
+
else:
|
|
1793
|
+
output_text = str(result)
|
|
1794
|
+
if requested_max_output_length is not None:
|
|
1795
|
+
max_output_length = requested_max_output_length
|
|
1796
|
+
output_text = output_text[:max_output_length]
|
|
1797
|
+
except Exception as exc:
|
|
1798
|
+
status = "failed"
|
|
1799
|
+
output_text = _format_shell_error(exc)
|
|
1800
|
+
if requested_max_output_length is not None:
|
|
1801
|
+
max_output_length = requested_max_output_length
|
|
1802
|
+
output_text = output_text[:max_output_length]
|
|
1803
|
+
logger.error("Shell executor failed: %s", exc, exc_info=True)
|
|
1804
|
+
|
|
1805
|
+
await asyncio.gather(
|
|
1806
|
+
hooks.on_tool_end(context_wrapper, agent, call.shell_tool, output_text),
|
|
1807
|
+
(
|
|
1808
|
+
agent.hooks.on_tool_end(context_wrapper, agent, call.shell_tool, output_text)
|
|
1809
|
+
if agent.hooks
|
|
1810
|
+
else _coro.noop_coroutine()
|
|
1811
|
+
),
|
|
1812
|
+
)
|
|
1813
|
+
|
|
1814
|
+
raw_entries: list[dict[str, Any]] | None = None
|
|
1815
|
+
if shell_output_payload:
|
|
1816
|
+
raw_entries = shell_output_payload
|
|
1817
|
+
elif output_text:
|
|
1818
|
+
raw_entries = [
|
|
1819
|
+
{
|
|
1820
|
+
"stdout": output_text,
|
|
1821
|
+
"stderr": "",
|
|
1822
|
+
"status": status,
|
|
1823
|
+
"outcome": "success" if status == "completed" else "failure",
|
|
1824
|
+
}
|
|
1825
|
+
]
|
|
1826
|
+
|
|
1827
|
+
structured_output: list[dict[str, Any]] = []
|
|
1828
|
+
if raw_entries:
|
|
1829
|
+
for entry in raw_entries:
|
|
1830
|
+
sanitized = dict(entry)
|
|
1831
|
+
status_value = sanitized.pop("status", None)
|
|
1832
|
+
sanitized.pop("provider_data", None)
|
|
1833
|
+
raw_exit_code = sanitized.pop("exit_code", None)
|
|
1834
|
+
sanitized.pop("command", None)
|
|
1835
|
+
outcome_value = sanitized.get("outcome")
|
|
1836
|
+
if isinstance(outcome_value, str):
|
|
1837
|
+
resolved_type = "exit"
|
|
1838
|
+
if status_value == "timeout":
|
|
1839
|
+
resolved_type = "timeout"
|
|
1840
|
+
outcome_payload: dict[str, Any] = {"type": resolved_type}
|
|
1841
|
+
if resolved_type == "exit":
|
|
1842
|
+
outcome_payload["exit_code"] = _resolve_exit_code(
|
|
1843
|
+
raw_exit_code, outcome_value
|
|
1844
|
+
)
|
|
1845
|
+
sanitized["outcome"] = outcome_payload
|
|
1846
|
+
elif isinstance(outcome_value, Mapping):
|
|
1847
|
+
outcome_payload = dict(outcome_value)
|
|
1848
|
+
outcome_status = cast(Optional[str], outcome_payload.pop("status", None))
|
|
1849
|
+
outcome_type = outcome_payload.get("type")
|
|
1850
|
+
if outcome_type != "timeout":
|
|
1851
|
+
outcome_payload.setdefault(
|
|
1852
|
+
"exit_code",
|
|
1853
|
+
_resolve_exit_code(
|
|
1854
|
+
raw_exit_code,
|
|
1855
|
+
outcome_status if isinstance(outcome_status, str) else None,
|
|
1856
|
+
),
|
|
1857
|
+
)
|
|
1858
|
+
sanitized["outcome"] = outcome_payload
|
|
1859
|
+
structured_output.append(sanitized)
|
|
1860
|
+
|
|
1861
|
+
raw_item: dict[str, Any] = {
|
|
1862
|
+
"type": "shell_call_output",
|
|
1863
|
+
"call_id": shell_call.call_id,
|
|
1864
|
+
"output": structured_output,
|
|
1865
|
+
"status": status,
|
|
1866
|
+
}
|
|
1867
|
+
if max_output_length is not None:
|
|
1868
|
+
raw_item["max_output_length"] = max_output_length
|
|
1869
|
+
if raw_entries:
|
|
1870
|
+
raw_item["shell_output"] = raw_entries
|
|
1871
|
+
if provider_meta:
|
|
1872
|
+
raw_item["provider_data"] = provider_meta
|
|
1873
|
+
|
|
1874
|
+
return ToolCallOutputItem(
|
|
1875
|
+
agent=agent,
|
|
1876
|
+
output=output_text,
|
|
1877
|
+
raw_item=cast(Any, raw_item),
|
|
1878
|
+
)
|
|
1879
|
+
|
|
1880
|
+
|
|
1881
|
+
class ApplyPatchAction:
|
|
1882
|
+
@classmethod
|
|
1883
|
+
async def execute(
|
|
1884
|
+
cls,
|
|
1885
|
+
*,
|
|
1886
|
+
agent: Agent[TContext],
|
|
1887
|
+
call: ToolRunApplyPatchCall,
|
|
1888
|
+
hooks: RunHooks[TContext],
|
|
1889
|
+
context_wrapper: RunContextWrapper[TContext],
|
|
1890
|
+
config: RunConfig,
|
|
1891
|
+
) -> RunItem:
|
|
1892
|
+
apply_patch_tool = call.apply_patch_tool
|
|
1893
|
+
await asyncio.gather(
|
|
1894
|
+
hooks.on_tool_start(context_wrapper, agent, apply_patch_tool),
|
|
1895
|
+
(
|
|
1896
|
+
agent.hooks.on_tool_start(context_wrapper, agent, apply_patch_tool)
|
|
1897
|
+
if agent.hooks
|
|
1898
|
+
else _coro.noop_coroutine()
|
|
1899
|
+
),
|
|
1900
|
+
)
|
|
1901
|
+
|
|
1902
|
+
status: Literal["completed", "failed"] = "completed"
|
|
1903
|
+
output_text = ""
|
|
1904
|
+
|
|
1905
|
+
try:
|
|
1906
|
+
operation = _coerce_apply_patch_operation(
|
|
1907
|
+
call.tool_call,
|
|
1908
|
+
context_wrapper=context_wrapper,
|
|
1909
|
+
)
|
|
1910
|
+
editor = apply_patch_tool.editor
|
|
1911
|
+
if operation.type == "create_file":
|
|
1912
|
+
result = editor.create_file(operation)
|
|
1913
|
+
elif operation.type == "update_file":
|
|
1914
|
+
result = editor.update_file(operation)
|
|
1915
|
+
elif operation.type == "delete_file":
|
|
1916
|
+
result = editor.delete_file(operation)
|
|
1917
|
+
else: # pragma: no cover - validated in _coerce_apply_patch_operation
|
|
1918
|
+
raise ModelBehaviorError(f"Unsupported apply_patch operation: {operation.type}")
|
|
1919
|
+
|
|
1920
|
+
awaited = await result if inspect.isawaitable(result) else result
|
|
1921
|
+
normalized = _normalize_apply_patch_result(awaited)
|
|
1922
|
+
if normalized:
|
|
1923
|
+
if normalized.status in {"completed", "failed"}:
|
|
1924
|
+
status = normalized.status
|
|
1925
|
+
if normalized.output:
|
|
1926
|
+
output_text = normalized.output
|
|
1927
|
+
except Exception as exc:
|
|
1928
|
+
status = "failed"
|
|
1929
|
+
output_text = _format_shell_error(exc)
|
|
1930
|
+
logger.error("Apply patch editor failed: %s", exc, exc_info=True)
|
|
1931
|
+
|
|
1932
|
+
await asyncio.gather(
|
|
1933
|
+
hooks.on_tool_end(context_wrapper, agent, apply_patch_tool, output_text),
|
|
1934
|
+
(
|
|
1935
|
+
agent.hooks.on_tool_end(context_wrapper, agent, apply_patch_tool, output_text)
|
|
1936
|
+
if agent.hooks
|
|
1937
|
+
else _coro.noop_coroutine()
|
|
1938
|
+
),
|
|
1939
|
+
)
|
|
1940
|
+
|
|
1941
|
+
raw_item: dict[str, Any] = {
|
|
1942
|
+
"type": "apply_patch_call_output",
|
|
1943
|
+
"call_id": _extract_apply_patch_call_id(call.tool_call),
|
|
1944
|
+
"status": status,
|
|
1945
|
+
}
|
|
1946
|
+
if output_text:
|
|
1947
|
+
raw_item["output"] = output_text
|
|
1948
|
+
|
|
1183
1949
|
return ToolCallOutputItem(
|
|
1184
1950
|
agent=agent,
|
|
1185
|
-
output=
|
|
1186
|
-
raw_item=
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1951
|
+
output=output_text,
|
|
1952
|
+
raw_item=cast(Any, raw_item),
|
|
1953
|
+
)
|
|
1954
|
+
|
|
1955
|
+
|
|
1956
|
+
def _normalize_shell_output(entry: ShellCommandOutput | Mapping[str, Any]) -> ShellCommandOutput:
|
|
1957
|
+
if isinstance(entry, ShellCommandOutput):
|
|
1958
|
+
return entry
|
|
1959
|
+
|
|
1960
|
+
stdout = str(entry.get("stdout", "") or "")
|
|
1961
|
+
stderr = str(entry.get("stderr", "") or "")
|
|
1962
|
+
command_value = entry.get("command")
|
|
1963
|
+
provider_data_value = entry.get("provider_data")
|
|
1964
|
+
outcome_value = entry.get("outcome")
|
|
1965
|
+
|
|
1966
|
+
outcome_type: Literal["exit", "timeout"] = "exit"
|
|
1967
|
+
exit_code_value: Any | None = None
|
|
1968
|
+
|
|
1969
|
+
if isinstance(outcome_value, Mapping):
|
|
1970
|
+
type_value = outcome_value.get("type")
|
|
1971
|
+
if type_value == "timeout":
|
|
1972
|
+
outcome_type = "timeout"
|
|
1973
|
+
elif isinstance(type_value, str):
|
|
1974
|
+
outcome_type = "exit"
|
|
1975
|
+
exit_code_value = outcome_value.get("exit_code") or outcome_value.get("exitCode")
|
|
1976
|
+
else:
|
|
1977
|
+
status_str = str(entry.get("status", "completed") or "completed").lower()
|
|
1978
|
+
if status_str == "timeout":
|
|
1979
|
+
outcome_type = "timeout"
|
|
1980
|
+
if isinstance(outcome_value, str):
|
|
1981
|
+
if outcome_value == "failure":
|
|
1982
|
+
exit_code_value = 1
|
|
1983
|
+
elif outcome_value == "success":
|
|
1984
|
+
exit_code_value = 0
|
|
1985
|
+
exit_code_value = exit_code_value or entry.get("exit_code") or entry.get("exitCode")
|
|
1986
|
+
|
|
1987
|
+
outcome = ShellCallOutcome(
|
|
1988
|
+
type=outcome_type,
|
|
1989
|
+
exit_code=_normalize_exit_code(exit_code_value),
|
|
1990
|
+
)
|
|
1991
|
+
|
|
1992
|
+
return ShellCommandOutput(
|
|
1993
|
+
stdout=stdout,
|
|
1994
|
+
stderr=stderr,
|
|
1995
|
+
outcome=outcome,
|
|
1996
|
+
command=str(command_value) if command_value is not None else None,
|
|
1997
|
+
provider_data=cast(dict[str, Any], provider_data_value)
|
|
1998
|
+
if isinstance(provider_data_value, Mapping)
|
|
1999
|
+
else provider_data_value,
|
|
2000
|
+
)
|
|
2001
|
+
|
|
2002
|
+
|
|
2003
|
+
def _serialize_shell_output(output: ShellCommandOutput) -> dict[str, Any]:
|
|
2004
|
+
payload: dict[str, Any] = {
|
|
2005
|
+
"stdout": output.stdout,
|
|
2006
|
+
"stderr": output.stderr,
|
|
2007
|
+
"status": output.status,
|
|
2008
|
+
"outcome": {"type": output.outcome.type},
|
|
2009
|
+
}
|
|
2010
|
+
if output.outcome.type == "exit":
|
|
2011
|
+
payload["outcome"]["exit_code"] = output.outcome.exit_code
|
|
2012
|
+
if output.outcome.exit_code is not None:
|
|
2013
|
+
payload["exit_code"] = output.outcome.exit_code
|
|
2014
|
+
if output.command is not None:
|
|
2015
|
+
payload["command"] = output.command
|
|
2016
|
+
if output.provider_data:
|
|
2017
|
+
payload["provider_data"] = output.provider_data
|
|
2018
|
+
return payload
|
|
2019
|
+
|
|
2020
|
+
|
|
2021
|
+
def _resolve_exit_code(raw_exit_code: Any, outcome_status: str | None) -> int:
|
|
2022
|
+
normalized = _normalize_exit_code(raw_exit_code)
|
|
2023
|
+
if normalized is not None:
|
|
2024
|
+
return normalized
|
|
2025
|
+
|
|
2026
|
+
normalized_status = (outcome_status or "").lower()
|
|
2027
|
+
if normalized_status == "success":
|
|
2028
|
+
return 0
|
|
2029
|
+
if normalized_status == "failure":
|
|
2030
|
+
return 1
|
|
2031
|
+
return 0
|
|
2032
|
+
|
|
2033
|
+
|
|
2034
|
+
def _normalize_exit_code(value: Any) -> int | None:
|
|
2035
|
+
if value is None:
|
|
2036
|
+
return None
|
|
2037
|
+
try:
|
|
2038
|
+
return int(value)
|
|
2039
|
+
except (TypeError, ValueError):
|
|
2040
|
+
return None
|
|
2041
|
+
|
|
2042
|
+
|
|
2043
|
+
def _render_shell_outputs(outputs: Sequence[ShellCommandOutput]) -> str:
|
|
2044
|
+
if not outputs:
|
|
2045
|
+
return "(no output)"
|
|
2046
|
+
|
|
2047
|
+
rendered_chunks: list[str] = []
|
|
2048
|
+
for result in outputs:
|
|
2049
|
+
chunk_lines: list[str] = []
|
|
2050
|
+
if result.command:
|
|
2051
|
+
chunk_lines.append(f"$ {result.command}")
|
|
2052
|
+
|
|
2053
|
+
stdout = result.stdout.rstrip("\n")
|
|
2054
|
+
stderr = result.stderr.rstrip("\n")
|
|
2055
|
+
|
|
2056
|
+
if stdout:
|
|
2057
|
+
chunk_lines.append(stdout)
|
|
2058
|
+
if stderr:
|
|
2059
|
+
if stdout:
|
|
2060
|
+
chunk_lines.append("")
|
|
2061
|
+
chunk_lines.append("stderr:")
|
|
2062
|
+
chunk_lines.append(stderr)
|
|
2063
|
+
|
|
2064
|
+
if result.exit_code not in (None, 0):
|
|
2065
|
+
chunk_lines.append(f"exit code: {result.exit_code}")
|
|
2066
|
+
if result.status == "timeout":
|
|
2067
|
+
chunk_lines.append("status: timeout")
|
|
2068
|
+
|
|
2069
|
+
chunk = "\n".join(chunk_lines).strip()
|
|
2070
|
+
rendered_chunks.append(chunk if chunk else "(no output)")
|
|
2071
|
+
|
|
2072
|
+
return "\n\n".join(rendered_chunks)
|
|
2073
|
+
|
|
2074
|
+
|
|
2075
|
+
def _truncate_shell_outputs(
|
|
2076
|
+
outputs: Sequence[ShellCommandOutput], max_length: int
|
|
2077
|
+
) -> list[ShellCommandOutput]:
|
|
2078
|
+
if max_length <= 0:
|
|
2079
|
+
return [
|
|
2080
|
+
ShellCommandOutput(
|
|
2081
|
+
stdout="",
|
|
2082
|
+
stderr="",
|
|
2083
|
+
outcome=output.outcome,
|
|
2084
|
+
command=output.command,
|
|
2085
|
+
provider_data=output.provider_data,
|
|
2086
|
+
)
|
|
2087
|
+
for output in outputs
|
|
2088
|
+
]
|
|
2089
|
+
|
|
2090
|
+
remaining = max_length
|
|
2091
|
+
truncated: list[ShellCommandOutput] = []
|
|
2092
|
+
for output in outputs:
|
|
2093
|
+
stdout = ""
|
|
2094
|
+
stderr = ""
|
|
2095
|
+
if remaining > 0 and output.stdout:
|
|
2096
|
+
stdout = output.stdout[:remaining]
|
|
2097
|
+
remaining -= len(stdout)
|
|
2098
|
+
if remaining > 0 and output.stderr:
|
|
2099
|
+
stderr = output.stderr[:remaining]
|
|
2100
|
+
remaining -= len(stderr)
|
|
2101
|
+
truncated.append(
|
|
2102
|
+
ShellCommandOutput(
|
|
2103
|
+
stdout=stdout,
|
|
2104
|
+
stderr=stderr,
|
|
2105
|
+
outcome=output.outcome,
|
|
2106
|
+
command=output.command,
|
|
2107
|
+
provider_data=output.provider_data,
|
|
2108
|
+
)
|
|
1192
2109
|
)
|
|
2110
|
+
|
|
2111
|
+
return truncated
|
|
2112
|
+
|
|
2113
|
+
|
|
2114
|
+
def _normalize_max_output_length(value: int | None) -> int | None:
|
|
2115
|
+
if value is None:
|
|
2116
|
+
return None
|
|
2117
|
+
return max(0, value)
|
|
2118
|
+
|
|
2119
|
+
|
|
2120
|
+
def _format_shell_error(error: Exception | BaseException | Any) -> str:
|
|
2121
|
+
if isinstance(error, Exception):
|
|
2122
|
+
message = str(error)
|
|
2123
|
+
return message or error.__class__.__name__
|
|
2124
|
+
try:
|
|
2125
|
+
return str(error)
|
|
2126
|
+
except Exception: # pragma: no cover - fallback only
|
|
2127
|
+
return repr(error)
|
|
2128
|
+
|
|
2129
|
+
|
|
2130
|
+
def _get_mapping_or_attr(target: Any, key: str) -> Any:
|
|
2131
|
+
if isinstance(target, Mapping):
|
|
2132
|
+
return target.get(key)
|
|
2133
|
+
return getattr(target, key, None)
|
|
2134
|
+
|
|
2135
|
+
|
|
2136
|
+
def _extract_shell_call_id(tool_call: Any) -> str:
|
|
2137
|
+
value = _get_mapping_or_attr(tool_call, "call_id")
|
|
2138
|
+
if not value:
|
|
2139
|
+
value = _get_mapping_or_attr(tool_call, "callId")
|
|
2140
|
+
if not value:
|
|
2141
|
+
raise ModelBehaviorError("Shell call is missing call_id.")
|
|
2142
|
+
return str(value)
|
|
2143
|
+
|
|
2144
|
+
|
|
2145
|
+
def _coerce_shell_call(tool_call: Any) -> ShellCallData:
|
|
2146
|
+
call_id = _extract_shell_call_id(tool_call)
|
|
2147
|
+
action_payload = _get_mapping_or_attr(tool_call, "action")
|
|
2148
|
+
if action_payload is None:
|
|
2149
|
+
raise ModelBehaviorError("Shell call is missing an action payload.")
|
|
2150
|
+
|
|
2151
|
+
commands_value = _get_mapping_or_attr(action_payload, "commands")
|
|
2152
|
+
if not isinstance(commands_value, Sequence):
|
|
2153
|
+
raise ModelBehaviorError("Shell call action is missing commands.")
|
|
2154
|
+
commands: list[str] = []
|
|
2155
|
+
for entry in commands_value:
|
|
2156
|
+
if entry is None:
|
|
2157
|
+
continue
|
|
2158
|
+
commands.append(str(entry))
|
|
2159
|
+
if not commands:
|
|
2160
|
+
raise ModelBehaviorError("Shell call action must include at least one command.")
|
|
2161
|
+
|
|
2162
|
+
timeout_value = (
|
|
2163
|
+
_get_mapping_or_attr(action_payload, "timeout_ms")
|
|
2164
|
+
or _get_mapping_or_attr(action_payload, "timeoutMs")
|
|
2165
|
+
or _get_mapping_or_attr(action_payload, "timeout")
|
|
2166
|
+
)
|
|
2167
|
+
timeout_ms = int(timeout_value) if isinstance(timeout_value, (int, float)) else None
|
|
2168
|
+
|
|
2169
|
+
max_length_value = _get_mapping_or_attr(action_payload, "max_output_length")
|
|
2170
|
+
if max_length_value is None:
|
|
2171
|
+
max_length_value = _get_mapping_or_attr(action_payload, "maxOutputLength")
|
|
2172
|
+
max_output_length = (
|
|
2173
|
+
int(max_length_value) if isinstance(max_length_value, (int, float)) else None
|
|
2174
|
+
)
|
|
2175
|
+
|
|
2176
|
+
action = ShellActionRequest(
|
|
2177
|
+
commands=commands,
|
|
2178
|
+
timeout_ms=timeout_ms,
|
|
2179
|
+
max_output_length=max_output_length,
|
|
2180
|
+
)
|
|
2181
|
+
|
|
2182
|
+
status_value = _get_mapping_or_attr(tool_call, "status")
|
|
2183
|
+
status_literal: Literal["in_progress", "completed"] | None = None
|
|
2184
|
+
if isinstance(status_value, str):
|
|
2185
|
+
lowered = status_value.lower()
|
|
2186
|
+
if lowered in {"in_progress", "completed"}:
|
|
2187
|
+
status_literal = cast(Literal["in_progress", "completed"], lowered)
|
|
2188
|
+
|
|
2189
|
+
return ShellCallData(call_id=call_id, action=action, status=status_literal, raw=tool_call)
|
|
2190
|
+
|
|
2191
|
+
|
|
2192
|
+
def _parse_apply_patch_custom_input(input_json: str) -> dict[str, Any]:
|
|
2193
|
+
try:
|
|
2194
|
+
parsed = json.loads(input_json or "{}")
|
|
2195
|
+
except json.JSONDecodeError as exc:
|
|
2196
|
+
raise ModelBehaviorError(f"Invalid apply_patch input JSON: {exc}") from exc
|
|
2197
|
+
if not isinstance(parsed, Mapping):
|
|
2198
|
+
raise ModelBehaviorError("Apply patch input must be a JSON object.")
|
|
2199
|
+
return dict(parsed)
|
|
2200
|
+
|
|
2201
|
+
|
|
2202
|
+
def _parse_apply_patch_function_args(arguments: str) -> dict[str, Any]:
|
|
2203
|
+
try:
|
|
2204
|
+
parsed = json.loads(arguments or "{}")
|
|
2205
|
+
except json.JSONDecodeError as exc:
|
|
2206
|
+
raise ModelBehaviorError(f"Invalid apply_patch arguments JSON: {exc}") from exc
|
|
2207
|
+
if not isinstance(parsed, Mapping):
|
|
2208
|
+
raise ModelBehaviorError("Apply patch arguments must be a JSON object.")
|
|
2209
|
+
return dict(parsed)
|
|
2210
|
+
|
|
2211
|
+
|
|
2212
|
+
def _extract_apply_patch_call_id(tool_call: Any) -> str:
    """Return the call id from *tool_call*, accepting snake_case or camelCase.

    The snake_case key wins when both are present. Raises
    ``ModelBehaviorError`` when neither key carries a truthy value.
    """
    for key in ("call_id", "callId"):
        candidate = _get_mapping_or_attr(tool_call, key)
        if candidate:
            return str(candidate)
    raise ModelBehaviorError("Apply patch call is missing call_id.")
|
|
2219
|
+
|
|
2220
|
+
|
|
2221
|
+
def _coerce_apply_patch_operation(
    tool_call: Any, *, context_wrapper: RunContextWrapper[Any]
) -> ApplyPatchOperation:
    """Validate *tool_call*'s operation payload and build an ApplyPatchOperation.

    Raises ``ModelBehaviorError`` when the payload is absent, names an
    unknown operation type, lacks a usable path, or — for create/update
    operations — lacks the required diff text.
    """
    payload = _get_mapping_or_attr(tool_call, "operation")
    if payload is None:
        raise ModelBehaviorError("Apply patch call is missing an operation payload.")

    op_name = str(_get_mapping_or_attr(payload, "type"))
    if op_name not in {"create_file", "update_file", "delete_file"}:
        raise ModelBehaviorError(f"Unknown apply_patch operation: {op_name}")
    op_kind = cast(Literal["create_file", "update_file", "delete_file"], op_name)

    target_path = _get_mapping_or_attr(payload, "path")
    if not isinstance(target_path, str) or not target_path:
        raise ModelBehaviorError("Apply patch operation is missing a valid path.")

    # Deletions carry no diff; create/update must supply a non-empty one.
    patch_diff: str | None = None
    if op_kind != "delete_file":
        raw_diff = _get_mapping_or_attr(payload, "diff")
        if not isinstance(raw_diff, str) or not raw_diff:
            raise ModelBehaviorError(
                f"Apply patch operation {op_kind} is missing the required diff payload."
            )
        patch_diff = raw_diff

    return ApplyPatchOperation(
        type=op_kind,
        path=str(target_path),
        diff=patch_diff,
        ctx_wrapper=context_wrapper,
    )
|
|
2253
|
+
|
|
2254
|
+
|
|
2255
|
+
def _normalize_apply_patch_result(
    result: ApplyPatchResult | Mapping[str, Any] | str | None,
) -> ApplyPatchResult | None:
    """Coerce an editor's return value into an ApplyPatchResult.

    ``None`` passes through, an existing ApplyPatchResult is returned
    as-is, a mapping contributes its recognized status/output fields, and
    anything else is stringified into the output field.
    """
    if result is None:
        return None
    if isinstance(result, ApplyPatchResult):
        return result
    if isinstance(result, Mapping):
        raw_status = result.get("status")
        raw_output = result.get("output")
        return ApplyPatchResult(
            # Only the two known statuses survive normalization.
            status=raw_status if raw_status in {"completed", "failed"} else None,
            output=None if raw_output is None else str(raw_output),
        )
    if isinstance(result, str):
        return ApplyPatchResult(output=result)
    return ApplyPatchResult(output=str(result))
|
|
2271
|
+
|
|
2272
|
+
|
|
2273
|
+
def _is_apply_patch_name(name: str | None, tool: ApplyPatchTool | None) -> bool:
|
|
2274
|
+
if not name:
|
|
2275
|
+
return False
|
|
2276
|
+
candidate = name.strip().lower()
|
|
2277
|
+
if candidate.startswith("apply_patch"):
|
|
2278
|
+
return True
|
|
2279
|
+
if tool and candidate == tool.name.strip().lower():
|
|
2280
|
+
return True
|
|
2281
|
+
return False
|
|
2282
|
+
|
|
2283
|
+
|
|
2284
|
+
def _build_litellm_json_tool_call(output: ResponseFunctionToolCall) -> FunctionTool:
    """Wrap a model-emitted JSON tool call in a minimal FunctionTool.

    The returned tool decodes a JSON string argument payload (or passes a
    non-string payload through unchanged). It declares an empty
    params_json_schema, reuses the call's name as its description, and is
    always enabled.
    """

    async def on_invoke_tool(_ctx: ToolContext[Any], value: Any) -> Any:
        # String payloads arrive as raw JSON text; decode them. Anything
        # already structured is returned untouched.
        if isinstance(value, str):
            # Use the module-level `json` import, consistent with the rest
            # of this module (the original re-imported json locally here).
            return json.loads(value)
        return value

    return FunctionTool(
        name=output.name,
        # The model provides no description for ad-hoc calls; reuse the name.
        description=output.name,
        params_json_schema={},
        on_invoke_tool=on_invoke_tool,
        strict_json_schema=True,
        is_enabled=True,
    )
|