openai-agents 0.2.8__py3-none-any.whl → 0.6.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. agents/__init__.py +105 -4
  2. agents/_debug.py +15 -4
  3. agents/_run_impl.py +1203 -96
  4. agents/agent.py +164 -19
  5. agents/apply_diff.py +329 -0
  6. agents/editor.py +47 -0
  7. agents/exceptions.py +35 -0
  8. agents/extensions/experimental/__init__.py +6 -0
  9. agents/extensions/experimental/codex/__init__.py +92 -0
  10. agents/extensions/experimental/codex/codex.py +89 -0
  11. agents/extensions/experimental/codex/codex_options.py +35 -0
  12. agents/extensions/experimental/codex/codex_tool.py +1142 -0
  13. agents/extensions/experimental/codex/events.py +162 -0
  14. agents/extensions/experimental/codex/exec.py +263 -0
  15. agents/extensions/experimental/codex/items.py +245 -0
  16. agents/extensions/experimental/codex/output_schema_file.py +50 -0
  17. agents/extensions/experimental/codex/payloads.py +31 -0
  18. agents/extensions/experimental/codex/thread.py +214 -0
  19. agents/extensions/experimental/codex/thread_options.py +54 -0
  20. agents/extensions/experimental/codex/turn_options.py +36 -0
  21. agents/extensions/handoff_filters.py +13 -1
  22. agents/extensions/memory/__init__.py +120 -0
  23. agents/extensions/memory/advanced_sqlite_session.py +1285 -0
  24. agents/extensions/memory/async_sqlite_session.py +239 -0
  25. agents/extensions/memory/dapr_session.py +423 -0
  26. agents/extensions/memory/encrypt_session.py +185 -0
  27. agents/extensions/memory/redis_session.py +261 -0
  28. agents/extensions/memory/sqlalchemy_session.py +334 -0
  29. agents/extensions/models/litellm_model.py +449 -36
  30. agents/extensions/models/litellm_provider.py +3 -1
  31. agents/function_schema.py +47 -5
  32. agents/guardrail.py +16 -2
  33. agents/{handoffs.py → handoffs/__init__.py} +89 -47
  34. agents/handoffs/history.py +268 -0
  35. agents/items.py +237 -11
  36. agents/lifecycle.py +75 -14
  37. agents/mcp/server.py +280 -37
  38. agents/mcp/util.py +24 -3
  39. agents/memory/__init__.py +22 -2
  40. agents/memory/openai_conversations_session.py +91 -0
  41. agents/memory/openai_responses_compaction_session.py +249 -0
  42. agents/memory/session.py +19 -261
  43. agents/memory/sqlite_session.py +275 -0
  44. agents/memory/util.py +20 -0
  45. agents/model_settings.py +14 -3
  46. agents/models/__init__.py +13 -0
  47. agents/models/chatcmpl_converter.py +303 -50
  48. agents/models/chatcmpl_helpers.py +63 -0
  49. agents/models/chatcmpl_stream_handler.py +290 -68
  50. agents/models/default_models.py +58 -0
  51. agents/models/interface.py +4 -0
  52. agents/models/openai_chatcompletions.py +103 -49
  53. agents/models/openai_provider.py +10 -4
  54. agents/models/openai_responses.py +162 -46
  55. agents/realtime/__init__.py +4 -0
  56. agents/realtime/_util.py +14 -3
  57. agents/realtime/agent.py +7 -0
  58. agents/realtime/audio_formats.py +53 -0
  59. agents/realtime/config.py +78 -10
  60. agents/realtime/events.py +18 -0
  61. agents/realtime/handoffs.py +2 -2
  62. agents/realtime/items.py +17 -1
  63. agents/realtime/model.py +13 -0
  64. agents/realtime/model_events.py +12 -0
  65. agents/realtime/model_inputs.py +18 -1
  66. agents/realtime/openai_realtime.py +696 -150
  67. agents/realtime/session.py +243 -23
  68. agents/repl.py +7 -3
  69. agents/result.py +197 -38
  70. agents/run.py +949 -168
  71. agents/run_context.py +13 -2
  72. agents/stream_events.py +1 -0
  73. agents/strict_schema.py +14 -0
  74. agents/tool.py +413 -15
  75. agents/tool_context.py +22 -1
  76. agents/tool_guardrails.py +279 -0
  77. agents/tracing/__init__.py +2 -0
  78. agents/tracing/config.py +9 -0
  79. agents/tracing/create.py +4 -0
  80. agents/tracing/processor_interface.py +84 -11
  81. agents/tracing/processors.py +65 -54
  82. agents/tracing/provider.py +64 -7
  83. agents/tracing/spans.py +105 -0
  84. agents/tracing/traces.py +116 -16
  85. agents/usage.py +134 -12
  86. agents/util/_json.py +19 -1
  87. agents/util/_transforms.py +12 -2
  88. agents/voice/input.py +5 -4
  89. agents/voice/models/openai_stt.py +17 -9
  90. agents/voice/pipeline.py +2 -0
  91. agents/voice/pipeline_config.py +4 -0
  92. {openai_agents-0.2.8.dist-info → openai_agents-0.6.8.dist-info}/METADATA +44 -19
  93. openai_agents-0.6.8.dist-info/RECORD +134 -0
  94. {openai_agents-0.2.8.dist-info → openai_agents-0.6.8.dist-info}/WHEEL +1 -1
  95. openai_agents-0.2.8.dist-info/RECORD +0 -103
  96. {openai_agents-0.2.8.dist-info → openai_agents-0.6.8.dist-info}/licenses/LICENSE +0 -0
agents/run.py CHANGED
@@ -1,14 +1,21 @@
 from __future__ import annotations
 
 import asyncio
+import contextlib
 import inspect
+import os
+import warnings
 from dataclasses import dataclass, field
-from typing import Any, Callable, Generic, cast
+from typing import Any, Callable, Generic, cast, get_args, get_origin
 
-from openai.types.responses import ResponseCompletedEvent
+from openai.types.responses import (
+    ResponseCompletedEvent,
+    ResponseOutputItemDoneEvent,
+)
 from openai.types.responses.response_prompt_param import (
     ResponsePromptParam,
 )
+from openai.types.responses.response_reasoning_item import ResponseReasoningItem
 from typing_extensions import NotRequired, TypedDict, Unpack
 
 from ._run_impl import (
@@ -39,19 +46,36 @@ from .guardrail import (
     OutputGuardrail,
     OutputGuardrailResult,
 )
-from .handoffs import Handoff, HandoffInputFilter, handoff
-from .items import ItemHelpers, ModelResponse, RunItem, TResponseInputItem
-from .lifecycle import RunHooks
+from .handoffs import Handoff, HandoffHistoryMapper, HandoffInputFilter, handoff
+from .items import (
+    HandoffCallItem,
+    HandoffOutputItem,
+    ItemHelpers,
+    ModelResponse,
+    ReasoningItem,
+    RunItem,
+    ToolCallItem,
+    ToolCallItemTypes,
+    ToolCallOutputItem,
+    TResponseInputItem,
+)
+from .lifecycle import AgentHooksBase, RunHooks, RunHooksBase
 from .logger import logger
-from .memory import Session
+from .memory import Session, SessionInputCallback, is_openai_responses_compaction_aware_session
 from .model_settings import ModelSettings
 from .models.interface import Model, ModelProvider
 from .models.multi_provider import MultiProvider
 from .result import RunResult, RunResultStreaming
-from .run_context import RunContextWrapper, TContext
-from .stream_events import AgentUpdatedStreamEvent, RawResponsesStreamEvent
-from .tool import Tool
-from .tracing import Span, SpanError, agent_span, get_current_trace, trace
+from .run_context import AgentHookContext, RunContextWrapper, TContext
+from .stream_events import (
+    AgentUpdatedStreamEvent,
+    RawResponsesStreamEvent,
+    RunItemStreamEvent,
+    StreamEvent,
+)
+from .tool import Tool, dispose_resolved_computers
+from .tool_guardrails import ToolInputGuardrailResult, ToolOutputGuardrailResult
+from .tracing import Span, SpanError, TracingConfig, agent_span, get_current_trace, trace
 from .tracing.span_data import AgentSpanData
 from .usage import Usage
 from .util import _coro, _error_tracing
@@ -81,6 +105,12 @@ def get_default_agent_runner() -> AgentRunner:
     return DEFAULT_AGENT_RUNNER
 
 
+def _default_trace_include_sensitive_data() -> bool:
+    """Returns the default value for trace_include_sensitive_data based on environment variable."""
+    val = os.getenv("OPENAI_AGENTS_TRACE_INCLUDE_SENSITIVE_DATA", "true")
+    return val.strip().lower() in ("1", "true", "yes", "on")
+
+
 @dataclass
 class ModelInputData:
     """Container for the data that will be sent to the model."""
@@ -98,6 +128,56 @@ class CallModelData(Generic[TContext]):
     context: TContext | None
 
 
+@dataclass
+class _ServerConversationTracker:
+    """Tracks server-side conversation state for either conversation_id or
+    previous_response_id modes.
+
+    Note: When auto_previous_response_id=True is used, response chaining is enabled
+    automatically for the first turn, even when there's no actual previous response ID yet.
+    """
+
+    conversation_id: str | None = None
+    previous_response_id: str | None = None
+    auto_previous_response_id: bool = False
+    sent_items: set[int] = field(default_factory=set)
+    server_items: set[int] = field(default_factory=set)
+
+    def track_server_items(self, model_response: ModelResponse) -> None:
+        for output_item in model_response.output:
+            self.server_items.add(id(output_item))
+
+        # Update previous_response_id when using previous_response_id mode or auto mode
+        if (
+            self.conversation_id is None
+            and (self.previous_response_id is not None or self.auto_previous_response_id)
+            and model_response.response_id is not None
+        ):
+            self.previous_response_id = model_response.response_id
+
+    def prepare_input(
+        self,
+        original_input: str | list[TResponseInputItem],
+        generated_items: list[RunItem],
+    ) -> list[TResponseInputItem]:
+        input_items: list[TResponseInputItem] = []
+
+        # On first call (when there are no generated items yet), include the original input
+        if not generated_items:
+            input_items.extend(ItemHelpers.input_to_new_input_list(original_input))
+
+        # Process generated_items, skip items already sent or from server
+        for item in generated_items:
+            raw_item_id = id(item.raw_item)
+
+            if raw_item_id in self.sent_items or raw_item_id in self.server_items:
+                continue
+            input_items.append(item.to_input_item())
+            self.sent_items.add(raw_item_id)
+
+        return input_items
+
+
 # Type alias for the optional input filter callback
 CallModelInputFilter = Callable[[CallModelData[Any]], MaybeAwaitable[ModelInputData]]
 
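
The tracker's bookkeeping is identity-based: anything the server already stored (tracked via `id()`) or that was sent on an earlier turn is skipped when the next turn's input is assembled. A simplified, self-contained sketch of that dedup logic — `Item` and `Tracker` here are hypothetical stand-ins for `RunItem` and `_ServerConversationTracker`, not SDK types:

```python
# Simplified illustration of the identity-based dedup used above.
from dataclasses import dataclass, field


@dataclass
class Item:  # hypothetical stand-in for RunItem
    raw_item: object

    def to_input_item(self) -> object:
        return self.raw_item


@dataclass
class Tracker:  # hypothetical stand-in for _ServerConversationTracker
    sent_items: set[int] = field(default_factory=set)
    server_items: set[int] = field(default_factory=set)

    def prepare_input(self, generated_items: list[Item]) -> list[object]:
        out = []
        for item in generated_items:
            key = id(item.raw_item)
            if key in self.sent_items or key in self.server_items:
                continue  # already on the server, or sent on an earlier turn
            out.append(item.to_input_item())
            self.sent_items.add(key)
        return out


tracker = Tracker()
a, b = Item({"role": "assistant"}), Item({"role": "tool"})
assert len(tracker.prepare_input([a, b])) == 2  # first turn sends both items
assert tracker.prepare_input([a, b]) == []      # second turn sends nothing new
```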
@@ -125,6 +205,19 @@ class RunConfig:
     agent. See the documentation in `Handoff.input_filter` for more details.
     """
 
+    nest_handoff_history: bool = True
+    """Wrap prior run history in a single assistant message before handing off when no custom
+    input filter is set. Set to False to preserve the raw transcript behavior from previous
+    releases.
+    """
+
+    handoff_history_mapper: HandoffHistoryMapper | None = None
+    """Optional function that receives the normalized transcript (history + handoff items) and
+    returns the input history that should be passed to the next agent. When left as `None`, the
+    runner collapses the transcript into a single assistant message. This function only runs when
+    `nest_handoff_history` is True.
+    """
+
     input_guardrails: list[InputGuardrail[Any]] | None = None
     """A list of input guardrails to run on the initial run input."""
 
@@ -135,7 +228,12 @@ class RunConfig:
     """Whether tracing is disabled for the agent run. If disabled, we will not trace the agent run.
     """
 
-    trace_include_sensitive_data: bool = True
+    tracing: TracingConfig | None = None
+    """Tracing configuration for this run."""
+
+    trace_include_sensitive_data: bool = field(
+        default_factory=_default_trace_include_sensitive_data
+    )
     """Whether we include potentially sensitive data (for example: inputs/outputs of tool calls or
     LLM generations) in traces. If False, we'll still create spans for these events, but the
     sensitive data will not be included.
@@ -160,6 +258,13 @@ class RunConfig:
     An optional dictionary of additional metadata to include with the trace.
     """
 
+    session_input_callback: SessionInputCallback | None = None
+    """Defines how to handle session history when new input is provided.
+    - `None` (default): The new input is appended to the session history.
+    - `SessionInputCallback`: A custom function that receives the history and new input, and
+      returns the desired combined list of items.
+    """
+
     call_model_input_filter: CallModelInputFilter | None = None
     """
     Optional callback that is invoked immediately before calling the model. It receives the current
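
Per that docstring, the callback receives the stored history plus the new input and returns the combined list the run should start from. A sketch of a trimming callback — the `(history, new_input) -> list` shape follows the docstring above, and the parameter names are illustrative; check the `SessionInputCallback` alias in `agents/memory` for the exact signature:

```python
# Hypothetical trimming callback: cap the combined history at 50 items.
from agents import RunConfig


def trim_history(history: list, new_input: list) -> list:
    combined = list(history) + list(new_input)
    return combined[-50:]


run_config = RunConfig(session_input_callback=trim_history)
```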
@@ -189,6 +294,12 @@ class RunOptions(TypedDict, Generic[TContext]):
     previous_response_id: NotRequired[str | None]
     """The ID of the previous response, if any."""
 
+    auto_previous_response_id: NotRequired[bool]
+    """Enable automatic response chaining for the first turn."""
+
+    conversation_id: NotRequired[str | None]
+    """The ID of the stored conversation, if any."""
+
     session: NotRequired[Session | None]
     """The session for the run."""
 
@@ -205,34 +316,58 @@ class Runner:
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
         previous_response_id: str | None = None,
+        auto_previous_response_id: bool = False,
+        conversation_id: str | None = None,
         session: Session | None = None,
     ) -> RunResult:
-        """Run a workflow starting at the given agent. The agent will run in a loop until a final
-        output is generated. The loop runs like so:
-        1. The agent is invoked with the given input.
-        2. If there is a final output (i.e. the agent produces something of type
-            `agent.output_type`, the loop terminates.
-        3. If there's a handoff, we run the loop again, with the new agent.
-        4. Else, we run tool calls (if any), and re-run the loop.
+        """
+        Run a workflow starting at the given agent.
+
+        The agent will run in a loop until a final output is generated. The loop runs like so:
+
+        1. The agent is invoked with the given input.
+        2. If there is a final output (i.e. the agent produces something of type
+           `agent.output_type`), the loop terminates.
+        3. If there's a handoff, we run the loop again, with the new agent.
+        4. Else, we run tool calls (if any), and re-run the loop.
+
         In two cases, the agent may raise an exception:
-        1. If the max_turns is exceeded, a MaxTurnsExceeded exception is raised.
-        2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered exception is raised.
-        Note that only the first agent's input guardrails are run.
+
+        1. If the max_turns is exceeded, a MaxTurnsExceeded exception is raised.
+        2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered
+           exception is raised.
+
+        Note:
+            Only the first agent's input guardrails are run.
+
         Args:
             starting_agent: The starting agent to run.
-            input: The initial input to the agent. You can pass a single string for a user message,
-                or a list of input items.
+            input: The initial input to the agent. You can pass a single string for a
+                user message, or a list of input items.
             context: The context to run the agent with.
-            max_turns: The maximum number of turns to run the agent for. A turn is defined as one
-                AI invocation (including any tool calls that might occur).
+            max_turns: The maximum number of turns to run the agent for. A turn is
+                defined as one AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
-            previous_response_id: The ID of the previous response, if using OpenAI models via the
-                Responses API, this allows you to skip passing in input from the previous turn.
+            previous_response_id: The ID of the previous response. If using OpenAI
+                models via the Responses API, this allows you to skip passing in input
+                from the previous turn.
+            conversation_id: The conversation ID
+                (https://platform.openai.com/docs/guides/conversation-state?api-mode=responses).
+                If provided, the conversation will be used to read and write items.
+                Every agent will have access to the conversation history so far,
+                and its output items will be written to the conversation.
+                We recommend only using this if you are exclusively using OpenAI models;
+                other model providers don't write to the Conversation object,
+                so you'll end up having partial conversations stored.
+            session: A session for automatic conversation history management.
+
         Returns:
-            A run result containing all the inputs, guardrail results and the output of the last
-            agent. Agents may perform handoffs, so we don't know the specific type of the output.
+            A run result containing all the inputs, guardrail results and the output of
+            the last agent. Agents may perform handoffs, so we don't know the specific
+            type of the output.
         """
+
         runner = DEFAULT_AGENT_RUNNER
         return await runner.run(
             starting_agent,
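
Taken together with the docstring above, a server-managed conversation run looks like this — the agent and the conversation ID are placeholders, not values from this diff:

```python
# Hypothetical usage of the new conversation parameters.
import asyncio

from agents import Agent, Runner


async def main() -> None:
    agent = Agent(name="Assistant", instructions="Reply concisely.")

    # Server-managed history: items are read from and written to the stored
    # conversation, so no local session object is needed.
    result = await Runner.run(
        agent,
        "Summarize our discussion so far.",
        conversation_id="conv_123",  # placeholder Conversations API ID
    )
    print(result.final_output)


asyncio.run(main())
```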
@@ -242,6 +377,8 @@ class Runner:
             hooks=hooks,
             run_config=run_config,
             previous_response_id=previous_response_id,
+            auto_previous_response_id=auto_previous_response_id,
+            conversation_id=conversation_id,
             session=session,
         )
 
@@ -256,37 +393,56 @@ class Runner:
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
         previous_response_id: str | None = None,
+        auto_previous_response_id: bool = False,
+        conversation_id: str | None = None,
         session: Session | None = None,
     ) -> RunResult:
-        """Run a workflow synchronously, starting at the given agent. Note that this just wraps the
-        `run` method, so it will not work if there's already an event loop (e.g. inside an async
-        function, or in a Jupyter notebook or async context like FastAPI). For those cases, use
-        the `run` method instead.
-        The agent will run in a loop until a final output is generated. The loop runs like so:
-        1. The agent is invoked with the given input.
-        2. If there is a final output (i.e. the agent produces something of type
-            `agent.output_type`, the loop terminates.
-        3. If there's a handoff, we run the loop again, with the new agent.
-        4. Else, we run tool calls (if any), and re-run the loop.
+        """
+        Run a workflow synchronously, starting at the given agent.
+
+        Note:
+            This just wraps the `run` method, so it will not work if there's already an
+            event loop (e.g. inside an async function, or in a Jupyter notebook or async
+            context like FastAPI). For those cases, use the `run` method instead.
+
+        The agent will run in a loop until a final output is generated. The loop runs:
+
+        1. The agent is invoked with the given input.
+        2. If there is a final output (i.e. the agent produces something of type
+           `agent.output_type`), the loop terminates.
+        3. If there's a handoff, we run the loop again, with the new agent.
+        4. Else, we run tool calls (if any), and re-run the loop.
+
         In two cases, the agent may raise an exception:
-        1. If the max_turns is exceeded, a MaxTurnsExceeded exception is raised.
-        2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered exception is raised.
-        Note that only the first agent's input guardrails are run.
+
+        1. If the max_turns is exceeded, a MaxTurnsExceeded exception is raised.
+        2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered
+           exception is raised.
+
+        Note:
+            Only the first agent's input guardrails are run.
+
         Args:
             starting_agent: The starting agent to run.
-            input: The initial input to the agent. You can pass a single string for a user message,
-                or a list of input items.
+            input: The initial input to the agent. You can pass a single string for a
+                user message, or a list of input items.
             context: The context to run the agent with.
-            max_turns: The maximum number of turns to run the agent for. A turn is defined as one
-                AI invocation (including any tool calls that might occur).
+            max_turns: The maximum number of turns to run the agent for. A turn is
+                defined as one AI invocation (including any tool calls that might occur).
             hooks: An object that receives callbacks on various lifecycle events.
             run_config: Global settings for the entire agent run.
-            previous_response_id: The ID of the previous response, if using OpenAI models via the
-                Responses API, this allows you to skip passing in input from the previous turn.
+            previous_response_id: The ID of the previous response, if using OpenAI
+                models via the Responses API, this allows you to skip passing in input
+                from the previous turn.
+            conversation_id: The ID of the stored conversation, if any.
+            session: A session for automatic conversation history management.
+
         Returns:
-            A run result containing all the inputs, guardrail results and the output of the last
-            agent. Agents may perform handoffs, so we don't know the specific type of the output.
+            A run result containing all the inputs, guardrail results and the output of
+            the last agent. Agents may perform handoffs, so we don't know the specific
+            type of the output.
         """
+
         runner = DEFAULT_AGENT_RUNNER
         return runner.run_sync(
             starting_agent,
@@ -296,7 +452,9 @@ class Runner:
             hooks=hooks,
             run_config=run_config,
             previous_response_id=previous_response_id,
+            conversation_id=conversation_id,
             session=session,
+            auto_previous_response_id=auto_previous_response_id,
         )
 
     @classmethod
@@ -309,34 +467,53 @@ class Runner:
         hooks: RunHooks[TContext] | None = None,
         run_config: RunConfig | None = None,
         previous_response_id: str | None = None,
+        auto_previous_response_id: bool = False,
+        conversation_id: str | None = None,
         session: Session | None = None,
     ) -> RunResultStreaming:
-        """Run a workflow starting at the given agent in streaming mode. The returned result object
-        contains a method you can use to stream semantic events as they are generated.
+        """
+        Run a workflow starting at the given agent in streaming mode.
+
+        The returned result object contains a method you can use to stream semantic
+        events as they are generated.
+
         The agent will run in a loop until a final output is generated. The loop runs like so:
-        1. The agent is invoked with the given input.
-        2. If there is a final output (i.e. the agent produces something of type
-            `agent.output_type`, the loop terminates.
-        3. If there's a handoff, we run the loop again, with the new agent.
-        4. Else, we run tool calls (if any), and re-run the loop.
+
+        1. The agent is invoked with the given input.
+        2. If there is a final output (i.e. the agent produces something of type
+           `agent.output_type`), the loop terminates.
+        3. If there's a handoff, we run the loop again, with the new agent.
+        4. Else, we run tool calls (if any), and re-run the loop.
+
         In two cases, the agent may raise an exception:
-        1. If the max_turns is exceeded, a MaxTurnsExceeded exception is raised.
-        2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered exception is raised.
-        Note that only the first agent's input guardrails are run.
+
+        1. If the max_turns is exceeded, a MaxTurnsExceeded exception is raised.
+        2. If a guardrail tripwire is triggered, a GuardrailTripwireTriggered
+           exception is raised.
+
+        Note:
+            Only the first agent's input guardrails are run.
+
         Args:
            starting_agent: The starting agent to run.
-            input: The initial input to the agent. You can pass a single string for a user message,
-                or a list of input items.
+            input: The initial input to the agent. You can pass a single string for a
+                user message, or a list of input items.
            context: The context to run the agent with.
-            max_turns: The maximum number of turns to run the agent for. A turn is defined as one
-                AI invocation (including any tool calls that might occur).
+            max_turns: The maximum number of turns to run the agent for. A turn is
+                defined as one AI invocation (including any tool calls that might occur).
            hooks: An object that receives callbacks on various lifecycle events.
            run_config: Global settings for the entire agent run.
-            previous_response_id: The ID of the previous response, if using OpenAI models via the
-                Responses API, this allows you to skip passing in input from the previous turn.
+            previous_response_id: The ID of the previous response, if using OpenAI
+                models via the Responses API, this allows you to skip passing in input
+                from the previous turn.
+            conversation_id: The ID of the stored conversation, if any.
+            session: A session for automatic conversation history management.
+
        Returns:
-            A result object that contains data about the run, as well as a method to stream events.
+            A result object that contains data about the run, as well as a method to
+            stream events.
        """
+
        runner = DEFAULT_AGENT_RUNNER
        return runner.run_streamed(
            starting_agent,
@@ -346,6 +523,8 @@ class Runner:
             hooks=hooks,
             run_config=run_config,
             previous_response_id=previous_response_id,
+            auto_previous_response_id=auto_previous_response_id,
+            conversation_id=conversation_id,
             session=session,
         )
 
@@ -364,17 +543,35 @@ class AgentRunner:
     ) -> RunResult:
         context = kwargs.get("context")
         max_turns = kwargs.get("max_turns", DEFAULT_MAX_TURNS)
-        hooks = kwargs.get("hooks")
+        hooks = cast(RunHooks[TContext], self._validate_run_hooks(kwargs.get("hooks")))
         run_config = kwargs.get("run_config")
         previous_response_id = kwargs.get("previous_response_id")
+        auto_previous_response_id = kwargs.get("auto_previous_response_id", False)
+        conversation_id = kwargs.get("conversation_id")
         session = kwargs.get("session")
-        if hooks is None:
-            hooks = RunHooks[Any]()
+
         if run_config is None:
             run_config = RunConfig()
 
-        # Prepare input with session if enabled
-        prepared_input = await self._prepare_input_with_session(input, session)
+        # Check whether to enable OpenAI server-managed conversation
+        if (
+            conversation_id is not None
+            or previous_response_id is not None
+            or auto_previous_response_id
+        ):
+            server_conversation_tracker = _ServerConversationTracker(
+                conversation_id=conversation_id,
+                previous_response_id=previous_response_id,
+                auto_previous_response_id=auto_previous_response_id,
+            )
+        else:
+            server_conversation_tracker = None
+
+        # Keep original user input separate from session-prepared input
+        original_user_input = input
+        prepared_input = await self._prepare_input_with_session(
+            input, session, run_config.session_input_callback
+        )
 
         tool_use_tracker = AgentToolUseTracker()
 
@@ -383,11 +580,13 @@ class AgentRunner:
             trace_id=run_config.trace_id,
             group_id=run_config.group_id,
             metadata=run_config.trace_metadata,
+            tracing=run_config.tracing,
             disabled=run_config.tracing_disabled,
         ):
             current_turn = 0
             original_input: str | list[TResponseInputItem] = _copy_str_or_list(prepared_input)
-            generated_items: list[RunItem] = []
+            generated_items: list[RunItem] = []  # For model input (may be filtered on handoffs)
+            session_items: list[RunItem] = []  # For observability (always unfiltered)
             model_responses: list[ModelResponse] = []
 
             context_wrapper: RunContextWrapper[TContext] = RunContextWrapper(
@@ -395,14 +594,22 @@ class AgentRunner:
             )
 
             input_guardrail_results: list[InputGuardrailResult] = []
+            tool_input_guardrail_results: list[ToolInputGuardrailResult] = []
+            tool_output_guardrail_results: list[ToolOutputGuardrailResult] = []
 
             current_span: Span[AgentSpanData] | None = None
             current_agent = starting_agent
             should_run_agent_start_hooks = True
 
+            # save only the new user input to the session, not the combined history
+            await self._save_result_to_session(session, original_user_input, [])
+
             try:
                 while True:
                     all_tools = await AgentRunner._get_all_tools(current_agent, context_wrapper)
+                    await RunImpl.initialize_computer_tools(
+                        tools=all_tools, context_wrapper=context_wrapper
+                    )
 
                     # Start an agent span if we don't have one. This span is ended if the current
                     # agent changes, or if the agent loop ends.
@@ -440,11 +647,31 @@ class AgentRunner:
                    )
 
                    if current_turn == 1:
+                        # Separate guardrails based on execution mode.
+                        all_input_guardrails = starting_agent.input_guardrails + (
+                            run_config.input_guardrails or []
+                        )
+                        sequential_guardrails = [
+                            g for g in all_input_guardrails if not g.run_in_parallel
+                        ]
+                        parallel_guardrails = [g for g in all_input_guardrails if g.run_in_parallel]
+
+                        # Run blocking guardrails first, before agent starts.
+                        # (will raise exception if tripwire triggered).
+                        sequential_results = []
+                        if sequential_guardrails:
+                            sequential_results = await self._run_input_guardrails(
+                                starting_agent,
+                                sequential_guardrails,
+                                _copy_str_or_list(prepared_input),
+                                context_wrapper,
+                            )
+
+                        # Run parallel guardrails + agent together.
                        input_guardrail_results, turn_result = await asyncio.gather(
                            self._run_input_guardrails(
                                starting_agent,
-                                starting_agent.input_guardrails
-                                + (run_config.input_guardrails or []),
+                                parallel_guardrails,
                                _copy_str_or_list(prepared_input),
                                context_wrapper,
                            ),
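
The split above means guardrails with `run_in_parallel=False` now block the first turn, while the rest race the agent as before. A sketch of both kinds — this assumes the `@input_guardrail` decorator in this version accepts a `run_in_parallel` flag, mirroring the `g.run_in_parallel` attribute read in the runner; verify against `agents/guardrail.py`:

```python
# Sketch: a blocking guardrail vs. a parallel one (run_in_parallel is assumed
# to be a decorator parameter here).
from agents import Agent, GuardrailFunctionOutput, input_guardrail


@input_guardrail(run_in_parallel=False)  # runs to completion before the agent starts
def block_pii(context, agent, user_input) -> GuardrailFunctionOutput:
    flagged = "ssn" in str(user_input).lower()
    return GuardrailFunctionOutput(output_info=None, tripwire_triggered=flagged)


@input_guardrail  # default: races the first agent turn
def log_topic(context, agent, user_input) -> GuardrailFunctionOutput:
    return GuardrailFunctionOutput(output_info=None, tripwire_triggered=False)


agent = Agent(
    name="Support",
    instructions="Help the user.",
    input_guardrails=[block_pii, log_topic],
)
```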
@@ -458,9 +685,12 @@ class AgentRunner:
                                run_config=run_config,
                                should_run_agent_start_hooks=should_run_agent_start_hooks,
                                tool_use_tracker=tool_use_tracker,
-                                previous_response_id=previous_response_id,
+                                server_conversation_tracker=server_conversation_tracker,
                            ),
                        )
+
+                        # Combine sequential and parallel results.
+                        input_guardrail_results = sequential_results + input_guardrail_results
                    else:
                        turn_result = await self._run_single_turn(
                            agent=current_agent,
@@ -472,51 +702,111 @@ class AgentRunner:
                            run_config=run_config,
                            should_run_agent_start_hooks=should_run_agent_start_hooks,
                            tool_use_tracker=tool_use_tracker,
-                            previous_response_id=previous_response_id,
+                            server_conversation_tracker=server_conversation_tracker,
                        )
                    should_run_agent_start_hooks = False
 
                    model_responses.append(turn_result.model_response)
                    original_input = turn_result.original_input
-                    generated_items = turn_result.generated_items
+                    # For model input, use new_step_items (filtered on handoffs)
+                    generated_items = turn_result.pre_step_items + turn_result.new_step_items
+                    # Accumulate unfiltered items for observability
+                    session_items_for_turn = (
+                        turn_result.session_step_items
+                        if turn_result.session_step_items is not None
+                        else turn_result.new_step_items
+                    )
+                    session_items.extend(session_items_for_turn)
 
-                    if isinstance(turn_result.next_step, NextStepFinalOutput):
-                        output_guardrail_results = await self._run_output_guardrails(
-                            current_agent.output_guardrails + (run_config.output_guardrails or []),
-                            current_agent,
-                            turn_result.next_step.output,
-                            context_wrapper,
-                        )
-                        result = RunResult(
-                            input=original_input,
-                            new_items=generated_items,
-                            raw_responses=model_responses,
-                            final_output=turn_result.next_step.output,
-                            _last_agent=current_agent,
-                            input_guardrail_results=input_guardrail_results,
-                            output_guardrail_results=output_guardrail_results,
-                            context_wrapper=context_wrapper,
-                        )
+                    if server_conversation_tracker is not None:
+                        server_conversation_tracker.track_server_items(turn_result.model_response)
 
-                        # Save the conversation to session if enabled
-                        await self._save_result_to_session(session, input, result)
+                    # Collect tool guardrail results from this turn
+                    tool_input_guardrail_results.extend(turn_result.tool_input_guardrail_results)
+                    tool_output_guardrail_results.extend(turn_result.tool_output_guardrail_results)
 
-                        return result
-                    elif isinstance(turn_result.next_step, NextStepHandoff):
-                        current_agent = cast(Agent[TContext], turn_result.next_step.new_agent)
-                        current_span.finish(reset_current=True)
-                        current_span = None
-                        should_run_agent_start_hooks = True
-                    elif isinstance(turn_result.next_step, NextStepRunAgain):
-                        pass
-                    else:
-                        raise AgentsException(
-                            f"Unknown next step type: {type(turn_result.next_step)}"
-                        )
+                    try:
+                        if isinstance(turn_result.next_step, NextStepFinalOutput):
+                            output_guardrail_results = await self._run_output_guardrails(
+                                current_agent.output_guardrails
+                                + (run_config.output_guardrails or []),
+                                current_agent,
+                                turn_result.next_step.output,
+                                context_wrapper,
+                            )
+                            result = RunResult(
+                                input=original_input,
+                                new_items=session_items,  # Use unfiltered items for observability
+                                raw_responses=model_responses,
+                                final_output=turn_result.next_step.output,
+                                _last_agent=current_agent,
+                                input_guardrail_results=input_guardrail_results,
+                                output_guardrail_results=output_guardrail_results,
+                                tool_input_guardrail_results=tool_input_guardrail_results,
+                                tool_output_guardrail_results=tool_output_guardrail_results,
+                                context_wrapper=context_wrapper,
+                            )
+                            if not any(
+                                guardrail_result.output.tripwire_triggered
+                                for guardrail_result in input_guardrail_results
+                            ):
+                                await self._save_result_to_session(
+                                    session,
+                                    [],
+                                    turn_result.session_step_items
+                                    if turn_result.session_step_items is not None
+                                    else turn_result.new_step_items,
+                                    turn_result.model_response.response_id,
+                                )
+
+                            return result
+                        elif isinstance(turn_result.next_step, NextStepHandoff):
+                            # Save the conversation to session if enabled (before handoff)
+                            if session is not None:
+                                if not any(
+                                    guardrail_result.output.tripwire_triggered
+                                    for guardrail_result in input_guardrail_results
+                                ):
+                                    await self._save_result_to_session(
+                                        session,
+                                        [],
+                                        turn_result.session_step_items
+                                        if turn_result.session_step_items is not None
+                                        else turn_result.new_step_items,
+                                        turn_result.model_response.response_id,
+                                    )
+                            current_agent = cast(Agent[TContext], turn_result.next_step.new_agent)
+                            current_span.finish(reset_current=True)
+                            current_span = None
+                            should_run_agent_start_hooks = True
+                        elif isinstance(turn_result.next_step, NextStepRunAgain):
+                            if not any(
+                                guardrail_result.output.tripwire_triggered
+                                for guardrail_result in input_guardrail_results
+                            ):
+                                await self._save_result_to_session(
+                                    session,
+                                    [],
+                                    turn_result.session_step_items
+                                    if turn_result.session_step_items is not None
+                                    else turn_result.new_step_items,
+                                    turn_result.model_response.response_id,
+                                )
+                        else:
+                            raise AgentsException(
+                                f"Unknown next step type: {type(turn_result.next_step)}"
+                            )
+                    finally:
+                        # RunImpl.execute_tools_and_side_effects returns a SingleStepResult that
+                        # stores direct references to the `pre_step_items` and `new_step_items`
+                        # lists it manages internally. Clear them here so the next turn does not
+                        # hold on to items from previous turns and to avoid leaking agent refs.
+                        turn_result.pre_step_items.clear()
+                        turn_result.new_step_items.clear()
            except AgentsException as exc:
                exc.run_data = RunErrorDetails(
                    input=original_input,
-                    new_items=generated_items,
+                    new_items=session_items,  # Use unfiltered items for observability
                    raw_responses=model_responses,
                    last_agent=current_agent,
                    context_wrapper=context_wrapper,
@@ -525,6 +815,10 @@ class AgentRunner:
                )
                raise
            finally:
+                try:
+                    await dispose_resolved_computers(run_context=context_wrapper)
+                except Exception as error:
+                    logger.warning("Failed to dispose computers after run: %s", error)
                if current_span:
                    current_span.finish(reset_current=True)
 
@@ -539,9 +833,44 @@ class AgentRunner:
        hooks = kwargs.get("hooks")
        run_config = kwargs.get("run_config")
        previous_response_id = kwargs.get("previous_response_id")
+        auto_previous_response_id = kwargs.get("auto_previous_response_id", False)
+        conversation_id = kwargs.get("conversation_id")
        session = kwargs.get("session")
 
-        return asyncio.get_event_loop().run_until_complete(
+        # Python 3.14 stopped implicitly wiring up a default event loop
+        # when synchronous code touches asyncio APIs for the first time.
+        # Several of our synchronous entry points (for example the Redis/SQLAlchemy session helpers)
+        # construct asyncio primitives like asyncio.Lock during __init__,
+        # which binds them to whatever loop happens to be the thread's default at that moment.
+        # To keep those locks usable we must ensure that run_sync reuses that same default loop
+        # instead of hopping over to a brand-new asyncio.run() loop.
+        try:
+            already_running_loop = asyncio.get_running_loop()
+        except RuntimeError:
+            already_running_loop = None
+
+        if already_running_loop is not None:
+            # This method is only expected to run when no loop is already active.
+            # (Each thread has its own default loop; concurrent sync runs should happen on
+            # different threads. In a single thread use the async API to interleave work.)
+            raise RuntimeError(
+                "AgentRunner.run_sync() cannot be called when an event loop is already running."
+            )
+
+        policy = asyncio.get_event_loop_policy()
+        with warnings.catch_warnings():
+            warnings.simplefilter("ignore", DeprecationWarning)
+            try:
+                default_loop = policy.get_event_loop()
+            except RuntimeError:
+                default_loop = policy.new_event_loop()
+                policy.set_event_loop(default_loop)
+
+        # We intentionally leave the default loop open even if we had to create one above. Session
+        # instances and other helpers stash loop-bound primitives between calls and expect to find
+        # the same default loop every time run_sync is invoked on this thread.
+        # Schedule the async run on the default loop so that we can manage cancellation explicitly.
+        task = default_loop.create_task(
            self.run(
                starting_agent,
                input,
@@ -551,9 +880,29 @@ class AgentRunner:
                hooks=hooks,
                run_config=run_config,
                previous_response_id=previous_response_id,
+                auto_previous_response_id=auto_previous_response_id,
+                conversation_id=conversation_id,
            )
        )
 
+        try:
+            # Drive the coroutine to completion, harvesting the final RunResult.
+            return default_loop.run_until_complete(task)
+        except BaseException:
+            # If the sync caller aborts (KeyboardInterrupt, etc.), make sure the scheduled task
+            # does not linger on the shared loop by cancelling it and waiting for completion.
+            if not task.done():
+                task.cancel()
+                with contextlib.suppress(asyncio.CancelledError):
+                    default_loop.run_until_complete(task)
+            raise
+        finally:
+            if not default_loop.is_closed():
+                # The loop stays open for subsequent runs, but we still need to flush any pending
+                # async generators so their cleanup code executes promptly.
+                with contextlib.suppress(RuntimeError):
+                    default_loop.run_until_complete(default_loop.shutdown_asyncgens())
+
    def run_streamed(
        self,
        starting_agent: Agent[TContext],
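
One consequence of this rework worth calling out: run_sync now fails fast instead of deadlocking or nesting loops when called from async code. A sketch of both sides, with an illustrative agent:

```python
# run_sync is for plain synchronous code only; inside a running event loop it
# now raises RuntimeError, so use the async API there instead.
import asyncio

from agents import Agent, Runner

agent = Agent(name="Assistant", instructions="Reply tersely.")

# Fine: no event loop is running on this thread.
result = Runner.run_sync(agent, "ping")
print(result.final_output)


async def handler() -> None:
    # Wrong here: Runner.run_sync(agent, "ping") would raise
    # RuntimeError("AgentRunner.run_sync() cannot be called when an event
    # loop is already running.")
    result = await Runner.run(agent, "ping")  # correct inside async code
    print(result.final_output)


asyncio.run(handler())
```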
@@ -562,13 +911,13 @@ class AgentRunner:
    ) -> RunResultStreaming:
        context = kwargs.get("context")
        max_turns = kwargs.get("max_turns", DEFAULT_MAX_TURNS)
-        hooks = kwargs.get("hooks")
+        hooks = cast(RunHooks[TContext], self._validate_run_hooks(kwargs.get("hooks")))
        run_config = kwargs.get("run_config")
        previous_response_id = kwargs.get("previous_response_id")
+        auto_previous_response_id = kwargs.get("auto_previous_response_id", False)
+        conversation_id = kwargs.get("conversation_id")
        session = kwargs.get("session")
 
-        if hooks is None:
-            hooks = RunHooks[Any]()
        if run_config is None:
            run_config = RunConfig()
 
@@ -583,6 +932,7 @@ class AgentRunner:
                trace_id=run_config.trace_id,
                group_id=run_config.group_id,
                metadata=run_config.trace_metadata,
+                tracing=run_config.tracing,
                disabled=run_config.tracing_disabled,
            )
        )
@@ -603,6 +953,8 @@ class AgentRunner:
            max_turns=max_turns,
            input_guardrail_results=[],
            output_guardrail_results=[],
+            tool_input_guardrail_results=[],
+            tool_output_guardrail_results=[],
            _current_agent_output_schema=output_schema,
            trace=new_trace,
            context_wrapper=context_wrapper,
@@ -619,11 +971,30 @@ class AgentRunner:
                context_wrapper=context_wrapper,
                run_config=run_config,
                previous_response_id=previous_response_id,
+                auto_previous_response_id=auto_previous_response_id,
+                conversation_id=conversation_id,
                session=session,
            )
        )
        return streamed_result
 
+    @staticmethod
+    def _validate_run_hooks(
+        hooks: RunHooksBase[Any, Agent[Any]] | AgentHooksBase[Any, Agent[Any]] | Any | None,
+    ) -> RunHooks[Any]:
+        if hooks is None:
+            return RunHooks[Any]()
+        input_hook_type = type(hooks).__name__
+        if isinstance(hooks, AgentHooksBase):
+            raise TypeError(
+                "Run hooks must be instances of RunHooks. "
+                f"Received agent-scoped hooks ({input_hook_type}). "
+                "Attach AgentHooks to an Agent via Agent(..., hooks=...)."
+            )
+        if not isinstance(hooks, RunHooksBase):
+            raise TypeError(f"Run hooks must be instances of RunHooks. Received {input_hook_type}.")
+        return hooks
+
    @classmethod
    async def _maybe_filter_model_input(
        cls,
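
The validation makes the common mix-up explicit: run-scoped hooks go to the runner, agent-scoped hooks go on the Agent. Illustrative usage, with placeholder hook classes:

```python
# Run-scoped vs. agent-scoped hooks after the validation above.
from agents import Agent, AgentHooks, Runner, RunHooks


class MyRunHooks(RunHooks):
    async def on_agent_start(self, context, agent) -> None:
        print(f"run hook: {agent.name} starting")


class MyAgentHooks(AgentHooks):
    async def on_start(self, context, agent) -> None:
        print(f"agent hook: {agent.name} starting")


agent = Agent(
    name="Assistant",
    instructions="Be brief.",
    hooks=MyAgentHooks(),  # agent-scoped hooks attach here
)

result = Runner.run_sync(agent, "hello", hooks=MyRunHooks())  # run-scoped hooks here

# Passing MyAgentHooks() as `hooks=` to Runner.run_sync would now raise
# TypeError: "Run hooks must be instances of RunHooks. ..."
```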
@@ -689,6 +1060,11 @@ class AgentRunner:
            for done in asyncio.as_completed(guardrail_tasks):
                result = await done
                if result.output.tripwire_triggered:
+                    # Cancel all remaining guardrail tasks if a tripwire is triggered.
+                    for t in guardrail_tasks:
+                        t.cancel()
+                    # Wait for cancellations to propagate by awaiting the cancelled tasks.
+                    await asyncio.gather(*guardrail_tasks, return_exceptions=True)
                    _error_tracing.attach_error_to_span(
                        parent_span,
                        SpanError(
@@ -699,6 +1075,9 @@ class AgentRunner:
                            },
                        ),
                    )
+                    queue.put_nowait(result)
+                    guardrail_results.append(result)
+                    break
                queue.put_nowait(result)
                guardrail_results.append(result)
        except Exception:
@@ -706,7 +1085,9 @@ class AgentRunner:
                t.cancel()
            raise
 
-        streamed_result.input_guardrail_results = guardrail_results
+        streamed_result.input_guardrail_results = (
+            streamed_result.input_guardrail_results + guardrail_results
+        )
 
    @classmethod
    async def _start_streaming(
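
The cancellation dance above is a general asyncio pattern: when the first result trips, cancel the stragglers and await them so cancellation actually lands before moving on. A standalone sketch of the same shape:

```python
# Standalone sketch of the cancel-on-first-tripwire pattern used above.
import asyncio


async def check(name: str, delay: float, trips: bool) -> tuple[str, bool]:
    await asyncio.sleep(delay)
    return name, trips


async def main() -> None:
    tasks = [
        asyncio.create_task(check("fast", 0.1, True)),
        asyncio.create_task(check("slow", 5.0, False)),
    ]
    for done in asyncio.as_completed(tasks):
        name, tripped = await done
        if tripped:
            for t in tasks:
                t.cancel()  # cancelling an already-finished task is a no-op
            # Await them so the cancellations propagate before we return.
            await asyncio.gather(*tasks, return_exceptions=True)
            print(f"tripwire from {name}; remaining checks cancelled")
            break


asyncio.run(main())
```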
@@ -719,6 +1100,8 @@ class AgentRunner:
719
1100
  context_wrapper: RunContextWrapper[TContext],
720
1101
  run_config: RunConfig,
721
1102
  previous_response_id: str | None,
1103
+ auto_previous_response_id: bool,
1104
+ conversation_id: str | None,
722
1105
  session: Session | None,
723
1106
  ):
724
1107
  if streamed_result.trace:
@@ -730,20 +1113,47 @@ class AgentRunner:
730
1113
  should_run_agent_start_hooks = True
731
1114
  tool_use_tracker = AgentToolUseTracker()
732
1115
 
1116
+ # Check whether to enable OpenAI server-managed conversation
1117
+ if (
1118
+ conversation_id is not None
1119
+ or previous_response_id is not None
1120
+ or auto_previous_response_id
1121
+ ):
1122
+ server_conversation_tracker = _ServerConversationTracker(
1123
+ conversation_id=conversation_id,
1124
+ previous_response_id=previous_response_id,
1125
+ auto_previous_response_id=auto_previous_response_id,
1126
+ )
1127
+ else:
1128
+ server_conversation_tracker = None
1129
+
733
1130
  streamed_result._event_queue.put_nowait(AgentUpdatedStreamEvent(new_agent=current_agent))
734
1131
 
735
1132
  try:
736
1133
  # Prepare input with session if enabled
737
- prepared_input = await AgentRunner._prepare_input_with_session(starting_input, session)
1134
+ prepared_input = await AgentRunner._prepare_input_with_session(
1135
+ starting_input, session, run_config.session_input_callback
1136
+ )
738
1137
 
739
1138
  # Update the streamed result with the prepared input
740
1139
  streamed_result.input = prepared_input
741
1140
 
1141
+ await AgentRunner._save_result_to_session(session, starting_input, [])
1142
+
742
1143
  while True:
1144
+ # Check for soft cancel before starting new turn
1145
+ if streamed_result._cancel_mode == "after_turn":
1146
+ streamed_result.is_complete = True
1147
+ streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
1148
+ break
1149
+
743
1150
  if streamed_result.is_complete:
744
1151
  break
745
1152
 
746
1153
  all_tools = await cls._get_all_tools(current_agent, context_wrapper)
1154
+ await RunImpl.initialize_computer_tools(
1155
+ tools=all_tools, context_wrapper=context_wrapper
1156
+ )
747
1157
 
748
1158
  # Start an agent span if we don't have one. This span is ended if the current
749
1159
  # agent changes, or if the agent loop ends.
@@ -780,11 +1190,36 @@ class AgentRunner:
780
1190
  break
781
1191
 
782
1192
  if current_turn == 1:
783
- # Run the input guardrails in the background and put the results on the queue
1193
+ # Separate guardrails based on execution mode.
1194
+ all_input_guardrails = starting_agent.input_guardrails + (
1195
+ run_config.input_guardrails or []
1196
+ )
1197
+ sequential_guardrails = [
1198
+ g for g in all_input_guardrails if not g.run_in_parallel
1199
+ ]
1200
+ parallel_guardrails = [g for g in all_input_guardrails if g.run_in_parallel]
1201
+
1202
+ # Run sequential guardrails first.
1203
+ if sequential_guardrails:
1204
+ await cls._run_input_guardrails_with_queue(
1205
+ starting_agent,
1206
+ sequential_guardrails,
1207
+ ItemHelpers.input_to_new_input_list(prepared_input),
1208
+ context_wrapper,
1209
+ streamed_result,
1210
+ current_span,
1211
+ )
1212
+ # Check if any blocking guardrail triggered and raise before starting agent.
1213
+ for result in streamed_result.input_guardrail_results:
1214
+ if result.output.tripwire_triggered:
1215
+ streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
1216
+ raise InputGuardrailTripwireTriggered(result)
1217
+
1218
+ # Run parallel guardrails in background.
784
1219
  streamed_result._input_guardrails_task = asyncio.create_task(
785
1220
  cls._run_input_guardrails_with_queue(
786
1221
  starting_agent,
787
- starting_agent.input_guardrails + (run_config.input_guardrails or []),
1222
+ parallel_guardrails,
788
1223
  ItemHelpers.input_to_new_input_list(prepared_input),
789
1224
  context_wrapper,
790
1225
  streamed_result,
@@ -801,7 +1236,7 @@ class AgentRunner:
801
1236
  should_run_agent_start_hooks,
802
1237
  tool_use_tracker,
803
1238
  all_tools,
804
- previous_response_id,
1239
+ server_conversation_tracker,
805
1240
  )
806
1241
  should_run_agent_start_hooks = False
807
1242
 
@@ -809,9 +1244,40 @@ class AgentRunner:
809
1244
  turn_result.model_response
810
1245
  ]
811
1246
  streamed_result.input = turn_result.original_input
812
- streamed_result.new_items = turn_result.generated_items
1247
+ # Keep filtered items for building model input on the next turn.
1248
+ streamed_result._model_input_items = (
1249
+ turn_result.pre_step_items + turn_result.new_step_items
1250
+ )
1251
+ # Accumulate unfiltered items for observability
1252
+ session_items_for_turn = (
1253
+ turn_result.session_step_items
1254
+ if turn_result.session_step_items is not None
1255
+ else turn_result.new_step_items
1256
+ )
1257
+ streamed_result.new_items.extend(session_items_for_turn)
1258
+
1259
+ if server_conversation_tracker is not None:
1260
+ server_conversation_tracker.track_server_items(turn_result.model_response)
813
1261
 
814
1262
  if isinstance(turn_result.next_step, NextStepHandoff):
1263
+ # Save the conversation to session if enabled (before handoff)
1264
+ # Streaming needs to save for graceful cancellation support
1265
+ if session is not None:
1266
+ should_skip_session_save = (
1267
+ await AgentRunner._input_guardrail_tripwire_triggered_for_stream(
1268
+ streamed_result
1269
+ )
1270
+ )
1271
+ if should_skip_session_save is False:
1272
+ await AgentRunner._save_result_to_session(
1273
+ session,
1274
+ [],
1275
+ turn_result.session_step_items
1276
+ if turn_result.session_step_items is not None
1277
+ else turn_result.new_step_items,
1278
+ turn_result.model_response.response_id,
1279
+ )
1280
+
815
1281
  current_agent = turn_result.next_step.new_agent
816
1282
  current_span.finish(reset_current=True)
817
1283
  current_span = None
@@ -819,6 +1285,12 @@ class AgentRunner:
819
1285
  streamed_result._event_queue.put_nowait(
820
1286
  AgentUpdatedStreamEvent(new_agent=current_agent)
821
1287
  )
1288
+
1289
+ # Check for soft cancel after handoff
1290
+ if streamed_result._cancel_mode == "after_turn": # type: ignore[comparison-overlap]
1291
+ streamed_result.is_complete = True
1292
+ streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
1293
+ break
822
1294
  elif isinstance(turn_result.next_step, NextStepFinalOutput):
823
1295
  streamed_result._output_guardrails_task = asyncio.create_task(
824
1296
  cls._run_output_guardrails(
@@ -841,24 +1313,45 @@ class AgentRunner:
841
1313
  streamed_result.is_complete = True
842
1314
 
843
1315
  # Save the conversation to session if enabled
844
- # Create a temporary RunResult for session saving
845
- temp_result = RunResult(
846
- input=streamed_result.input,
847
- new_items=streamed_result.new_items,
848
- raw_responses=streamed_result.raw_responses,
849
- final_output=streamed_result.final_output,
850
- _last_agent=current_agent,
851
- input_guardrail_results=streamed_result.input_guardrail_results,
852
- output_guardrail_results=streamed_result.output_guardrail_results,
853
- context_wrapper=context_wrapper,
854
- )
855
- await AgentRunner._save_result_to_session(
856
- session, starting_input, temp_result
857
- )
1316
+ if session is not None:
1317
+ should_skip_session_save = (
1318
+ await AgentRunner._input_guardrail_tripwire_triggered_for_stream(
1319
+ streamed_result
1320
+ )
1321
+ )
1322
+ if should_skip_session_save is False:
1323
+ await AgentRunner._save_result_to_session(
1324
+ session,
1325
+ [],
1326
+ turn_result.session_step_items
1327
+ if turn_result.session_step_items is not None
1328
+ else turn_result.new_step_items,
1329
+ turn_result.model_response.response_id,
1330
+ )
858
1331
 
859
1332
  streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
860
1333
  elif isinstance(turn_result.next_step, NextStepRunAgain):
861
- pass
1334
+ if session is not None:
1335
+ should_skip_session_save = (
1336
+ await AgentRunner._input_guardrail_tripwire_triggered_for_stream(
1337
+ streamed_result
1338
+ )
1339
+ )
1340
+ if should_skip_session_save is False:
1341
+ await AgentRunner._save_result_to_session(
1342
+ session,
1343
+ [],
1344
+ turn_result.session_step_items
1345
+ if turn_result.session_step_items is not None
1346
+ else turn_result.new_step_items,
1347
+ turn_result.model_response.response_id,
1348
+ )
1349
+
1350
+ # Check for soft cancel after turn completion
1351
+ if streamed_result._cancel_mode == "after_turn": # type: ignore[comparison-overlap]
1352
+ streamed_result.is_complete = True
1353
+ streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
1354
+ break
862
1355
  except AgentsException as exc:
863
1356
  streamed_result.is_complete = True
864
1357
  streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
@@ -887,11 +1380,32 @@ class AgentRunner:
887
1380
 
888
1381
  streamed_result.is_complete = True
889
1382
  finally:
1383
+ if streamed_result._input_guardrails_task:
1384
+ try:
1385
+ await AgentRunner._input_guardrail_tripwire_triggered_for_stream(
1386
+ streamed_result
1387
+ )
1388
+ except Exception as e:
1389
+ logger.debug(
1390
+ f"Error in streamed_result finalize for agent {current_agent.name} - {e}"
1391
+ )
1392
+ try:
1393
+ await dispose_resolved_computers(run_context=context_wrapper)
1394
+ except Exception as error:
1395
+ logger.warning("Failed to dispose computers after streamed run: %s", error)
890
1396
  if current_span:
891
1397
  current_span.finish(reset_current=True)
892
1398
  if streamed_result.trace:
893
1399
  streamed_result.trace.finish(reset_current=True)
894
1400
 
1401
+ # Ensure QueueCompleteSentinel is always put in the queue when the stream ends,
1402
+ # even if an exception occurs before the inner try/except block (e.g., in
1403
+ # _save_result_to_session at the beginning). Without this, stream_events()
1404
+ # would hang forever waiting for more items.
1405
+ if not streamed_result.is_complete:
1406
+ streamed_result.is_complete = True
1407
+ streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
1408
+
895
1409
  @classmethod
896
1410
  async def _run_single_turn_streamed(
897
1411
  cls,
@@ -903,13 +1417,21 @@ class AgentRunner:
903
1417
  should_run_agent_start_hooks: bool,
904
1418
  tool_use_tracker: AgentToolUseTracker,
905
1419
  all_tools: list[Tool],
906
- previous_response_id: str | None,
1420
+ server_conversation_tracker: _ServerConversationTracker | None = None,
907
1421
  ) -> SingleStepResult:
1422
+ emitted_tool_call_ids: set[str] = set()
1423
+ emitted_reasoning_item_ids: set[str] = set()
1424
+
908
1425
  if should_run_agent_start_hooks:
1426
+ agent_hook_context = AgentHookContext(
1427
+ context=context_wrapper.context,
1428
+ usage=context_wrapper.usage,
1429
+ turn_input=ItemHelpers.input_to_new_input_list(streamed_result.input),
1430
+ )
909
1431
  await asyncio.gather(
910
- hooks.on_agent_start(context_wrapper, agent),
1432
+ hooks.on_agent_start(agent_hook_context, agent),
911
1433
  (
912
- agent.hooks.on_start(context_wrapper, agent)
1434
+ agent.hooks.on_start(agent_hook_context, agent)
913
1435
  if agent.hooks
914
1436
  else _coro.noop_coroutine()
915
1437
  ),
@@ -932,9 +1454,15 @@ class AgentRunner:
932
1454
 
933
1455
  final_response: ModelResponse | None = None
934
1456
 
935
- input = ItemHelpers.input_to_new_input_list(streamed_result.input)
936
- input.extend([item.to_input_item() for item in streamed_result.new_items])
1457
+ if server_conversation_tracker is not None:
1458
+ input = server_conversation_tracker.prepare_input(
1459
+ streamed_result.input, streamed_result._model_input_items
1460
+ )
1461
+ else:
1462
+ input = ItemHelpers.input_to_new_input_list(streamed_result.input)
1463
+ input.extend([item.to_input_item() for item in streamed_result._model_input_items])
937
1464
 
1465
+ # THIS IS THE RESOLVED CONFLICT BLOCK
         filtered = await cls._maybe_filter_model_input(
             agent=agent,
             run_config=run_config,
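
The two input branches above assemble the model input differently: with a server-tracked conversation, only items the server has not yet seen are sent; otherwise the full local history is rebuilt. A sketch of that idea, with `ServerTracker` as a hypothetical stand-in for the private `_ServerConversationTracker`:

from dataclasses import dataclass, field


@dataclass
class ServerTracker:
    """Hypothetical stand-in: remembers which items the server already has."""

    sent_ids: set[str] = field(default_factory=set)

    def prepare_input(self, items: list[dict]) -> list[dict]:
        fresh = [item for item in items if item["id"] not in self.sent_ids]
        self.sent_ids.update(item["id"] for item in fresh)
        return fresh


history = [{"id": "a", "text": "hi"}, {"id": "b", "text": "hello"}]
tracker = ServerTracker()
print(tracker.prepare_input(history))  # both items on the first turn
history.append({"id": "c", "text": "follow-up"})
print(tracker.prepare_input(history))  # only the new item afterwards
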
@@ -943,6 +1471,28 @@ class AgentRunner:
             system_instructions=system_prompt,
         )
 
+        # Call hook just before the model is invoked, with the correct system_prompt.
+        await asyncio.gather(
+            hooks.on_llm_start(context_wrapper, agent, filtered.instructions, filtered.input),
+            (
+                agent.hooks.on_llm_start(
+                    context_wrapper, agent, filtered.instructions, filtered.input
+                )
+                if agent.hooks
+                else _coro.noop_coroutine()
+            ),
+        )
+
+        previous_response_id = (
+            server_conversation_tracker.previous_response_id
+            if server_conversation_tracker
+            and server_conversation_tracker.previous_response_id is not None
+            else None
+        )
+        conversation_id = (
+            server_conversation_tracker.conversation_id if server_conversation_tracker else None
+        )
+
         # 1. Stream the output events
         async for event in model.stream_response(
             filtered.instructions,
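
The `on_llm_start` call added in this hunk receives `filtered.instructions` and `filtered.input`, so hooks observe exactly what the model will see after any input filter has run. A minimal hook sketch whose method signatures are inferred from the call sites in this diff:

from agents import Agent, RunHooks, Runner


class LoggingHooks(RunHooks):
    async def on_llm_start(self, context, agent, system_prompt, input_items) -> None:
        # Runs right before each model call, after any input filtering.
        print(f"[{agent.name}] calling model with {len(input_items)} input items")

    async def on_llm_end(self, context, agent, response) -> None:
        # Runs right after the model response for the turn is finalized.
        print(f"[{agent.name}] received {len(response.output)} output items")


agent = Agent(name="assistant", instructions="Reply briefly.")
result = Runner.run_sync(agent, "Hi there", hooks=LoggingHooks())
print(result.final_output)
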
@@ -955,8 +1505,12 @@ class AgentRunner:
                 run_config.tracing_disabled, run_config.trace_include_sensitive_data
             ),
             previous_response_id=previous_response_id,
+            conversation_id=conversation_id,
             prompt=prompt_config,
         ):
+            # Emit the raw event ASAP
+            streamed_result._event_queue.put_nowait(RawResponsesStreamEvent(data=event))
+
             if isinstance(event, ResponseCompletedEvent):
                 usage = (
                     Usage(
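
Enqueuing each raw event before any further processing keeps consumer latency low: text deltas arrive as they are generated rather than after usage accounting. A short consumer using the public streaming API:

import asyncio

from openai.types.responses import ResponseTextDeltaEvent

from agents import Agent, Runner


async def main() -> None:
    agent = Agent(name="assistant", instructions="Reply briefly.")
    result = Runner.run_streamed(agent, input="Say hello.")
    async for event in result.stream_events():
        # Raw events carry the model's wire-level stream, e.g. text deltas.
        if event.type == "raw_response_event" and isinstance(
            event.data, ResponseTextDeltaEvent
        ):
            print(event.data.delta, end="", flush=True)


asyncio.run(main())
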
@@ -977,16 +1531,56 @@ class AgentRunner:
                 )
                 context_wrapper.usage.add(usage)
 
-            streamed_result._event_queue.put_nowait(RawResponsesStreamEvent(data=event))
+            if isinstance(event, ResponseOutputItemDoneEvent):
+                output_item = event.item
+
+                if isinstance(output_item, _TOOL_CALL_TYPES):
+                    call_id: str | None = getattr(
+                        output_item, "call_id", getattr(output_item, "id", None)
+                    )
+
+                    if call_id and call_id not in emitted_tool_call_ids:
+                        emitted_tool_call_ids.add(call_id)
+
+                        tool_item = ToolCallItem(
+                            raw_item=cast(ToolCallItemTypes, output_item),
+                            agent=agent,
+                        )
+                        streamed_result._event_queue.put_nowait(
+                            RunItemStreamEvent(item=tool_item, name="tool_called")
+                        )
+
+                elif isinstance(output_item, ResponseReasoningItem):
+                    reasoning_id: str | None = getattr(output_item, "id", None)
+
+                    if reasoning_id and reasoning_id not in emitted_reasoning_item_ids:
+                        emitted_reasoning_item_ids.add(reasoning_id)
+
+                        reasoning_item = ReasoningItem(raw_item=output_item, agent=agent)
+                        streamed_result._event_queue.put_nowait(
+                            RunItemStreamEvent(item=reasoning_item, name="reasoning_item_created")
+                        )
+
+        # Call hook just after the model response is finalized.
+        if final_response is not None:
+            await asyncio.gather(
+                (
+                    agent.hooks.on_llm_end(context_wrapper, agent, final_response)
+                    if agent.hooks
+                    else _coro.noop_coroutine()
+                ),
+                hooks.on_llm_end(context_wrapper, agent, final_response),
+            )
 
         # 2. At this point, the streaming is complete for this turn of the agent loop.
         if not final_response:
             raise ModelBehaviorError("Model did not produce a final response!")
 
         # 3. Now, we can process the turn as we do in the non-streaming case
-        return await cls._get_single_step_result_from_streamed_response(
+        single_step_result = await cls._get_single_step_result_from_response(
             agent=agent,
-            streamed_result=streamed_result,
+            original_input=streamed_result.input,
+            pre_step_items=streamed_result._model_input_items,
             new_response=final_response,
             output_schema=output_schema,
             all_tools=all_tools,
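
The `emitted_tool_call_ids` and `emitted_reasoning_item_ids` sets in this hunk implement an emit-once guard: an item is streamed the first time its id appears, and the same sets later suppress duplicates when the finished step result is flushed. The pattern in isolation:

from typing import Callable

seen_ids: set[str] = set()


def emit_once(item_id: str | None, emit: Callable[[str], None]) -> None:
    # Stream an item the first time its id appears; ignore later repeats.
    if item_id and item_id not in seen_ids:
        seen_ids.add(item_id)
        emit(item_id)


for incoming in ["call_1", "call_1", "call_2", None, "call_2"]:
    emit_once(incoming, lambda item_id: print("emitted", item_id))
# Prints "emitted call_1" and "emitted call_2" exactly once each.
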
@@ -995,8 +1589,59 @@ class AgentRunner:
             context_wrapper=context_wrapper,
             run_config=run_config,
             tool_use_tracker=tool_use_tracker,
+            event_queue=streamed_result._event_queue,
         )
 
+        import dataclasses as _dc
+
+        # Stream session items (unfiltered) when available for observability.
+        streaming_items = (
+            single_step_result.session_step_items
+            if single_step_result.session_step_items is not None
+            else single_step_result.new_step_items
+        )
+
+        # Filter out items that have already been sent to avoid duplicates.
+        items_to_stream = streaming_items
+
+        if emitted_tool_call_ids:
+            # Filter out tool call items that were already emitted during streaming
+            items_to_stream = [
+                item
+                for item in items_to_stream
+                if not (
+                    isinstance(item, ToolCallItem)
+                    and (
+                        call_id := getattr(
+                            item.raw_item, "call_id", getattr(item.raw_item, "id", None)
+                        )
+                    )
+                    and call_id in emitted_tool_call_ids
+                )
+            ]
+
+        if emitted_reasoning_item_ids:
+            # Filter out reasoning items that were already emitted during streaming
+            items_to_stream = [
+                item
+                for item in items_to_stream
+                if not (
+                    isinstance(item, ReasoningItem)
+                    and (reasoning_id := getattr(item.raw_item, "id", None))
+                    and reasoning_id in emitted_reasoning_item_ids
+                )
+            ]
+
+        # Filter out HandoffCallItem to avoid duplicates (already sent earlier)
+        items_to_stream = [
+            item for item in items_to_stream if not isinstance(item, HandoffCallItem)
+        ]
+
+        # Create filtered result and send to queue
+        filtered_result = _dc.replace(single_step_result, new_step_items=items_to_stream)
+        RunImpl.stream_step_result_to_queue(filtered_result, streamed_result._event_queue)
+        return single_step_result
+
     @classmethod
     async def _run_single_turn(
         cls,
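
The `dataclasses.replace` call in the hunk above is what lets the runner stream a pruned copy of the step result while returning the untouched original to the caller. In miniature:

import dataclasses


@dataclasses.dataclass(frozen=True)
class StepResult:
    new_step_items: list[str]
    next_step: str


full = StepResult(new_step_items=["tool_call", "message"], next_step="run_again")
# Shallow copy with one field swapped; `full` is left untouched.
for_queue = dataclasses.replace(full, new_step_items=["message"])

print(full.new_step_items)       # ['tool_call', 'message']
print(for_queue.new_step_items)  # ['message']
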
@@ -1010,14 +1655,19 @@ class AgentRunner:
         run_config: RunConfig,
         should_run_agent_start_hooks: bool,
         tool_use_tracker: AgentToolUseTracker,
-        previous_response_id: str | None,
+        server_conversation_tracker: _ServerConversationTracker | None = None,
     ) -> SingleStepResult:
         # Ensure we run the hooks before anything else
         if should_run_agent_start_hooks:
+            agent_hook_context = AgentHookContext(
+                context=context_wrapper.context,
+                usage=context_wrapper.usage,
+                turn_input=ItemHelpers.input_to_new_input_list(original_input),
+            )
             await asyncio.gather(
-                hooks.on_agent_start(context_wrapper, agent),
+                hooks.on_agent_start(agent_hook_context, agent),
                 (
-                    agent.hooks.on_start(context_wrapper, agent)
+                    agent.hooks.on_start(agent_hook_context, agent)
                     if agent.hooks
                     else _coro.noop_coroutine()
                 ),
@@ -1030,8 +1680,11 @@ class AgentRunner:
 
         output_schema = cls._get_output_schema(agent)
         handoffs = await cls._get_handoffs(agent, context_wrapper)
-        input = ItemHelpers.input_to_new_input_list(original_input)
-        input.extend([generated_item.to_input_item() for generated_item in generated_items])
+        if server_conversation_tracker is not None:
+            input = server_conversation_tracker.prepare_input(original_input, generated_items)
+        else:
+            input = ItemHelpers.input_to_new_input_list(original_input)
+            input.extend([generated_item.to_input_item() for generated_item in generated_items])
 
         new_response = await cls._get_new_response(
             agent,
@@ -1040,10 +1693,11 @@ class AgentRunner:
             output_schema,
             all_tools,
             handoffs,
+            hooks,
             context_wrapper,
             run_config,
             tool_use_tracker,
-            previous_response_id,
+            server_conversation_tracker,
             prompt_config,
         )
@@ -1076,6 +1730,7 @@ class AgentRunner:
         context_wrapper: RunContextWrapper[TContext],
         run_config: RunConfig,
         tool_use_tracker: AgentToolUseTracker,
+        event_queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel] | None = None,
     ) -> SingleStepResult:
         processed_response = RunImpl.process_model_response(
             agent=agent,
@@ -1087,6 +1742,14 @@ class AgentRunner:
 
         tool_use_tracker.add_tool_use(agent, processed_response.tools_used)
 
+        # Send handoff items immediately for streaming, but avoid duplicates
+        if event_queue is not None and processed_response.new_items:
+            handoff_items = [
+                item for item in processed_response.new_items if isinstance(item, HandoffCallItem)
+            ]
+            if handoff_items:
+                RunImpl.stream_step_items_to_queue(cast(list[RunItem], handoff_items), event_queue)
+
         return await RunImpl.execute_tools_and_side_effects(
             agent=agent,
             original_input=original_input,
@@ -1115,7 +1778,7 @@ class AgentRunner:
         tool_use_tracker: AgentToolUseTracker,
     ) -> SingleStepResult:
         original_input = streamed_result.input
-        pre_step_items = streamed_result.new_items
+        pre_step_items = streamed_result._model_input_items
         event_queue = streamed_result._event_queue
 
         processed_response = RunImpl.process_model_response(
@@ -1140,10 +1803,15 @@ class AgentRunner:
             context_wrapper=context_wrapper,
             run_config=run_config,
         )
+        # Use session_step_items (unfiltered) if available for streaming observability,
+        # otherwise fall back to new_step_items.
+        streaming_items = (
+            single_step_result.session_step_items
+            if single_step_result.session_step_items is not None
+            else single_step_result.new_step_items
+        )
         new_step_items = [
-            item
-            for item in single_step_result.new_step_items
-            if item not in new_items_processed_response
+            item for item in streaming_items if item not in new_items_processed_response
         ]
         RunImpl.stream_step_items_to_queue(new_step_items, event_queue)
 
@@ -1175,6 +1843,8 @@ class AgentRunner:
                 # Cancel all guardrail tasks if a tripwire is triggered.
                 for t in guardrail_tasks:
                     t.cancel()
+                # Wait for cancellations to propagate by awaiting the cancelled tasks.
+                await asyncio.gather(*guardrail_tasks, return_exceptions=True)
                 _error_tracing.attach_error_to_current_span(
                     SpanError(
                         message="Guardrail tripwire triggered",
@@ -1234,10 +1904,11 @@ class AgentRunner:
         output_schema: AgentOutputSchemaBase | None,
         all_tools: list[Tool],
         handoffs: list[Handoff],
+        hooks: RunHooks[TContext],
         context_wrapper: RunContextWrapper[TContext],
         run_config: RunConfig,
         tool_use_tracker: AgentToolUseTracker,
-        previous_response_id: str | None,
+        server_conversation_tracker: _ServerConversationTracker | None,
         prompt_config: ResponsePromptParam | None,
     ) -> ModelResponse:
         # Allow user to modify model input right before the call, if configured
@@ -1253,6 +1924,31 @@ class AgentRunner:
         model_settings = agent.model_settings.resolve(run_config.model_settings)
         model_settings = RunImpl.maybe_reset_tool_choice(agent, tool_use_tracker, model_settings)
 
+        # If we have run hooks, or if the agent has hooks, we need to call them before the LLM call
+        await asyncio.gather(
+            hooks.on_llm_start(context_wrapper, agent, filtered.instructions, filtered.input),
+            (
+                agent.hooks.on_llm_start(
+                    context_wrapper,
+                    agent,
+                    filtered.instructions,  # Use filtered instructions
+                    filtered.input,  # Use filtered input
+                )
+                if agent.hooks
+                else _coro.noop_coroutine()
+            ),
+        )
+
+        previous_response_id = (
+            server_conversation_tracker.previous_response_id
+            if server_conversation_tracker
+            and server_conversation_tracker.previous_response_id is not None
+            else None
+        )
+        conversation_id = (
+            server_conversation_tracker.conversation_id if server_conversation_tracker else None
+        )
+
         new_response = await model.get_response(
             system_instructions=filtered.instructions,
             input=filtered.input,
@@ -1264,11 +1960,22 @@ class AgentRunner:
                 run_config.tracing_disabled, run_config.trace_include_sensitive_data
             ),
             previous_response_id=previous_response_id,
+            conversation_id=conversation_id,
             prompt=prompt_config,
         )
 
         context_wrapper.usage.add(new_response.usage)
 
+        # If we have run hooks, or if the agent has hooks, we need to call them after the LLM call
+        await asyncio.gather(
+            (
+                agent.hooks.on_llm_end(context_wrapper, agent, new_response)
+                if agent.hooks
+                else _coro.noop_coroutine()
+            ),
+            hooks.on_llm_end(context_wrapper, agent, new_response),
+        )
+
         return new_response
 
     @classmethod
@@ -1326,19 +2033,20 @@ class AgentRunner:
         cls,
         input: str | list[TResponseInputItem],
         session: Session | None,
+        session_input_callback: SessionInputCallback | None,
     ) -> str | list[TResponseInputItem]:
         """Prepare input by combining it with session history if enabled."""
         if session is None:
             return input
 
-        # Validate that we don't have both a session and a list input, as this creates
-        # ambiguity about whether the list should append to or replace existing session history
-        if isinstance(input, list):
+        # If the user doesn't specify an input callback and passes a list as input
+        if isinstance(input, list) and not session_input_callback:
             raise UserError(
-                "Cannot provide both a session and a list of input items. "
-                "When using session memory, provide only a string input to append to the "
-                "conversation, or use session=None and provide a list to manually manage "
-                "conversation history."
+                "When using session memory, list inputs require a "
+                "`RunConfig.session_input_callback` to define how they should be merged "
+                "with the conversation history. If you don't want to use a callback, "
+                "provide your input as a string instead, or disable session memory "
+                "(session=None) and pass a list to manage the history manually."
             )
 
         # Get previous conversation history
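
With this change, combining a session with a list input is supported as long as the run supplies a merge strategy. A hedged sketch of doing so, assuming `RunConfig.session_input_callback` receives `(history, new_input)` as the runner's call to it suggests:

from agents import Agent, RunConfig, Runner, SQLiteSession


def keep_recent_history(history: list, new_input: list) -> list:
    # Assumed merge strategy: keep only the last 20 stored items, then
    # append the new turn's items.
    return history[-20:] + new_input


agent = Agent(name="assistant", instructions="Reply briefly.")
session = SQLiteSession("conversation-123")

result = Runner.run_sync(
    agent,
    input=[{"role": "user", "content": "Continue where we left off."}],
    session=session,
    run_config=RunConfig(session_input_callback=keep_recent_history),
)
print(result.final_output)
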
@@ -1347,19 +2055,32 @@ class AgentRunner:
         # Convert input to list format
         new_input_list = ItemHelpers.input_to_new_input_list(input)
 
-        # Combine history with new input
-        combined_input = history + new_input_list
-
-        return combined_input
+        if session_input_callback is None:
+            return history + new_input_list
+        elif callable(session_input_callback):
+            res = session_input_callback(history, new_input_list)
+            if inspect.isawaitable(res):
+                return await res
+            return res
+        else:
+            raise UserError(
+                f"Invalid `session_input_callback` value: {session_input_callback}. "
+                "Provide either `None` or a custom callable."
+            )
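
The `inspect.isawaitable` branch above is what lets the same callback slot accept either a plain function or a coroutine function. Reduced to a standalone helper:

import asyncio
import inspect
from typing import Any, Callable


async def call_maybe_async(fn: Callable[..., Any], *args: Any) -> Any:
    # Invoke fn; await the result only when it is awaitable.
    result = fn(*args)
    if inspect.isawaitable(result):
        return await result
    return result


def sync_merge(history: list, new: list) -> list:
    return history + new


async def async_merge(history: list, new: list) -> list:
    await asyncio.sleep(0)  # e.g. consult an external store
    return history + new


async def main() -> None:
    print(await call_maybe_async(sync_merge, [1], [2]))
    print(await call_maybe_async(async_merge, [1], [2]))


asyncio.run(main())
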
 
     @classmethod
     async def _save_result_to_session(
         cls,
         session: Session | None,
         original_input: str | list[TResponseInputItem],
-        result: RunResult,
+        new_items: list[RunItem],
+        response_id: str | None = None,
     ) -> None:
-        """Save the conversation turn to session."""
+        """
+        Save the conversation turn to session.
+        It does not account for any filtering or modification performed by
+        `RunConfig.session_input_callback`.
+        """
         if session is None:
             return
 
@@ -1367,16 +2088,76 @@ class AgentRunner:
         input_list = ItemHelpers.input_to_new_input_list(original_input)
 
         # Convert new items to input format
-        new_items_as_input = [item.to_input_item() for item in result.new_items]
+        new_items_as_input = [item.to_input_item() for item in new_items]
 
         # Save all items from this turn
         items_to_save = input_list + new_items_as_input
         await session.add_items(items_to_save)
 
+        # Run compaction if the session supports it and we have a response_id
+        if response_id and is_openai_responses_compaction_aware_session(session):
+            has_local_tool_outputs = any(
+                isinstance(item, (ToolCallOutputItem, HandoffOutputItem)) for item in new_items
+            )
+            if has_local_tool_outputs:
+                defer_compaction = getattr(session, "_defer_compaction", None)
+                if callable(defer_compaction):
+                    result = defer_compaction(response_id)
+                    if inspect.isawaitable(result):
+                        await result
+                logger.debug(
+                    "skip: deferring compaction for response %s due to local tool outputs",
+                    response_id,
+                )
+                return
+            deferred_response_id = None
+            get_deferred = getattr(session, "_get_deferred_compaction_response_id", None)
+            if callable(get_deferred):
+                deferred_response_id = get_deferred()
+            force_compaction = deferred_response_id is not None
+            if force_compaction:
+                logger.debug(
+                    "compact: forcing for response %s after deferred %s",
+                    response_id,
+                    deferred_response_id,
+                )
+            await session.run_compaction({"response_id": response_id, "force": force_compaction})
+
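
The compaction flow above defers when the turn produced local tool outputs (the server has not yet seen them) and forces compaction on a later turn once a deferral is pending. The decision table as a pure function, for illustration only:

def compaction_action(
    has_local_tool_outputs: bool, deferred_response_id: str | None
) -> str:
    """Illustrative only: mirrors the branching in _save_result_to_session."""
    if has_local_tool_outputs:
        # Tool outputs only exist locally, so compacting now would drop them.
        return "defer"
    if deferred_response_id is not None:
        # A previous turn was deferred; compact even if thresholds say otherwise.
        return "force"
    return "normal"


assert compaction_action(True, None) == "defer"
assert compaction_action(False, "resp_123") == "force"
assert compaction_action(False, None) == "normal"
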
+    @staticmethod
+    async def _input_guardrail_tripwire_triggered_for_stream(
+        streamed_result: RunResultStreaming,
+    ) -> bool:
+        """Return True if any input guardrail triggered during a streamed run."""
+
+        task = streamed_result._input_guardrails_task
+        if task is None:
+            return False
+
+        if not task.done():
+            await task
+
+        return any(
+            guardrail_result.output.tripwire_triggered
+            for guardrail_result in streamed_result.input_guardrail_results
+        )
+
 
 DEFAULT_AGENT_RUNNER = AgentRunner()
 
 
+def _get_tool_call_types() -> tuple[type, ...]:
+    normalized_types: list[type] = []
+    for type_hint in get_args(ToolCallItemTypes):
+        origin = get_origin(type_hint)
+        candidate = origin or type_hint
+        if isinstance(candidate, type):
+            normalized_types.append(candidate)
+    return tuple(normalized_types)
+
+
+_TOOL_CALL_TYPES: tuple[type, ...] = _get_tool_call_types()
+
+
 def _copy_str_or_list(input: str | list[TResponseInputItem]) -> str | list[TResponseInputItem]:
     if isinstance(input, str):
         return input
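
`_get_tool_call_types` converts the `ToolCallItemTypes` union alias into a tuple of runtime classes usable with `isinstance`, with `get_origin` unwrapping any subscripted generics. The same technique on a self-contained alias:

from typing import Union, get_args, get_origin


class FunctionCall: ...


class ComputerCall: ...


# list[str] is a parametrized generic and not a valid isinstance target,
# so each union member is normalized to its runtime origin class.
ToolCallLike = Union[FunctionCall, ComputerCall, list[str]]

runtime_types: list[type] = []
for hint in get_args(ToolCallLike):
    candidate = get_origin(hint) or hint  # list[str] -> list
    if isinstance(candidate, type):
        runtime_types.append(candidate)

print(tuple(runtime_types))  # FunctionCall, ComputerCall and list
print(isinstance(FunctionCall(), tuple(runtime_types)))  # True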