openai-agents 0.2.11__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (40)
  1. agents/_debug.py +15 -4
  2. agents/_run_impl.py +34 -37
  3. agents/agent.py +18 -2
  4. agents/extensions/handoff_filters.py +2 -0
  5. agents/extensions/memory/__init__.py +42 -15
  6. agents/extensions/memory/encrypt_session.py +185 -0
  7. agents/extensions/models/litellm_model.py +62 -10
  8. agents/function_schema.py +45 -3
  9. agents/memory/__init__.py +2 -0
  10. agents/memory/openai_conversations_session.py +0 -3
  11. agents/memory/util.py +20 -0
  12. agents/models/chatcmpl_converter.py +74 -15
  13. agents/models/chatcmpl_helpers.py +6 -0
  14. agents/models/chatcmpl_stream_handler.py +29 -1
  15. agents/models/openai_chatcompletions.py +26 -4
  16. agents/models/openai_responses.py +30 -4
  17. agents/realtime/__init__.py +2 -0
  18. agents/realtime/_util.py +1 -1
  19. agents/realtime/agent.py +7 -0
  20. agents/realtime/audio_formats.py +29 -0
  21. agents/realtime/config.py +32 -4
  22. agents/realtime/items.py +17 -1
  23. agents/realtime/model_events.py +2 -0
  24. agents/realtime/model_inputs.py +15 -1
  25. agents/realtime/openai_realtime.py +421 -130
  26. agents/realtime/session.py +167 -14
  27. agents/result.py +47 -20
  28. agents/run.py +191 -106
  29. agents/tool.py +1 -1
  30. agents/tracing/processor_interface.py +84 -11
  31. agents/tracing/spans.py +88 -0
  32. agents/tracing/traces.py +99 -16
  33. agents/util/_json.py +19 -1
  34. agents/util/_transforms.py +12 -2
  35. agents/voice/input.py +5 -4
  36. agents/voice/models/openai_stt.py +15 -8
  37. {openai_agents-0.2.11.dist-info → openai_agents-0.3.1.dist-info}/METADATA +4 -2
  38. {openai_agents-0.2.11.dist-info → openai_agents-0.3.1.dist-info}/RECORD +40 -37
  39. {openai_agents-0.2.11.dist-info → openai_agents-0.3.1.dist-info}/WHEEL +0 -0
  40. {openai_agents-0.2.11.dist-info → openai_agents-0.3.1.dist-info}/licenses/LICENSE +0 -0
agents/memory/util.py ADDED
@@ -0,0 +1,20 @@
+ from __future__ import annotations
+
+ from typing import Callable
+
+ from ..items import TResponseInputItem
+ from ..util._types import MaybeAwaitable
+
+ SessionInputCallback = Callable[
+     [list[TResponseInputItem], list[TResponseInputItem]],
+     MaybeAwaitable[list[TResponseInputItem]],
+ ]
+ """A function that combines session history with new input items.
+
+ Args:
+     history_items: The list of items from the session history.
+     new_items: The list of new input items for the current turn.
+
+ Returns:
+     A list of combined items to be used as input for the agent. Can be sync or async.
+ """
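The new `SessionInputCallback` alias only fixes a call signature; any sync or async callable of that shape can be plugged in. A minimal sketch of such a callback (the trimming behaviour and the `trim_history` name are illustrative, not part of the package):

```python
from agents.items import TResponseInputItem


def trim_history(
    history_items: list[TResponseInputItem],
    new_items: list[TResponseInputItem],
) -> list[TResponseInputItem]:
    # Keep only the last 10 history items before appending this turn's input.
    return history_items[-10:] + new_items
```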
agents/models/chatcmpl_converter.py CHANGED
@@ -39,7 +39,7 @@ from openai.types.responses import (
      ResponseReasoningItemParam,
  )
  from openai.types.responses.response_input_param import FunctionCallOutput, ItemReference, Message
- from openai.types.responses.response_reasoning_item import Summary
+ from openai.types.responses.response_reasoning_item import Content, Summary
 
  from ..agent_output import AgentOutputSchemaBase
  from ..exceptions import AgentsException, UserError
@@ -93,16 +93,41 @@ class Converter:
      def message_to_output_items(cls, message: ChatCompletionMessage) -> list[TResponseOutputItem]:
          items: list[TResponseOutputItem] = []
 
-         # Handle reasoning content if available
+         # Check if message is agents.extentions.models.litellm_model.InternalChatCompletionMessage
+         # We can't actually import it here because litellm is an optional dependency
+         # So we use hasattr to check for reasoning_content and thinking_blocks
          if hasattr(message, "reasoning_content") and message.reasoning_content:
-             items.append(
-                 ResponseReasoningItem(
-                     id=FAKE_RESPONSES_ID,
-                     summary=[Summary(text=message.reasoning_content, type="summary_text")],
-                     type="reasoning",
-                 )
+             reasoning_item = ResponseReasoningItem(
+                 id=FAKE_RESPONSES_ID,
+                 summary=[Summary(text=message.reasoning_content, type="summary_text")],
+                 type="reasoning",
              )
 
+             # Store thinking blocks for Anthropic compatibility
+             if hasattr(message, "thinking_blocks") and message.thinking_blocks:
+                 # Store thinking text in content and signature in encrypted_content
+                 reasoning_item.content = []
+                 signature = None
+                 for block in message.thinking_blocks:
+                     if isinstance(block, dict):
+                         thinking_text = block.get("thinking", "")
+                         if thinking_text:
+                             reasoning_item.content.append(
+                                 Content(text=thinking_text, type="reasoning_text")
+                             )
+                         # Store the signature if present
+                         if block.get("signature"):
+                             signature = block.get("signature")
+
+                 # Store only the last signature in encrypted_content
+                 # If there are multiple thinking blocks, this should be a problem.
+                 # In practice, there should only be one signature for the entire reasoning step.
+                 # Tested with: claude-sonnet-4-20250514
+                 if signature:
+                     reasoning_item.encrypted_content = signature
+
+             items.append(reasoning_item)
+
          message_item = ResponseOutputMessage(
              id=FAKE_RESPONSES_ID,
              content=[],
@@ -272,9 +297,7 @@ class Converter:
                          f"Only file_data is supported for input_file {casted_file_param}"
                      )
                  if "filename" not in casted_file_param or not casted_file_param["filename"]:
-                     raise UserError(
-                         f"filename must be provided for input_file {casted_file_param}"
-                     )
+                     raise UserError(f"filename must be provided for input_file {casted_file_param}")
                  out.append(
                      File(
                          type="file",
@@ -292,10 +315,18 @@ class Converter:
      def items_to_messages(
          cls,
          items: str | Iterable[TResponseInputItem],
+         preserve_thinking_blocks: bool = False,
      ) -> list[ChatCompletionMessageParam]:
          """
          Convert a sequence of 'Item' objects into a list of ChatCompletionMessageParam.
 
+         Args:
+             items: A string or iterable of response input items to convert
+             preserve_thinking_blocks: Whether to preserve thinking blocks in tool calls
+                 for reasoning models like Claude 4 Sonnet/Opus which support interleaved
+                 thinking. When True, thinking blocks are reconstructed and included in
+                 assistant messages with tool calls.
+
          Rules:
          - EasyInputMessage or InputMessage (role=user) => ChatCompletionUserMessageParam
          - EasyInputMessage or InputMessage (role=system) => ChatCompletionSystemMessageParam
@@ -316,6 +347,7 @@ class Converter:
 
          result: list[ChatCompletionMessageParam] = []
          current_assistant_msg: ChatCompletionAssistantMessageParam | None = None
+         pending_thinking_blocks: list[dict[str, str]] | None = None
 
          def flush_assistant_message() -> None:
              nonlocal current_assistant_msg
@@ -327,10 +359,11 @@ class Converter:
              current_assistant_msg = None
 
          def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
-             nonlocal current_assistant_msg
+             nonlocal current_assistant_msg, pending_thinking_blocks
              if current_assistant_msg is None:
                  current_assistant_msg = ChatCompletionAssistantMessageParam(role="assistant")
                  current_assistant_msg["tool_calls"] = []
+
              return current_assistant_msg
 
          for item in items:
@@ -446,6 +479,13 @@ class Converter:
 
              elif func_call := cls.maybe_function_tool_call(item):
                  asst = ensure_assistant_message()
+
+                 # If we have pending thinking blocks, use them as the content
+                 # This is required for Anthropic API tool calls with interleaved thinking
+                 if pending_thinking_blocks:
+                     asst["content"] = pending_thinking_blocks  # type: ignore
+                     pending_thinking_blocks = None  # Clear after using
+
                  tool_calls = list(asst.get("tool_calls", []))
                  arguments = func_call["arguments"] if func_call["arguments"] else "{}"
                  new_tool_call = ChatCompletionMessageFunctionToolCallParam(
@@ -474,9 +514,28 @@ class Converter:
                      f"Encountered an item_reference, which is not supported: {item_ref}"
                  )
 
-             # 7) reasoning message => not handled
-             elif cls.maybe_reasoning_message(item):
-                 pass
+             # 7) reasoning message => extract thinking blocks if present
+             elif reasoning_item := cls.maybe_reasoning_message(item):
+                 # Reconstruct thinking blocks from content (text) and encrypted_content (signature)
+                 content_items = reasoning_item.get("content", [])
+                 signature = reasoning_item.get("encrypted_content")
+
+                 if content_items and preserve_thinking_blocks:
+                     # Reconstruct thinking blocks from content and signature
+                     pending_thinking_blocks = []
+                     for content_item in content_items:
+                         if (
+                             isinstance(content_item, dict)
+                             and content_item.get("type") == "reasoning_text"
+                         ):
+                             thinking_block = {
+                                 "type": "thinking",
+                                 "thinking": content_item.get("text", ""),
+                             }
+                             # Add signature if available
+                             if signature:
+                                 thinking_block["signature"] = signature
+                             pending_thinking_blocks.append(thinking_block)
 
              # 8) If we haven't recognized it => fail or ignore
              else:
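Taken together, these hunks round-trip Anthropic thinking blocks through a Responses-style reasoning item: the thinking text is stored as `reasoning_text` entries in `content` and the signature in `encrypted_content`, and `items_to_messages(..., preserve_thinking_blocks=True)` rebuilds the original block shape. A rough sketch of the two shapes involved (all values are illustrative):

```python
from openai.types.responses.response_reasoning_item import (
    Content,
    ResponseReasoningItem,
    Summary,
)

# What message_to_output_items builds from a litellm message carrying
# reasoning_content plus one Anthropic thinking block (values made up).
reasoning_item = ResponseReasoningItem(
    id="__fake_id__",
    type="reasoning",
    summary=[Summary(text="Checking the forecast...", type="summary_text")],
)
reasoning_item.content = [Content(text="Checking the forecast...", type="reasoning_text")]
reasoning_item.encrypted_content = "sig_abc123"  # the Anthropic signature

# What items_to_messages reconstructs when preserve_thinking_blocks=True,
# attached as the assistant message content ahead of the tool calls.
thinking_block = {
    "type": "thinking",
    "thinking": reasoning_item.content[0].text,
    "signature": reasoning_item.encrypted_content,
}
```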
agents/models/chatcmpl_helpers.py CHANGED
@@ -1,5 +1,7 @@
  from __future__ import annotations
 
+ from contextvars import ContextVar
+
  from openai import AsyncOpenAI
 
  from ..model_settings import ModelSettings
@@ -8,6 +10,10 @@ from ..version import __version__
  _USER_AGENT = f"Agents/Python {__version__}"
  HEADERS = {"User-Agent": _USER_AGENT}
 
+ USER_AGENT_OVERRIDE: ContextVar[str | None] = ContextVar(
+     "openai_chatcompletions_user_agent_override", default=None
+ )
+
 
  class ChatCmplHelpers:
      @classmethod
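Because `USER_AGENT_OVERRIDE` is a plain `ContextVar`, a custom User-Agent can be scoped to a block of code without touching `ModelSettings`. A sketch (the header value is illustrative):

```python
from agents.models.chatcmpl_helpers import USER_AGENT_OVERRIDE

token = USER_AGENT_OVERRIDE.set("my-gateway/1.2.3")  # illustrative value
try:
    ...  # run the agent / issue Chat Completions requests here
finally:
    # Restore the default "Agents/Python <version>" header.
    USER_AGENT_OVERRIDE.reset(token)
```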
agents/models/chatcmpl_stream_handler.py CHANGED
@@ -62,6 +62,9 @@ class StreamingState:
      # Fields for real-time function call streaming
      function_call_streaming: dict[int, bool] = field(default_factory=dict)
      function_call_output_idx: dict[int, int] = field(default_factory=dict)
+     # Store accumulated thinking text and signature for Anthropic compatibility
+     thinking_text: str = ""
+     thinking_signature: str | None = None
 
 
  class SequenceNumber:
@@ -101,6 +104,19 @@ class ChatCmplStreamHandler:
 
              delta = chunk.choices[0].delta
 
+             # Handle thinking blocks from Anthropic (for preserving signatures)
+             if hasattr(delta, "thinking_blocks") and delta.thinking_blocks:
+                 for block in delta.thinking_blocks:
+                     if isinstance(block, dict):
+                         # Accumulate thinking text
+                         thinking_text = block.get("thinking", "")
+                         if thinking_text:
+                             state.thinking_text += thinking_text
+                         # Store signature if present
+                         signature = block.get("signature")
+                         if signature:
+                             state.thinking_signature = signature
+
              # Handle reasoning content for reasoning summaries
              if hasattr(delta, "reasoning_content"):
                  reasoning_content = delta.reasoning_content
@@ -527,7 +543,19 @@ class ChatCmplStreamHandler:
 
          # include Reasoning item if it exists
          if state.reasoning_content_index_and_output:
-             outputs.append(state.reasoning_content_index_and_output[1])
+             reasoning_item = state.reasoning_content_index_and_output[1]
+             # Store thinking text in content and signature in encrypted_content
+             if state.thinking_text:
+                 # Add thinking text as a Content object
+                 if not reasoning_item.content:
+                     reasoning_item.content = []
+                 reasoning_item.content.append(
+                     Content(text=state.thinking_text, type="reasoning_text")
+                 )
+             # Store signature in encrypted_content
+             if state.thinking_signature:
+                 reasoning_item.encrypted_content = state.thinking_signature
+             outputs.append(reasoning_item)
 
          # include text or refusal content if they exist
          if state.text_content_index_and_output or state.refusal_content_index_and_output:
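The handler concatenates `thinking` deltas across chunks and keeps whichever `signature` arrives last; a toy, dependency-free illustration of that accumulation (the delta dicts are made up):

```python
thinking_text = ""
thinking_signature: str | None = None

deltas = [
    {"thinking": "Let me check the "},
    {"thinking": "forecast first.", "signature": "sig_abc123"},  # illustrative
]
for block in deltas:
    if block.get("thinking"):
        thinking_text += block["thinking"]
    if block.get("signature"):
        thinking_signature = block["signature"]

assert thinking_text == "Let me check the forecast first."
assert thinking_signature == "sig_abc123"
```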
agents/models/openai_chatcompletions.py CHANGED
@@ -23,8 +23,9 @@ from ..tracing import generation_span
  from ..tracing.span_data import GenerationSpanData
  from ..tracing.spans import Span
  from ..usage import Usage
+ from ..util._json import _to_dump_compatible
  from .chatcmpl_converter import Converter
- from .chatcmpl_helpers import HEADERS, ChatCmplHelpers
+ from .chatcmpl_helpers import HEADERS, USER_AGENT_OVERRIDE, ChatCmplHelpers
  from .chatcmpl_stream_handler import ChatCmplStreamHandler
  from .fake_id import FAKE_RESPONSES_ID
  from .interface import Model, ModelTracing
@@ -237,6 +238,8 @@ class OpenAIChatCompletionsModel(Model):
                  "role": "system",
              },
          )
+         converted_messages = _to_dump_compatible(converted_messages)
+
          if tracing.include_data():
              span.span_data.input = converted_messages
 
@@ -255,12 +258,24 @@ class OpenAIChatCompletionsModel(Model):
          for handoff in handoffs:
              converted_tools.append(Converter.convert_handoff_tool(handoff))
 
+         converted_tools = _to_dump_compatible(converted_tools)
+
          if _debug.DONT_LOG_MODEL_DATA:
              logger.debug("Calling LLM")
          else:
+             messages_json = json.dumps(
+                 converted_messages,
+                 indent=2,
+                 ensure_ascii=False,
+             )
+             tools_json = json.dumps(
+                 converted_tools,
+                 indent=2,
+                 ensure_ascii=False,
+             )
              logger.debug(
-                 f"{json.dumps(converted_messages, indent=2, ensure_ascii=False)}\n"
-                 f"Tools:\n{json.dumps(converted_tools, indent=2, ensure_ascii=False)}\n"
+                 f"{messages_json}\n"
+                 f"Tools:\n{tools_json}\n"
                  f"Stream: {stream}\n"
                  f"Tool choice: {tool_choice}\n"
                  f"Response format: {response_format}\n"
@@ -291,7 +306,7 @@ class OpenAIChatCompletionsModel(Model):
              reasoning_effort=self._non_null_or_not_given(reasoning_effort),
              verbosity=self._non_null_or_not_given(model_settings.verbosity),
              top_logprobs=self._non_null_or_not_given(model_settings.top_logprobs),
-             extra_headers={**HEADERS, **(model_settings.extra_headers or {})},
+             extra_headers=self._merge_headers(model_settings),
              extra_query=model_settings.extra_query,
              extra_body=model_settings.extra_body,
              metadata=self._non_null_or_not_given(model_settings.metadata),
@@ -334,3 +349,10 @@ class OpenAIChatCompletionsModel(Model):
          if self._client is None:
              self._client = AsyncOpenAI()
          return self._client
+
+     def _merge_headers(self, model_settings: ModelSettings):
+         merged = {**HEADERS, **(model_settings.extra_headers or {})}
+         ua_ctx = USER_AGENT_OVERRIDE.get()
+         if ua_ctx is not None:
+             merged["User-Agent"] = ua_ctx
+         return merged
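`_merge_headers` gives the context-var override the last word: package defaults, then `model_settings.extra_headers`, then `USER_AGENT_OVERRIDE`. A standalone sketch of that precedence (names mirror the diff, values are made up):

```python
from contextvars import ContextVar

HEADERS = {"User-Agent": "Agents/Python 0.3.1"}
USER_AGENT_OVERRIDE: ContextVar[str | None] = ContextVar("ua_override", default=None)


def merge_headers(extra_headers: dict[str, str] | None) -> dict[str, str]:
    # extra_headers wins over the defaults; the ContextVar wins over both.
    merged = {**HEADERS, **(extra_headers or {})}
    ua_ctx = USER_AGENT_OVERRIDE.get()
    if ua_ctx is not None:
        merged["User-Agent"] = ua_ctx
    return merged


USER_AGENT_OVERRIDE.set("custom-ua/1.0")
assert merge_headers({"User-Agent": "ignored"})["User-Agent"] == "custom-ua/1.0"
```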
agents/models/openai_responses.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
  import json
  from collections.abc import AsyncIterator
+ from contextvars import ContextVar
  from dataclasses import dataclass
  from typing import TYPE_CHECKING, Any, Literal, cast, overload
 
@@ -38,6 +39,7 @@ from ..tool import (
  )
  from ..tracing import SpanError, response_span
  from ..usage import Usage
+ from ..util._json import _to_dump_compatible
  from ..version import __version__
  from .interface import Model, ModelTracing
 
@@ -48,6 +50,11 @@ if TYPE_CHECKING:
  _USER_AGENT = f"Agents/Python {__version__}"
  _HEADERS = {"User-Agent": _USER_AGENT}
 
+ # Override for the User-Agent header used by the Responses API.
+ _USER_AGENT_OVERRIDE: ContextVar[str | None] = ContextVar(
+     "openai_responses_user_agent_override", default=None
+ )
+
 
  class OpenAIResponsesModel(Model):
      """
@@ -240,6 +247,7 @@ class OpenAIResponsesModel(Model):
          prompt: ResponsePromptParam | None = None,
      ) -> Response | AsyncStream[ResponseStreamEvent]:
          list_input = ItemHelpers.input_to_new_input_list(input)
+         list_input = _to_dump_compatible(list_input)
 
          parallel_tool_calls = (
              True
@@ -251,6 +259,7 @@ class OpenAIResponsesModel(Model):
 
          tool_choice = Converter.convert_tool_choice(model_settings.tool_choice)
          converted_tools = Converter.convert_tools(tools, handoffs)
+         converted_tools_payload = _to_dump_compatible(converted_tools.tools)
          response_format = Converter.get_response_format(output_schema)
 
          include_set: set[str] = set(converted_tools.includes)
@@ -263,10 +272,20 @@ class OpenAIResponsesModel(Model):
          if _debug.DONT_LOG_MODEL_DATA:
              logger.debug("Calling LLM")
          else:
+             input_json = json.dumps(
+                 list_input,
+                 indent=2,
+                 ensure_ascii=False,
+             )
+             tools_json = json.dumps(
+                 converted_tools_payload,
+                 indent=2,
+                 ensure_ascii=False,
+             )
              logger.debug(
                  f"Calling LLM {self.model} with input:\n"
-                 f"{json.dumps(list_input, indent=2, ensure_ascii=False)}\n"
-                 f"Tools:\n{json.dumps(converted_tools.tools, indent=2, ensure_ascii=False)}\n"
+                 f"{input_json}\n"
+                 f"Tools:\n{tools_json}\n"
                  f"Stream: {stream}\n"
                  f"Tool choice: {tool_choice}\n"
                  f"Response format: {response_format}\n"
@@ -290,7 +309,7 @@ class OpenAIResponsesModel(Model):
              model=self.model,
              input=list_input,
              include=include,
-             tools=converted_tools.tools,
+             tools=converted_tools_payload,
              prompt=self._non_null_or_not_given(prompt),
              temperature=self._non_null_or_not_given(model_settings.temperature),
              top_p=self._non_null_or_not_given(model_settings.top_p),
@@ -299,7 +318,7 @@ class OpenAIResponsesModel(Model):
              tool_choice=tool_choice,
              parallel_tool_calls=parallel_tool_calls,
              stream=stream,
-             extra_headers={**_HEADERS, **(model_settings.extra_headers or {})},
+             extra_headers=self._merge_headers(model_settings),
              extra_query=model_settings.extra_query,
              extra_body=model_settings.extra_body,
              text=response_format,
@@ -314,6 +333,13 @@ class OpenAIResponsesModel(Model):
              self._client = AsyncOpenAI()
          return self._client
 
+     def _merge_headers(self, model_settings: ModelSettings):
+         merged = {**_HEADERS, **(model_settings.extra_headers or {})}
+         ua_ctx = _USER_AGENT_OVERRIDE.get()
+         if ua_ctx is not None:
+             merged["User-Agent"] = ua_ctx
+         return merged
+
 
  @dataclass
  class ConvertedTools:
agents/realtime/__init__.py CHANGED
@@ -3,6 +3,7 @@ from .config import (
      RealtimeAudioFormat,
      RealtimeClientMessage,
      RealtimeGuardrailsSettings,
+     RealtimeInputAudioNoiseReductionConfig,
      RealtimeInputAudioTranscriptionConfig,
      RealtimeModelName,
      RealtimeModelTracingConfig,
@@ -101,6 +102,7 @@ __all__ = [
      "RealtimeAudioFormat",
      "RealtimeClientMessage",
      "RealtimeGuardrailsSettings",
+     "RealtimeInputAudioNoiseReductionConfig",
      "RealtimeInputAudioTranscriptionConfig",
      "RealtimeModelName",
      "RealtimeModelTracingConfig",
agents/realtime/_util.py CHANGED
@@ -4,6 +4,6 @@ from .config import RealtimeAudioFormat
 
 
  def calculate_audio_length_ms(format: RealtimeAudioFormat | None, audio_bytes: bytes) -> float:
-     if format and format.startswith("g711"):
+     if format and isinstance(format, str) and format.startswith("g711"):
          return (len(audio_bytes) / 8000) * 1000
      return (len(audio_bytes) / 24 / 2) * 1000
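The fall-through branch assumes 24 kHz, 16-bit (2 bytes per sample) PCM, so `len(audio_bytes) / 24 / 2` yields milliseconds, while G.711 is 8 kHz with 1 byte per sample. A quick check of both branches:

```python
from agents.realtime._util import calculate_audio_length_ms

# PCM16 at 24 kHz: 24 samples/ms * 2 bytes/sample = 48 bytes per millisecond.
assert calculate_audio_length_ms("pcm16", b"\x00" * 48_000) == 1000.0

# G.711 (u-law/a-law): 8 kHz, 1 byte per sample -> 8 bytes per millisecond.
assert calculate_audio_length_ms("g711_ulaw", b"\x00" * 8_000) == 1000.0
```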
agents/realtime/agent.py CHANGED
@@ -6,6 +6,8 @@ from collections.abc import Awaitable
  from dataclasses import dataclass, field
  from typing import Any, Callable, Generic, cast
 
+ from agents.prompts import Prompt
+
  from ..agent import AgentBase
  from ..guardrail import OutputGuardrail
  from ..handoffs import Handoff
@@ -55,6 +57,11 @@ class RealtimeAgent(AgentBase, Generic[TContext]):
      return a string.
      """
 
+     prompt: Prompt | None = None
+     """A prompt object. Prompts allow you to dynamically configure the instructions, tools
+     and other config for an agent outside of your code. Only usable with OpenAI models.
+     """
+
      handoffs: list[RealtimeAgent[Any] | Handoff[TContext, RealtimeAgent[Any]]] = field(
          default_factory=list
      )
agents/realtime/audio_formats.py ADDED
@@ -0,0 +1,29 @@
+ from __future__ import annotations
+
+ from openai.types.realtime.realtime_audio_formats import (
+     AudioPCM,
+     AudioPCMA,
+     AudioPCMU,
+     RealtimeAudioFormats,
+ )
+
+ from ..logger import logger
+
+
+ def to_realtime_audio_format(
+     input_audio_format: str | RealtimeAudioFormats | None,
+ ) -> RealtimeAudioFormats | None:
+     format: RealtimeAudioFormats | None = None
+     if input_audio_format is not None:
+         if isinstance(input_audio_format, str):
+             if input_audio_format in ["pcm16", "audio/pcm", "pcm"]:
+                 format = AudioPCM(type="audio/pcm", rate=24000)
+             elif input_audio_format in ["g711_ulaw", "audio/pcmu", "pcmu"]:
+                 format = AudioPCMU(type="audio/pcmu")
+             elif input_audio_format in ["g711_alaw", "audio/pcma", "pcma"]:
+                 format = AudioPCMA(type="audio/pcma")
+             else:
+                 logger.debug(f"Unknown input_audio_format: {input_audio_format}")
+         else:
+             format = input_audio_format
+     return format
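A few example mappings for the new helper, based on the branches above (assuming the new module is `agents/realtime/audio_formats.py`, as listed in the files-changed table):

```python
from agents.realtime.audio_formats import to_realtime_audio_format

pcm = to_realtime_audio_format("pcm16")
assert pcm is not None and pcm.type == "audio/pcm"  # 24 kHz PCM

assert to_realtime_audio_format("g711_ulaw").type == "audio/pcmu"
assert to_realtime_audio_format("g711_alaw").type == "audio/pcma"
assert to_realtime_audio_format(None) is None
```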
agents/realtime/config.py CHANGED
@@ -6,8 +6,13 @@ from typing import (
      Union,
  )
 
+ from openai.types.realtime.realtime_audio_formats import (
+     RealtimeAudioFormats as OpenAIRealtimeAudioFormats,
+ )
  from typing_extensions import NotRequired, TypeAlias, TypedDict
 
+ from agents.prompts import Prompt
+
  from ..guardrail import OutputGuardrail
  from ..handoffs import Handoff
  from ..model_settings import ToolChoice
@@ -15,6 +20,8 @@ from ..tool import Tool
 
  RealtimeModelName: TypeAlias = Union[
      Literal[
+         "gpt-realtime",
+         "gpt-realtime-2025-08-28",
          "gpt-4o-realtime-preview",
          "gpt-4o-mini-realtime-preview",
          "gpt-4o-realtime-preview-2025-06-03",
@@ -54,6 +61,13 @@ class RealtimeInputAudioTranscriptionConfig(TypedDict):
      """An optional prompt to guide transcription."""
 
 
+ class RealtimeInputAudioNoiseReductionConfig(TypedDict):
+     """Noise reduction configuration for input audio."""
+
+     type: NotRequired[Literal["near_field", "far_field"]]
+     """Noise reduction mode to apply to input audio."""
+
+
  class RealtimeTurnDetectionConfig(TypedDict):
      """Turn detection config. Allows extra vendor keys if needed."""
 
@@ -91,6 +105,9 @@ class RealtimeSessionModelSettings(TypedDict):
      instructions: NotRequired[str]
      """System instructions for the model."""
 
+     prompt: NotRequired[Prompt]
+     """The prompt to use for the model."""
+
      modalities: NotRequired[list[Literal["text", "audio"]]]
      """The modalities the model should support."""
 
@@ -100,15 +117,18 @@ class RealtimeSessionModelSettings(TypedDict):
      speed: NotRequired[float]
      """The speed of the model's responses."""
 
-     input_audio_format: NotRequired[RealtimeAudioFormat]
+     input_audio_format: NotRequired[RealtimeAudioFormat | OpenAIRealtimeAudioFormats]
      """The format for input audio streams."""
 
-     output_audio_format: NotRequired[RealtimeAudioFormat]
+     output_audio_format: NotRequired[RealtimeAudioFormat | OpenAIRealtimeAudioFormats]
      """The format for output audio streams."""
 
      input_audio_transcription: NotRequired[RealtimeInputAudioTranscriptionConfig]
      """Configuration for transcribing input audio."""
 
+     input_audio_noise_reduction: NotRequired[RealtimeInputAudioNoiseReductionConfig | None]
+     """Noise reduction configuration for input audio."""
+
      turn_detection: NotRequired[RealtimeTurnDetectionConfig]
      """Configuration for detecting conversation turns."""
 
@@ -177,6 +197,14 @@ class RealtimeUserInputText(TypedDict):
      """The text content from the user."""
 
 
+ class RealtimeUserInputImage(TypedDict, total=False):
+     """An image input from the user (Realtime)."""
+
+     type: Literal["input_image"]
+     image_url: str
+     detail: NotRequired[Literal["auto", "low", "high"] | str]
+
+
  class RealtimeUserInputMessage(TypedDict):
      """A message input from the user."""
 
@@ -186,8 +214,8 @@ class RealtimeUserInputMessage(TypedDict):
      role: Literal["user"]
      """The role identifier for user messages."""
 
-     content: list[RealtimeUserInputText]
-     """List of text content items in the message."""
+     content: list[RealtimeUserInputText | RealtimeUserInputImage]
+     """List of content items (text and image) in the message."""
 
 
  RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
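With the new `RealtimeUserInputImage` type, a realtime user message can mix text and image parts. A sketch of such a message (the data URL is a placeholder):

```python
from agents.realtime.config import RealtimeUserInputMessage

message: RealtimeUserInputMessage = {
    "type": "message",
    "role": "user",
    "content": [
        {"type": "input_text", "text": "What is in this picture?"},
        {
            "type": "input_image",
            "image_url": "data:image/png;base64,...",  # placeholder data URL
            "detail": "auto",
        },
    ],
}
```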
agents/realtime/items.py CHANGED
@@ -34,6 +34,22 @@ class InputAudio(BaseModel):
      model_config = ConfigDict(extra="allow")
 
 
+ class InputImage(BaseModel):
+     """Image input content for realtime messages."""
+
+     type: Literal["input_image"] = "input_image"
+     """The type identifier for image input."""
+
+     image_url: str | None = None
+     """Data/remote URL string (data:... or https:...)."""
+
+     detail: str | None = None
+     """Optional detail hint (e.g., 'auto', 'high', 'low')."""
+
+     # Allow extra data (e.g., `detail`)
+     model_config = ConfigDict(extra="allow")
+
+
  class AssistantText(BaseModel):
      """Text content from the assistant in realtime responses."""
 
@@ -100,7 +116,7 @@ class UserMessageItem(BaseModel):
      role: Literal["user"] = "user"
      """The role identifier for user messages."""
 
-     content: list[Annotated[InputText | InputAudio, Field(discriminator="type")]]
+     content: list[Annotated[InputText | InputAudio | InputImage, Field(discriminator="type")]]
      """List of content items, can be text or audio."""
 
      # Allow extra data
agents/realtime/model_events.py CHANGED
@@ -84,6 +84,7 @@ class RealtimeModelInputAudioTranscriptionCompletedEvent:
 
      type: Literal["input_audio_transcription_completed"] = "input_audio_transcription_completed"
 
+
  @dataclass
  class RealtimeModelInputAudioTimeoutTriggeredEvent:
      """Input audio timeout triggered."""
@@ -94,6 +95,7 @@ class RealtimeModelInputAudioTimeoutTriggeredEvent:
 
      type: Literal["input_audio_timeout_triggered"] = "input_audio_timeout_triggered"
 
+
  @dataclass
  class RealtimeModelTranscriptDeltaEvent:
      """Partial transcript update."""
agents/realtime/model_inputs.py CHANGED
@@ -24,12 +24,26 @@ class RealtimeModelInputTextContent(TypedDict):
      text: str
 
 
+ class RealtimeModelInputImageContent(TypedDict, total=False):
+     """An image to be sent to the model.
+
+     The Realtime API expects `image_url` to be a string data/remote URL.
+     """
+
+     type: Literal["input_image"]
+     image_url: str
+     """String URL (data:... or https:...)."""
+
+     detail: NotRequired[str]
+     """Optional detail hint such as 'high', 'low', or 'auto'."""
+
+
  class RealtimeModelUserInputMessage(TypedDict):
      """A message to be sent to the model."""
 
      type: Literal["message"]
      role: Literal["user"]
-     content: list[RealtimeModelInputTextContent]
+     content: list[RealtimeModelInputTextContent | RealtimeModelInputImageContent]
 
 
  RealtimeModelUserInput: TypeAlias = Union[str, RealtimeModelUserInputMessage]