openai-agents 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/_debug.py +15 -4
- agents/_run_impl.py +34 -37
- agents/extensions/models/litellm_model.py +20 -5
- agents/memory/__init__.py +2 -0
- agents/memory/openai_conversations_session.py +0 -3
- agents/memory/util.py +20 -0
- agents/models/openai_chatcompletions.py +17 -2
- agents/models/openai_responses.py +17 -4
- agents/realtime/_util.py +1 -1
- agents/realtime/agent.py +7 -0
- agents/realtime/audio_formats.py +29 -0
- agents/realtime/config.py +22 -4
- agents/realtime/items.py +17 -1
- agents/realtime/model.py +6 -0
- agents/realtime/model_inputs.py +15 -1
- agents/realtime/openai_realtime.py +428 -139
- agents/realtime/session.py +167 -14
- agents/run.py +102 -54
- agents/tool.py +2 -2
- agents/util/_json.py +19 -1
- agents/voice/input.py +5 -4
- agents/voice/models/openai_stt.py +6 -4
- {openai_agents-0.2.10.dist-info → openai_agents-0.3.0.dist-info}/METADATA +2 -2
- {openai_agents-0.2.10.dist-info → openai_agents-0.3.0.dist-info}/RECORD +26 -24
- {openai_agents-0.2.10.dist-info → openai_agents-0.3.0.dist-info}/WHEEL +0 -0
- {openai_agents-0.2.10.dist-info → openai_agents-0.3.0.dist-info}/licenses/LICENSE +0 -0
agents/_debug.py
CHANGED
@@ -1,17 +1,28 @@
 import os
 
 
-def _debug_flag_enabled(flag: str) -> bool:
+def _debug_flag_enabled(flag: str, default: bool = False) -> bool:
     flag_value = os.getenv(flag)
-    return flag_value is not None and (flag_value == "1" or flag_value.lower() == "true")
+    if flag_value is None:
+        return default
+    else:
+        return flag_value == "1" or flag_value.lower() == "true"
 
 
-DONT_LOG_MODEL_DATA = _debug_flag_enabled("OPENAI_AGENTS_DONT_LOG_MODEL_DATA")
+def _load_dont_log_model_data() -> bool:
+    return _debug_flag_enabled("OPENAI_AGENTS_DONT_LOG_MODEL_DATA", default=True)
+
+
+def _load_dont_log_tool_data() -> bool:
+    return _debug_flag_enabled("OPENAI_AGENTS_DONT_LOG_TOOL_DATA", default=True)
+
+
+DONT_LOG_MODEL_DATA = _load_dont_log_model_data()
 """By default we don't log LLM inputs/outputs, to prevent exposing sensitive information. Set this
 flag to enable logging them.
 """
 
-DONT_LOG_TOOL_DATA = _debug_flag_enabled("OPENAI_AGENTS_DONT_LOG_TOOL_DATA")
+DONT_LOG_TOOL_DATA = _load_dont_log_tool_data()
 """By default we don't log tool call inputs/outputs, to prevent exposing sensitive information. Set
 this flag to enable logging them.
 """
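The practical effect: both flags now default to True via the new default= parameter, and any value other than "1"/"true" turns them off. A minimal standalone sketch of the new rule (this re-implements _debug_flag_enabled for illustration rather than importing it; note the real flags are read once at import time, so the environment must be set before agents is imported):

    import os

    def flag_enabled(name: str, default: bool = False) -> bool:
        # Mirrors the new _debug_flag_enabled: missing -> default,
        # otherwise true only for "1" or "true" (case-insensitive).
        value = os.getenv(name)
        if value is None:
            return default
        return value == "1" or value.lower() == "true"

    # Setting the variable to "0" disables the DONT_LOG behavior,
    # i.e. enables model-data logging.
    os.environ["OPENAI_AGENTS_DONT_LOG_MODEL_DATA"] = "0"
    assert flag_enabled("OPENAI_AGENTS_DONT_LOG_MODEL_DATA", default=True) is False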
agents/_run_impl.py
CHANGED
@@ -330,43 +330,40 @@ class RunImpl:
             ItemHelpers.extract_last_text(message_items[-1].raw_item) if message_items else None
         )
 
-        # ...
-        ...
-            new_step_items=new_step_items,
-            next_step=NextStepRunAgain(),
-        )
+        # Generate final output only when there are no pending tool calls or approval requests.
+        if not processed_response.has_tools_or_approvals_to_run():
+            if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
+                final_output = output_schema.validate_json(potential_final_output_text)
+                return await cls.execute_final_output(
+                    agent=agent,
+                    original_input=original_input,
+                    new_response=new_response,
+                    pre_step_items=pre_step_items,
+                    new_step_items=new_step_items,
+                    final_output=final_output,
+                    hooks=hooks,
+                    context_wrapper=context_wrapper,
+                )
+            elif not output_schema or output_schema.is_plain_text():
+                return await cls.execute_final_output(
+                    agent=agent,
+                    original_input=original_input,
+                    new_response=new_response,
+                    pre_step_items=pre_step_items,
+                    new_step_items=new_step_items,
+                    final_output=potential_final_output_text or "",
+                    hooks=hooks,
+                    context_wrapper=context_wrapper,
+                )
+
+        # If there's no final output, we can just run again
+        return SingleStepResult(
+            original_input=original_input,
+            model_response=new_response,
+            pre_step_items=pre_step_items,
+            new_step_items=new_step_items,
+            next_step=NextStepRunAgain(),
+        )
 
     @classmethod
     def maybe_reset_tool_choice(
agents/extensions/models/litellm_model.py
CHANGED
@@ -48,6 +48,7 @@ from ...tracing import generation_span
 from ...tracing.span_data import GenerationSpanData
 from ...tracing.spans import Span
 from ...usage import Usage
+from ...util._json import _to_dump_compatible
 
 
 class InternalChatCompletionMessage(ChatCompletionMessage):
@@ -265,6 +266,8 @@ class LitellmModel(Model):
                     "role": "system",
                 },
             )
+        converted_messages = _to_dump_compatible(converted_messages)
+
         if tracing.include_data():
             span.span_data.input = converted_messages
 
@@ -283,13 +286,25 @@ class LitellmModel(Model):
         for handoff in handoffs:
             converted_tools.append(Converter.convert_handoff_tool(handoff))
 
+        converted_tools = _to_dump_compatible(converted_tools)
+
         if _debug.DONT_LOG_MODEL_DATA:
             logger.debug("Calling LLM")
         else:
+            messages_json = json.dumps(
+                converted_messages,
+                indent=2,
+                ensure_ascii=False,
+            )
+            tools_json = json.dumps(
+                converted_tools,
+                indent=2,
+                ensure_ascii=False,
+            )
             logger.debug(
                 f"Calling Litellm model: {self.model}\n"
-                f"{...}\n"
-                f"Tools:\n{...}\n"
+                f"{messages_json}\n"
+                f"Tools:\n{tools_json}\n"
                 f"Stream: {stream}\n"
                 f"Tool choice: {tool_choice}\n"
                 f"Response format: {response_format}\n"
@@ -369,9 +384,9 @@ class LitellmConverter:
         if message.role != "assistant":
             raise ModelBehaviorError(f"Unsupported role: {message.role}")
 
-        tool_calls: list[
-            ChatCompletionMessageFunctionToolCall | ChatCompletionMessageCustomToolCall
-        ] | None = (
+        tool_calls: (
+            list[ChatCompletionMessageFunctionToolCall | ChatCompletionMessageCustomToolCall] | None
+        ) = (
             [LitellmConverter.convert_tool_call_to_openai(tool) for tool in message.tool_calls]
             if message.tool_calls
             else None
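This file and the two OpenAI model classes below now pass converted messages and tools through _to_dump_compatible before tracing and debug logging. The helper's body lives in agents/util/_json.py (+19 -1 in this release) and is not shown in this diff; its call sites imply it converts Pydantic models and similar objects into JSON-serializable primitives. A rough, hypothetical sketch under that assumption (not the package's actual implementation):

    from typing import Any

    from pydantic import BaseModel

    def to_dump_compatible(value: Any) -> Any:
        # Hypothetical stand-in for agents.util._json._to_dump_compatible:
        # recursively turn Pydantic models into plain dicts so that
        # json.dumps(...) on the result cannot raise a TypeError.
        if isinstance(value, BaseModel):
            return value.model_dump()
        if isinstance(value, dict):
            return {k: to_dump_compatible(v) for k, v in value.items()}
        if isinstance(value, (list, tuple)):
            return [to_dump_compatible(v) for v in value]
        return value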
agents/memory/__init__.py
CHANGED
@@ -1,10 +1,12 @@
 from .openai_conversations_session import OpenAIConversationsSession
 from .session import Session, SessionABC
 from .sqlite_session import SQLiteSession
+from .util import SessionInputCallback
 
 __all__ = [
     "Session",
     "SessionABC",
+    "SessionInputCallback",
     "SQLiteSession",
     "OpenAIConversationsSession",
 ]
agents/memory/util.py
ADDED
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from typing import Callable
+
+from ..items import TResponseInputItem
+from ..util._types import MaybeAwaitable
+
+SessionInputCallback = Callable[
+    [list[TResponseInputItem], list[TResponseInputItem]],
+    MaybeAwaitable[list[TResponseInputItem]],
+]
+"""A function that combines session history with new input items.
+
+Args:
+    history_items: The list of items from the session history.
+    new_items: The list of new input items for the current turn.
+
+Returns:
+    A list of combined items to be used as input for the agent. Can be sync or async.
+"""
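SessionInputCallback lets callers control how session history is merged with the new turn's input: any sync or async callable with this shape works. A simple example that caps the history window (the 20-item cutoff is arbitrary):

    from agents.items import TResponseInputItem

    def keep_recent_history(
        history_items: list[TResponseInputItem],
        new_items: list[TResponseInputItem],
    ) -> list[TResponseInputItem]:
        # Keep only the 20 most recent history items before this turn's input.
        return history_items[-20:] + new_items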
agents/models/openai_chatcompletions.py
CHANGED
@@ -23,6 +23,7 @@ from ..tracing import generation_span
 from ..tracing.span_data import GenerationSpanData
 from ..tracing.spans import Span
 from ..usage import Usage
+from ..util._json import _to_dump_compatible
 from .chatcmpl_converter import Converter
 from .chatcmpl_helpers import HEADERS, ChatCmplHelpers
 from .chatcmpl_stream_handler import ChatCmplStreamHandler
@@ -237,6 +238,8 @@ class OpenAIChatCompletionsModel(Model):
                     "role": "system",
                 },
             )
+        converted_messages = _to_dump_compatible(converted_messages)
+
         if tracing.include_data():
             span.span_data.input = converted_messages
 
@@ -255,12 +258,24 @@ class OpenAIChatCompletionsModel(Model):
         for handoff in handoffs:
             converted_tools.append(Converter.convert_handoff_tool(handoff))
 
+        converted_tools = _to_dump_compatible(converted_tools)
+
         if _debug.DONT_LOG_MODEL_DATA:
             logger.debug("Calling LLM")
         else:
+            messages_json = json.dumps(
+                converted_messages,
+                indent=2,
+                ensure_ascii=False,
+            )
+            tools_json = json.dumps(
+                converted_tools,
+                indent=2,
+                ensure_ascii=False,
+            )
             logger.debug(
-                f"{...}\n"
-                f"Tools:\n{...}\n"
+                f"{messages_json}\n"
+                f"Tools:\n{tools_json}\n"
                 f"Stream: {stream}\n"
                 f"Tool choice: {tool_choice}\n"
                 f"Response format: {response_format}\n"
agents/models/openai_responses.py
CHANGED
@@ -38,6 +38,7 @@ from ..tool import (
 )
 from ..tracing import SpanError, response_span
 from ..usage import Usage
+from ..util._json import _to_dump_compatible
 from ..version import __version__
 from .interface import Model, ModelTracing
 
@@ -240,6 +241,7 @@ class OpenAIResponsesModel(Model):
         prompt: ResponsePromptParam | None = None,
     ) -> Response | AsyncStream[ResponseStreamEvent]:
         list_input = ItemHelpers.input_to_new_input_list(input)
+        list_input = _to_dump_compatible(list_input)
 
         parallel_tool_calls = (
             True
@@ -251,6 +253,7 @@ class OpenAIResponsesModel(Model):
 
         tool_choice = Converter.convert_tool_choice(model_settings.tool_choice)
         converted_tools = Converter.convert_tools(tools, handoffs)
+        converted_tools_payload = _to_dump_compatible(converted_tools.tools)
         response_format = Converter.get_response_format(output_schema)
 
         include_set: set[str] = set(converted_tools.includes)
@@ -263,10 +266,20 @@ class OpenAIResponsesModel(Model):
         if _debug.DONT_LOG_MODEL_DATA:
             logger.debug("Calling LLM")
         else:
+            input_json = json.dumps(
+                list_input,
+                indent=2,
+                ensure_ascii=False,
+            )
+            tools_json = json.dumps(
+                converted_tools_payload,
+                indent=2,
+                ensure_ascii=False,
+            )
             logger.debug(
                 f"Calling LLM {self.model} with input:\n"
-                f"{...}\n"
-                f"Tools:\n{...}\n"
+                f"{input_json}\n"
+                f"Tools:\n{tools_json}\n"
                 f"Stream: {stream}\n"
                 f"Tool choice: {tool_choice}\n"
                 f"Response format: {response_format}\n"
@@ -290,7 +303,7 @@ class OpenAIResponsesModel(Model):
             model=self.model,
             input=list_input,
             include=include,
-            tools=converted_tools.tools,
+            tools=converted_tools_payload,
             prompt=self._non_null_or_not_given(prompt),
             temperature=self._non_null_or_not_given(model_settings.temperature),
             top_p=self._non_null_or_not_given(model_settings.top_p),
@@ -433,7 +446,7 @@ class Converter:
            converted_tool = {
                "type": "web_search",
                "filters": tool.filters.model_dump() if tool.filters is not None else None,  # type: ignore [typeddict-item]
-               "user_location": tool.user_location,
+               "user_location": tool.user_location,
                "search_context_size": tool.search_context_size,
            }
            includes = None
agents/realtime/_util.py
CHANGED
@@ -4,6 +4,6 @@ from .config import RealtimeAudioFormat
 
 
 def calculate_audio_length_ms(format: RealtimeAudioFormat | None, audio_bytes: bytes) -> float:
-    if format and format.startswith("g711"):
+    if format and isinstance(format, str) and format.startswith("g711"):
         return (len(audio_bytes) / 8000) * 1000
     return (len(audio_bytes) / 24 / 2) * 1000
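The constants here encode byte rates: G.711 (u-law/a-law) is 8000 one-byte samples per second, while the default branch assumes 24 kHz, 16-bit mono PCM, i.e. 24 samples per millisecond at 2 bytes each. A quick check of both branches:

    # 1 second of 24 kHz 16-bit mono PCM = 48,000 bytes -> 1000 ms
    assert (48000 / 24 / 2) == 1000.0
    # 1 second of G.711 = 8,000 bytes -> 1000 ms
    assert (8000 / 8000) * 1000 == 1000.0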
agents/realtime/agent.py
CHANGED
@@ -6,6 +6,8 @@ from collections.abc import Awaitable
 from dataclasses import dataclass, field
 from typing import Any, Callable, Generic, cast
 
+from agents.prompts import Prompt
+
 from ..agent import AgentBase
 from ..guardrail import OutputGuardrail
 from ..handoffs import Handoff
@@ -55,6 +57,11 @@ class RealtimeAgent(AgentBase, Generic[TContext]):
     return a string.
     """
 
+    prompt: Prompt | None = None
+    """A prompt object. Prompts allow you to dynamically configure the instructions, tools
+    and other config for an agent outside of your code. Only usable with OpenAI models.
+    """
+
     handoffs: list[RealtimeAgent[Any] | Handoff[TContext, RealtimeAgent[Any]]] = field(
         default_factory=list
     )
agents/realtime/audio_formats.py
ADDED
@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from openai.types.realtime.realtime_audio_formats import (
+    AudioPCM,
+    AudioPCMA,
+    AudioPCMU,
+    RealtimeAudioFormats,
+)
+
+from ..logger import logger
+
+
+def to_realtime_audio_format(
+    input_audio_format: str | RealtimeAudioFormats | None,
+) -> RealtimeAudioFormats | None:
+    format: RealtimeAudioFormats | None = None
+    if input_audio_format is not None:
+        if isinstance(input_audio_format, str):
+            if input_audio_format in ["pcm16", "audio/pcm", "pcm"]:
+                format = AudioPCM(type="audio/pcm", rate=24000)
+            elif input_audio_format in ["g711_ulaw", "audio/pcmu", "pcmu"]:
+                format = AudioPCMU(type="audio/pcmu")
+            elif input_audio_format in ["g711_alaw", "audio/pcma", "pcma"]:
+                format = AudioPCMA(type="audio/pcma")
+            else:
+                logger.debug(f"Unknown input_audio_format: {input_audio_format}")
+        else:
+            format = input_audio_format
+    return format
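Usage is straightforward: legacy string names ("pcm16", "g711_ulaw", "g711_alaw") and their MIME-style aliases normalize to the typed OpenAI format objects, already-typed values pass through unchanged, and None stays None. For example (module path as shown in this diff):

    from agents.realtime.audio_formats import to_realtime_audio_format

    fmt = to_realtime_audio_format("pcm16")
    print(type(fmt).__name__, fmt.rate)               # AudioPCM 24000
    print(to_realtime_audio_format("g711_ulaw").type)  # audio/pcmu
    assert to_realtime_audio_format(None) is None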
agents/realtime/config.py
CHANGED
@@ -6,8 +6,13 @@ from typing import (
     Union,
 )
 
+from openai.types.realtime.realtime_audio_formats import (
+    RealtimeAudioFormats as OpenAIRealtimeAudioFormats,
+)
 from typing_extensions import NotRequired, TypeAlias, TypedDict
 
+from agents.prompts import Prompt
+
 from ..guardrail import OutputGuardrail
 from ..handoffs import Handoff
 from ..model_settings import ToolChoice
@@ -15,6 +20,8 @@ from ..tool import Tool
 
 RealtimeModelName: TypeAlias = Union[
     Literal[
+        "gpt-realtime",
+        "gpt-realtime-2025-08-28",
         "gpt-4o-realtime-preview",
         "gpt-4o-mini-realtime-preview",
         "gpt-4o-realtime-preview-2025-06-03",
@@ -91,6 +98,9 @@ class RealtimeSessionModelSettings(TypedDict):
     instructions: NotRequired[str]
     """System instructions for the model."""
 
+    prompt: NotRequired[Prompt]
+    """The prompt to use for the model."""
+
     modalities: NotRequired[list[Literal["text", "audio"]]]
     """The modalities the model should support."""
 
@@ -100,10 +110,10 @@ class RealtimeSessionModelSettings(TypedDict):
     speed: NotRequired[float]
     """The speed of the model's responses."""
 
-    input_audio_format: NotRequired[RealtimeAudioFormat]
+    input_audio_format: NotRequired[RealtimeAudioFormat | OpenAIRealtimeAudioFormats]
     """The format for input audio streams."""
 
-    output_audio_format: NotRequired[RealtimeAudioFormat]
+    output_audio_format: NotRequired[RealtimeAudioFormat | OpenAIRealtimeAudioFormats]
     """The format for output audio streams."""
 
     input_audio_transcription: NotRequired[RealtimeInputAudioTranscriptionConfig]
@@ -177,6 +187,14 @@ class RealtimeUserInputText(TypedDict):
     """The text content from the user."""
 
 
+class RealtimeUserInputImage(TypedDict, total=False):
+    """An image input from the user (Realtime)."""
+
+    type: Literal["input_image"]
+    image_url: str
+    detail: NotRequired[Literal["auto", "low", "high"] | str]
+
+
 class RealtimeUserInputMessage(TypedDict):
     """A message input from the user."""
 
@@ -186,8 +204,8 @@ class RealtimeUserInputMessage(TypedDict):
     role: Literal["user"]
     """The role identifier for user messages."""
 
-    content: list[RealtimeUserInputText]
-    """List of text content items in the message."""
+    content: list[RealtimeUserInputText | RealtimeUserInputImage]
+    """List of content items (text and image) in the message."""
 
 
 RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
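With RealtimeUserInputImage in place, a realtime user message can mix text and image content. A sketch (the "message" and "input_text" literals are assumed from the matching model-input types later in this diff; the URL is a placeholder):

    from agents.realtime.config import RealtimeUserInputMessage

    message: RealtimeUserInputMessage = {
        "type": "message",
        "role": "user",
        "content": [
            {"type": "input_text", "text": "What is in this image?"},
            {
                "type": "input_image",
                "image_url": "https://example.com/photo.jpg",  # data: URLs also work
                "detail": "auto",
            },
        ],
    }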
agents/realtime/items.py
CHANGED
@@ -34,6 +34,22 @@ class InputAudio(BaseModel):
     model_config = ConfigDict(extra="allow")
 
 
+class InputImage(BaseModel):
+    """Image input content for realtime messages."""
+
+    type: Literal["input_image"] = "input_image"
+    """The type identifier for image input."""
+
+    image_url: str | None = None
+    """Data/remote URL string (data:... or https:...)."""
+
+    detail: str | None = None
+    """Optional detail hint (e.g., 'auto', 'high', 'low')."""
+
+    # Allow extra data (e.g., `detail`)
+    model_config = ConfigDict(extra="allow")
+
+
 class AssistantText(BaseModel):
     """Text content from the assistant in realtime responses."""
 
@@ -100,7 +116,7 @@ class UserMessageItem(BaseModel):
     role: Literal["user"] = "user"
     """The role identifier for user messages."""
 
-    content: list[Annotated[InputText | InputAudio, Field(discriminator="type")]]
+    content: list[Annotated[InputText | InputAudio | InputImage, Field(discriminator="type")]]
     """List of content items, can be text or audio."""
 
     # Allow extra data
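Since InputImage joins the type-discriminated union used by UserMessageItem.content, realtime history items can now carry images alongside text and audio:

    from agents.realtime.items import InputImage

    # detail is optional; extra fields are tolerated via model_config.
    img = InputImage(image_url="https://example.com/photo.jpg", detail="high")
    print(img.type)  # "input_image"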
agents/realtime/model.py
CHANGED
@@ -118,6 +118,12 @@ class RealtimeModelConfig(TypedDict):
     the OpenAI Realtime model will use the default OpenAI WebSocket URL.
     """
 
+    headers: NotRequired[dict[str, str]]
+    """The headers to use when connecting. If unset, the model will use a sane default.
+    Note that, when you set this, authorization header won't be set under the hood.
+    e.g., {"api-key": "your api key here"} for Azure OpenAI Realtime WebSocket connections.
+    """
+
     initial_model_settings: NotRequired[RealtimeSessionModelSettings]
     """The initial model settings to use when connecting."""
 
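One consequence of the new headers field: supplying it means the default Authorization header is not added, so credentials must be passed explicitly. A sketch for an Azure-style endpoint (the url key is implied by the surrounding docstring; the endpoint below is a placeholder):

    import os

    from agents.realtime.model import RealtimeModelConfig

    config: RealtimeModelConfig = {
        "url": "wss://your-resource.openai.azure.com/openai/realtime",
        # Because headers is set, no Authorization header is added for us.
        "headers": {"api-key": os.environ["AZURE_OPENAI_API_KEY"]},
    }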
agents/realtime/model_inputs.py
CHANGED
@@ -24,12 +24,26 @@ class RealtimeModelInputTextContent(TypedDict):
     text: str
 
 
+class RealtimeModelInputImageContent(TypedDict, total=False):
+    """An image to be sent to the model.
+
+    The Realtime API expects `image_url` to be a string data/remote URL.
+    """
+
+    type: Literal["input_image"]
+    image_url: str
+    """String URL (data:... or https:...)."""
+
+    detail: NotRequired[str]
+    """Optional detail hint such as 'high', 'low', or 'auto'."""
+
+
 class RealtimeModelUserInputMessage(TypedDict):
     """A message to be sent to the model."""
 
     type: Literal["message"]
     role: Literal["user"]
-    content: list[RealtimeModelInputTextContent]
+    content: list[RealtimeModelInputTextContent | RealtimeModelInputImageContent]
 
 
 RealtimeModelUserInput: TypeAlias = Union[str, RealtimeModelUserInputMessage]