PyPI - openai-agents - Versions diffs - 0.2.11__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

openai-agents 0.2.11__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of openai-agents might be problematic. Click here for more details.

Files changed (25) hide show

agents/_debug.py +15 -4
agents/_run_impl.py +34 -37
agents/extensions/models/litellm_model.py +20 -5
agents/memory/__init__.py +2 -0
agents/memory/openai_conversations_session.py +0 -3
agents/memory/util.py +20 -0
agents/models/openai_chatcompletions.py +17 -2
agents/models/openai_responses.py +16 -3
agents/realtime/_util.py +1 -1
agents/realtime/agent.py +7 -0
agents/realtime/audio_formats.py +29 -0
agents/realtime/config.py +22 -4
agents/realtime/items.py +17 -1
agents/realtime/model_inputs.py +15 -1
agents/realtime/openai_realtime.py +411 -130
agents/realtime/session.py +167 -14
agents/run.py +54 -35
agents/tool.py +1 -1
agents/util/_json.py +19 -1
agents/voice/input.py +5 -4
agents/voice/models/openai_stt.py +6 -4
{openai_agents-0.2.11.dist-info → openai_agents-0.3.0.dist-info}/METADATA +2 -2
{openai_agents-0.2.11.dist-info → openai_agents-0.3.0.dist-info}/RECORD +25 -23
{openai_agents-0.2.11.dist-info → openai_agents-0.3.0.dist-info}/WHEEL +0 -0
{openai_agents-0.2.11.dist-info → openai_agents-0.3.0.dist-info}/licenses/LICENSE +0 -0

agents/realtime/session.py CHANGED Viewed

@@ -35,7 +35,16 @@ from .events import (
     RealtimeToolStart,
 )
 from .handoffs import realtime_handoff
-from .items import AssistantAudio, InputAudio, InputText, RealtimeItem
+from .items import (
+    AssistantAudio,
+    AssistantMessageItem,
+    AssistantText,
+    InputAudio,
+    InputImage,
+    InputText,
+    RealtimeItem,
+    UserMessageItem,
+)
 from .model import RealtimeModel, RealtimeModelConfig, RealtimeModelListener
 from .model_events import (
     RealtimeModelEvent,
@@ -95,6 +104,12 @@ class RealtimeSession(RealtimeModelListener):
         self._history: list[RealtimeItem] = []
         self._model_config = model_config or {}
         self._run_config = run_config or {}
+        initial_model_settings = self._model_config.get("initial_model_settings")
+        run_config_settings = self._run_config.get("model_settings")
+        self._base_model_settings: RealtimeSessionModelSettings = {
+            **(run_config_settings or {}),
+            **(initial_model_settings or {}),
+        }
         self._event_queue: asyncio.Queue[RealtimeSessionEvent] = asyncio.Queue()
         self._closed = False
         self._stored_exception: Exception | None = None
@@ -224,10 +239,17 @@ class RealtimeSession(RealtimeModelListener):
                 )
             )
         elif event.type == "input_audio_transcription_completed":
+            prev_len = len(self._history)
             self._history = RealtimeSession._get_new_history(self._history, event)
-            await self._put_event(
-                RealtimeHistoryUpdated(info=self._event_info, history=self._history)
-            )
+            # If a new user item was appended (no existing item),
+            # emit history_added for incremental UIs.
+            if len(self._history) > prev_len and len(self._history) > 0:
+                new_item = self._history[-1]
+                await self._put_event(RealtimeHistoryAdded(info=self._event_info, item=new_item))
+            else:
+                await self._put_event(
+                    RealtimeHistoryUpdated(info=self._event_info, history=self._history)
+                )
         elif event.type == "input_audio_timeout_triggered":
             await self._put_event(
                 RealtimeInputAudioTimeoutTriggered(
@@ -242,6 +264,13 @@ class RealtimeSession(RealtimeModelListener):
                 self._item_guardrail_run_counts[item_id] = 0
             self._item_transcripts[item_id] += event.delta
+            self._history = self._get_new_history(
+                self._history,
+                AssistantMessageItem(
+                    item_id=item_id,
+                    content=[AssistantAudio(transcript=self._item_transcripts[item_id])],
+                ),
+            )
             # Check if we should run guardrails based on debounce threshold
             current_length = len(self._item_transcripts[item_id])
@@ -291,7 +320,7 @@ class RealtimeSession(RealtimeModelListener):
                                 # If still missing and this is an assistant item, fall back to
                                 # accumulated transcript deltas tracked during the turn.
-                                if not preserved and incoming_item.role == "assistant":
+                                if incoming_item.role == "assistant":
                                     preserved = self._item_transcripts.get(incoming_item.item_id)
                                 if preserved:
@@ -456,9 +485,9 @@ class RealtimeSession(RealtimeModelListener):
         old_history: list[RealtimeItem],
         event: RealtimeModelInputAudioTranscriptionCompletedEvent | RealtimeItem,
     ) -> list[RealtimeItem]:
-        # Merge transcript into placeholder input_audio message.
         if isinstance(event, RealtimeModelInputAudioTranscriptionCompletedEvent):
             new_history: list[RealtimeItem] = []
+            existing_item_found = False
             for item in old_history:
                 if item.item_id == event.item_id and item.type == "message" and item.role == "user":
                     content: list[InputText | InputAudio] = []
@@ -471,11 +500,18 @@ class RealtimeSession(RealtimeModelListener):
                     new_history.append(
                         item.model_copy(update={"content": content, "status": "completed"})
                     )
+                    existing_item_found = True
                 else:
                     new_history.append(item)
+            if existing_item_found is False:
+                new_history.append(
+                    UserMessageItem(
+                        item_id=event.item_id, content=[InputText(text=event.transcript)]
+                    )
+                )
             return new_history
-        # Otherwise it's just a new item
         # TODO (rm) Add support for audio storage config
         # If the item already exists, update it
@@ -484,8 +520,122 @@ class RealtimeSession(RealtimeModelListener):
         )
         if existing_index is not None:
             new_history = old_history.copy()
-            new_history[existing_index] = event
+            if event.type == "message" and event.content is not None and len(event.content) > 0:
+                existing_item = old_history[existing_index]
+                if existing_item.type == "message":
+                    # Merge content preserving existing transcript/text when incoming entry is empty
+                    if event.role == "assistant" and existing_item.role == "assistant":
+                        assistant_existing_content = existing_item.content
+                        assistant_incoming = event.content
+                        assistant_new_content: list[AssistantText | AssistantAudio] = []
+                        for idx, ac in enumerate(assistant_incoming):
+                            if idx >= len(assistant_existing_content):
+                                assistant_new_content.append(ac)
+                                continue
+                            assistant_current = assistant_existing_content[idx]
+                            if ac.type == "audio":
+                                if ac.transcript is None:
+                                    assistant_new_content.append(assistant_current)
+                                else:
+                                    assistant_new_content.append(ac)
+                            else:  # text
+                                cur_text = (
+                                    assistant_current.text
+                                    if isinstance(assistant_current, AssistantText)
+                                    else None
+                                )
+                                if cur_text is not None and ac.text is None:
+                                    assistant_new_content.append(assistant_current)
+                                else:
+                                    assistant_new_content.append(ac)
+                        updated_assistant = event.model_copy(
+                            update={"content": assistant_new_content}
+                        )
+                        new_history[existing_index] = updated_assistant
+                    elif event.role == "user" and existing_item.role == "user":
+                        user_existing_content = existing_item.content
+                        user_incoming = event.content
+                        # Start from incoming content (prefer latest fields)
+                        user_new_content: list[InputText | InputAudio | InputImage] = list(
+                            user_incoming
+                        )
+                        # Merge by type with special handling for images and transcripts
+                        def _image_url_str(val: object) -> str | None:
+                            if isinstance(val, InputImage):
+                                return val.image_url or None
+                            return None
+                        # 1) Preserve any existing images that are missing from the incoming payload
+                        incoming_image_urls: set[str] = set()
+                        for part in user_incoming:
+                            if isinstance(part, InputImage):
+                                u = _image_url_str(part)
+                                if u:
+                                    incoming_image_urls.add(u)
+                        missing_images: list[InputImage] = []
+                        for part in user_existing_content:
+                            if isinstance(part, InputImage):
+                                u = _image_url_str(part)
+                                if u and u not in incoming_image_urls:
+                                    missing_images.append(part)
+                        # Insert missing images at the beginning to keep them visible and stable
+                        if missing_images:
+                            user_new_content = missing_images + user_new_content
+                        # 2) For text/audio entries, preserve existing when incoming entry is empty
+                        merged: list[InputText | InputAudio | InputImage] = []
+                        for idx, uc in enumerate(user_new_content):
+                            if uc.type == "input_audio":
+                                # Attempt to preserve transcript if empty
+                                transcript = getattr(uc, "transcript", None)
+                                if transcript is None and idx < len(user_existing_content):
+                                    prev = user_existing_content[idx]
+                                    if isinstance(prev, InputAudio) and prev.transcript is not None:
+                                        uc = uc.model_copy(update={"transcript": prev.transcript})
+                                merged.append(uc)
+                            elif uc.type == "input_text":
+                                text = getattr(uc, "text", None)
+                                if (text is None or text == "") and idx < len(
+                                    user_existing_content
+                                ):
+                                    prev = user_existing_content[idx]
+                                    if isinstance(prev, InputText) and prev.text:
+                                        uc = uc.model_copy(update={"text": prev.text})
+                                merged.append(uc)
+                            else:
+                                merged.append(uc)
+                        updated_user = event.model_copy(update={"content": merged})
+                        new_history[existing_index] = updated_user
+                    elif event.role == "system" and existing_item.role == "system":
+                        system_existing_content = existing_item.content
+                        system_incoming = event.content
+                        # Prefer existing non-empty text when incoming is empty
+                        system_new_content: list[InputText] = []
+                        for idx, sc in enumerate(system_incoming):
+                            if idx >= len(system_existing_content):
+                                system_new_content.append(sc)
+                                continue
+                            system_current = system_existing_content[idx]
+                            cur_text = system_current.text
+                            if cur_text is not None and sc.text is None:
+                                system_new_content.append(system_current)
+                            else:
+                                system_new_content.append(sc)
+                        updated_system = event.model_copy(update={"content": system_new_content})
+                        new_history[existing_index] = updated_system
+                    else:
+                        # Role changed or mismatched; just replace
+                        new_history[existing_index] = event
+                else:
+                    # If the existing item is not a message, just replace it.
+                    new_history[existing_index] = event
             return new_history
         # Otherwise, insert it after the previous_item_id if that is set
         elif event.previous_item_id:
             # Insert the new item after the previous item
@@ -619,12 +769,11 @@ class RealtimeSession(RealtimeModelListener):
         starting_settings: RealtimeSessionModelSettings | None,
         agent: RealtimeAgent,
     ) -> RealtimeSessionModelSettings:
-        # Start with run config model settings as base
-        run_config_settings = self._run_config.get("model_settings", {})
-        updated_settings: RealtimeSessionModelSettings = run_config_settings.copy()
-        # Apply starting settings (from model config) next
-        if starting_settings:
-            updated_settings.update(starting_settings)
+        # Start with the merged base settings from run and model configuration.
+        updated_settings = self._base_model_settings.copy()
+        if agent.prompt is not None:
+            updated_settings["prompt"] = agent.prompt
         instructions, tools, handoffs = await asyncio.gather(
             agent.get_system_prompt(self._context_wrapper),
@@ -635,6 +784,10 @@ class RealtimeSession(RealtimeModelListener):
         updated_settings["tools"] = tools or []
         updated_settings["handoffs"] = handoffs or []
+        # Apply starting settings (from model config) next
+        if starting_settings:
+            updated_settings.update(starting_settings)
         disable_tracing = self._run_config.get("tracing_disabled", False)
         if disable_tracing:
             updated_settings["tracing"] = None

agents/run.py CHANGED Viewed

@@ -54,7 +54,7 @@ from .items import (
 )
 from .lifecycle import RunHooks
 from .logger import logger
-from .memory import Session
+from .memory import Session, SessionInputCallback
 from .model_settings import ModelSettings
 from .models.interface import Model, ModelProvider
 from .models.multi_provider import MultiProvider
@@ -179,6 +179,13 @@ class RunConfig:
     An optional dictionary of additional metadata to include with the trace.
     """
+    session_input_callback: SessionInputCallback | None = None
+    """Defines how to handle session history when new input is provided.
+    - `None` (default): The new input is appended to the session history.
+    - `SessionInputCallback`: A custom function that receives the history and new input, and
+      returns the desired combined list of items.
+    """
     call_model_input_filter: CallModelInputFilter | None = None
     """
     Optional callback that is invoked immediately before calling the model. It receives the current
@@ -411,8 +418,11 @@ class AgentRunner:
         if run_config is None:
             run_config = RunConfig()
-        # Prepare input with session if enabled
-        prepared_input = await self._prepare_input_with_session(input, session)
+        # Keep original user input separate from session-prepared input
+        original_user_input = input
+        prepared_input = await self._prepare_input_with_session(
+            input, session, run_config.session_input_callback
+        )
         tool_use_tracker = AgentToolUseTracker()
@@ -438,6 +448,9 @@ class AgentRunner:
             current_agent = starting_agent
             should_run_agent_start_hooks = True
+            # save only the new user input to the session, not the combined history
+            await self._save_result_to_session(session, original_user_input, [])
             try:
                 while True:
                     all_tools = await AgentRunner._get_all_tools(current_agent, context_wrapper)
@@ -537,9 +550,7 @@ class AgentRunner:
                             output_guardrail_results=output_guardrail_results,
                             context_wrapper=context_wrapper,
                         )
-                        # Save the conversation to session if enabled
-                        await self._save_result_to_session(session, input, result)
+                        await self._save_result_to_session(session, [], turn_result.new_step_items)
                         return result
                     elif isinstance(turn_result.next_step, NextStepHandoff):
@@ -548,7 +559,7 @@ class AgentRunner:
                         current_span = None
                         should_run_agent_start_hooks = True
                     elif isinstance(turn_result.next_step, NextStepRunAgain):
-                        pass
+                        await self._save_result_to_session(session, [], turn_result.new_step_items)
                     else:
                         raise AgentsException(
                             f"Unknown next step type: {type(turn_result.next_step)}"
@@ -779,11 +790,15 @@ class AgentRunner:
         try:
             # Prepare input with session if enabled
-            prepared_input = await AgentRunner._prepare_input_with_session(starting_input, session)
+            prepared_input = await AgentRunner._prepare_input_with_session(
+                starting_input, session, run_config.session_input_callback
+            )
             # Update the streamed result with the prepared input
             streamed_result.input = prepared_input
+            await AgentRunner._save_result_to_session(session, starting_input, [])
             while True:
                 if streamed_result.is_complete:
                     break
@@ -887,24 +902,15 @@ class AgentRunner:
                         streamed_result.is_complete = True
                         # Save the conversation to session if enabled
-                        # Create a temporary RunResult for session saving
-                        temp_result = RunResult(
-                            input=streamed_result.input,
-                            new_items=streamed_result.new_items,
-                            raw_responses=streamed_result.raw_responses,
-                            final_output=streamed_result.final_output,
-                            _last_agent=current_agent,
-                            input_guardrail_results=streamed_result.input_guardrail_results,
-                            output_guardrail_results=streamed_result.output_guardrail_results,
-                            context_wrapper=context_wrapper,
-                        )
                         await AgentRunner._save_result_to_session(
-                            session, starting_input, temp_result
+                            session, [], turn_result.new_step_items
                         )
                         streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
                     elif isinstance(turn_result.next_step, NextStepRunAgain):
-                        pass
+                        await AgentRunner._save_result_to_session(
+                            session, [], turn_result.new_step_items
+                        )
                 except AgentsException as exc:
                     streamed_result.is_complete = True
                     streamed_result._event_queue.put_nowait(QueueCompleteSentinel())
@@ -1479,19 +1485,20 @@ class AgentRunner:
         cls,
         input: str | list[TResponseInputItem],
         session: Session | None,
+        session_input_callback: SessionInputCallback | None,
     ) -> str | list[TResponseInputItem]:
         """Prepare input by combining it with session history if enabled."""
         if session is None:
             return input
-        # Validate that we don't have both a session and a list input, as this creates
-        # ambiguity about whether the list should append to or replace existing session history
-        if isinstance(input, list):
+        # If the user doesn't specify an input callback and pass a list as input
+        if isinstance(input, list) and not session_input_callback:
             raise UserError(
-                "Cannot provide both a session and a list of input items. "
-                "When using session memory, provide only a string input to append to the "
-                "conversation, or use session=None and provide a list to manually manage "
-                "conversation history."
+                "When using session memory, list inputs require a "
+                "`RunConfig.session_input_callback` to define how they should be merged "
+                "with the conversation history. If you don't want to use a callback, "
+                "provide your input as a string instead, or disable session memory "
+                "(session=None) and pass a list to manage the history manually."
             )
         # Get previous conversation history
@@ -1500,19 +1507,31 @@ class AgentRunner:
         # Convert input to list format
         new_input_list = ItemHelpers.input_to_new_input_list(input)
-        # Combine history with new input
-        combined_input = history + new_input_list
-        return combined_input
+        if session_input_callback is None:
+            return history + new_input_list
+        elif callable(session_input_callback):
+            res = session_input_callback(history, new_input_list)
+            if inspect.isawaitable(res):
+                return await res
+            return res
+        else:
+            raise UserError(
+                f"Invalid `session_input_callback` value: {session_input_callback}. "
+                "Choose between `None` or a custom callable function."
+            )
     @classmethod
     async def _save_result_to_session(
         cls,
         session: Session | None,
         original_input: str | list[TResponseInputItem],
-        result: RunResult,
+        new_items: list[RunItem],
     ) -> None:
-        """Save the conversation turn to session."""
+        """
+        Save the conversation turn to session.
+        It does not account for any filtering or modification performed by
+        `RunConfig.session_input_callback`.
+        """
         if session is None:
             return
@@ -1520,7 +1539,7 @@ class AgentRunner:
         input_list = ItemHelpers.input_to_new_input_list(original_input)
         # Convert new items to input format
-        new_items_as_input = [item.to_input_item() for item in result.new_items]
+        new_items_as_input = [item.to_input_item() for item in new_items]
         # Save all items from this turn
         items_to_save = input_list + new_items_as_input

agents/tool.py CHANGED Viewed

@@ -142,7 +142,7 @@ class WebSearchTool:
     @property
     def name(self):
-        return "web_search_preview"
+        return "web_search"
 @dataclass

agents/util/_json.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from __future__ import annotations
-from typing import Literal
+from collections.abc import Iterable
+from typing import Any, Literal
 from pydantic import TypeAdapter, ValidationError
 from typing_extensions import TypeVar
@@ -29,3 +30,20 @@ def validate_json(json_str: str, type_adapter: TypeAdapter[T], partial: bool) ->
         raise ModelBehaviorError(
             f"Invalid JSON when parsing {json_str} for {type_adapter}; {e}"
         ) from e
+def _to_dump_compatible(obj: Any) -> Any:
+    return _to_dump_compatible_internal(obj)
+def _to_dump_compatible_internal(obj: Any) -> Any:
+    if isinstance(obj, dict):
+        return {k: _to_dump_compatible_internal(v) for k, v in obj.items()}
+    if isinstance(obj, (list, tuple)):
+        return [_to_dump_compatible_internal(x) for x in obj]
+    if isinstance(obj, Iterable) and not isinstance(obj, (str, bytes, bytearray)):
+        return [_to_dump_compatible_internal(x) for x in obj]
+    return obj

agents/voice/input.py CHANGED Viewed

@@ -13,7 +13,7 @@ DEFAULT_SAMPLE_RATE = 24000
 def _buffer_to_audio_file(
-    buffer: npt.NDArray[np.int16 | np.float32],
+    buffer: npt.NDArray[np.int16 | np.float32 | np.float64],
     frame_rate: int = DEFAULT_SAMPLE_RATE,
     sample_width: int = 2,
     channels: int = 1,
@@ -77,12 +77,13 @@ class StreamedAudioInput:
     """
     def __init__(self):
-        self.queue: asyncio.Queue[npt.NDArray[np.int16 | np.float32]] = asyncio.Queue()
+        self.queue: asyncio.Queue[npt.NDArray[np.int16 | np.float32] | None] = asyncio.Queue()
-    async def add_audio(self, audio: npt.NDArray[np.int16 | np.float32]):
+    async def add_audio(self, audio: npt.NDArray[np.int16 | np.float32] | None):
         """Adds more audio data to the stream.
         Args:
-            audio: The audio data to add. Must be a numpy array of int16 or float32.
+            audio: The audio data to add. Must be a numpy array of int16 or float32 or None.
+              If None passed, it indicates the end of the stream.
         """
         await self.queue.put(audio)

agents/voice/models/openai_stt.py CHANGED Viewed

@@ -88,7 +88,7 @@ class OpenAISTTTranscriptionSession(StreamedTranscriptionSession):
         self._trace_include_sensitive_data = trace_include_sensitive_data
         self._trace_include_sensitive_audio_data = trace_include_sensitive_audio_data
-        self._input_queue: asyncio.Queue[npt.NDArray[np.int16 | np.float32]] = input.queue
+        self._input_queue: asyncio.Queue[npt.NDArray[np.int16 | np.float32] | None] = input.queue
         self._output_queue: asyncio.Queue[str | ErrorSentinel | SessionCompleteSentinel] = (
             asyncio.Queue()
         )
@@ -226,7 +226,10 @@ class OpenAISTTTranscriptionSession(StreamedTranscriptionSession):
                     break
                 event_type = event.get("type", "unknown")
-                if event_type == "input_audio_transcription_completed":
+                if event_type in [
+                    "input_audio_transcription_completed",  # legacy
+                    "conversation.item.input_audio_transcription.completed",
+                ]:
                     transcript = cast(str, event.get("transcript", ""))
                     if len(transcript) > 0:
                         self._end_turn(transcript)
@@ -242,7 +245,7 @@ class OpenAISTTTranscriptionSession(StreamedTranscriptionSession):
         await self._output_queue.put(SessionCompleteSentinel())
     async def _stream_audio(
-        self, audio_queue: asyncio.Queue[npt.NDArray[np.int16 | np.float32]]
+        self, audio_queue: asyncio.Queue[npt.NDArray[np.int16 | np.float32] | None]
     ) -> None:
         assert self._websocket is not None, "Websocket not initialized"
         self._start_turn()
@@ -275,7 +278,6 @@ class OpenAISTTTranscriptionSession(StreamedTranscriptionSession):
                 "wss://api.openai.com/v1/realtime?intent=transcription",
                 additional_headers={
                     "Authorization": f"Bearer {self._client.api_key}",
-                    "OpenAI-Beta": "realtime=v1",
                     "OpenAI-Log-Session": "1",
                 },
             ) as ws:

{openai_agents-0.2.11.dist-info → openai_agents-0.3.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: openai-agents
-Version: 0.2.11
+Version: 0.3.0
 Summary: OpenAI Agents SDK
 Project-URL: Homepage, https://openai.github.io/openai-agents-python/
 Project-URL: Repository, https://github.com/openai/openai-agents-python
@@ -21,7 +21,7 @@ Classifier: Typing :: Typed
 Requires-Python: >=3.9
 Requires-Dist: griffe<2,>=1.5.6
 Requires-Dist: mcp<2,>=1.11.0; python_version >= '3.10'
-Requires-Dist: openai<2,>=1.104.1
+Requires-Dist: openai<2,>=1.107.1
 Requires-Dist: pydantic<3,>=2.10
 Requires-Dist: requests<3,>=2.0
 Requires-Dist: types-requests<3,>=2.0

openai-agents 0.2.11__py3-none-any.whl → 0.3.0__py3-none-any.whl

Potentially problematic release.

openai-agents 0.2.11__py3-none-any.whl → 0.3.0__py3-none-any.whl