openai-agents 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

agents/__init__.py CHANGED
@@ -5,7 +5,13 @@ from typing import Literal
 from openai import AsyncOpenAI
 
 from . import _config
-from .agent import Agent, AgentBase, ToolsToFinalOutputFunction, ToolsToFinalOutputResult
+from .agent import (
+    Agent,
+    AgentBase,
+    StopAtTools,
+    ToolsToFinalOutputFunction,
+    ToolsToFinalOutputResult,
+)
 from .agent_output import AgentOutputSchema, AgentOutputSchemaBase
 from .computer import AsyncComputer, Button, Computer, Environment
 from .exceptions import (
@@ -43,6 +49,7 @@ from .lifecycle import AgentHooks, RunHooks
 from .memory import Session, SQLiteSession
 from .model_settings import ModelSettings
 from .models.interface import Model, ModelProvider, ModelTracing
+from .models.multi_provider import MultiProvider
 from .models.openai_chatcompletions import OpenAIChatCompletionsModel
 from .models.openai_provider import OpenAIProvider
 from .models.openai_responses import OpenAIResponsesModel
@@ -162,6 +169,7 @@ def enable_verbose_stdout_logging():
 __all__ = [
     "Agent",
     "AgentBase",
+    "StopAtTools",
     "ToolsToFinalOutputFunction",
     "ToolsToFinalOutputResult",
     "Runner",
@@ -171,6 +179,7 @@ __all__ = [
     "ModelTracing",
     "ModelSettings",
     "OpenAIChatCompletionsModel",
+    "MultiProvider",
     "OpenAIProvider",
     "OpenAIResponsesModel",
     "AgentOutputSchema",
agents/_run_impl.py CHANGED
@@ -774,6 +774,7 @@ class RunImpl:
                 else original_input,
                 pre_handoff_items=tuple(pre_step_items),
                 new_items=tuple(new_step_items),
+                run_context=context_wrapper,
             )
             if not callable(input_filter):
                 _error_tracing.attach_error_to_span(
@@ -785,6 +786,8 @@ class RunImpl:
                 )
                 raise UserError(f"Invalid input filter: {input_filter}")
             filtered = input_filter(handoff_input_data)
+            if inspect.isawaitable(filtered):
+                filtered = await filtered
             if not isinstance(filtered, HandoffInputData):
                 _error_tracing.attach_error_to_span(
                     span_handoff,
@@ -911,12 +914,12 @@ class RunImpl:
         return result
 
     @classmethod
-    def stream_step_result_to_queue(
+    def stream_step_items_to_queue(
         cls,
-        step_result: SingleStepResult,
+        new_step_items: list[RunItem],
         queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel],
     ):
-        for item in step_result.new_step_items:
+        for item in new_step_items:
             if isinstance(item, MessageOutputItem):
                 event = RunItemStreamEvent(item=item, name="message_output_created")
             elif isinstance(item, HandoffCallItem):
@@ -941,6 +944,14 @@ class RunImpl:
             if event:
                 queue.put_nowait(event)
 
+    @classmethod
+    def stream_step_result_to_queue(
+        cls,
+        step_result: SingleStepResult,
+        queue: asyncio.Queue[StreamEvent | QueueCompleteSentinel],
+    ):
+        cls.stream_step_items_to_queue(step_result.new_step_items, queue)
+
     @classmethod
     async def _check_for_final_output_from_tools(
         cls,
agents/agent.py CHANGED
@@ -101,7 +101,7 @@ class AgentBase(Generic[TContext]):
             self.mcp_servers, convert_schemas_to_strict, run_context, self
         )
 
-    async def get_all_tools(self, run_context: RunContextWrapper[Any]) -> list[Tool]:
+    async def get_all_tools(self, run_context: RunContextWrapper[TContext]) -> list[Tool]:
         """All agent tools, including MCP tools and function tools."""
         mcp_tools = await self.get_mcp_tools(run_context)
 
@@ -201,20 +201,22 @@ class Agent(AgentBase, Generic[TContext]):
     tool_use_behavior: (
         Literal["run_llm_again", "stop_on_first_tool"] | StopAtTools | ToolsToFinalOutputFunction
     ) = "run_llm_again"
-    """This lets you configure how tool use is handled.
+    """
+    This lets you configure how tool use is handled.
     - "run_llm_again": The default behavior. Tools are run, and then the LLM receives the results
       and gets to respond.
     - "stop_on_first_tool": The output of the first tool call is used as the final output. This
       means that the LLM does not process the result of the tool call.
-    - A list of tool names: The agent will stop running if any of the tools in the list are called.
-      The final output will be the output of the first matching tool call. The LLM does not
-      process the result of the tool call.
+    - A StopAtTools object: The agent will stop running if any of the tools listed in
+      `stop_at_tool_names` is called.
+      The final output will be the output of the first matching tool call.
+      The LLM does not process the result of the tool call.
     - A function: If you pass a function, it will be called with the run context and the list of
       tool results. It must return a `ToolsToFinalOutputResult`, which determines whether the tool
       calls result in a final output.
 
     NOTE: This configuration is specific to FunctionTools. Hosted tools, such as file search,
-    web search, etc are always processed by the LLM.
+    web search, etc. are always processed by the LLM.
     """
 
     reset_tool_choice: bool = True
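A minimal sketch of the `StopAtTools` behavior documented above (the `get_weather` tool and the agent name are illustrative, not taken from this diff):

```python
from agents import Agent, StopAtTools, function_tool

@function_tool
def get_weather(city: str) -> str:
    # Illustrative stub; a real tool would call a weather API.
    return f"Sunny in {city}"

# Once get_weather is called, its output becomes the run's final output and the
# LLM does not see the tool result, per the docstring above.
agent = Agent(
    name="Weather agent",
    tools=[get_weather],
    tool_use_behavior=StopAtTools(stop_at_tool_names=["get_weather"]),
)
```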
@@ -222,10 +224,17 @@ class Agent(AgentBase, Generic[TContext]):
     to True. This ensures that the agent doesn't enter an infinite loop of tool usage."""
 
     def clone(self, **kwargs: Any) -> Agent[TContext]:
-        """Make a copy of the agent, with the given arguments changed. For example, you could do:
-        ```
-        new_agent = agent.clone(instructions="New instructions")
-        ```
+        """Make a copy of the agent, with the given arguments changed.
+        Notes:
+        - Uses `dataclasses.replace`, which performs a **shallow copy**.
+        - Mutable attributes like `tools` and `handoffs` are shallow-copied:
+          new list objects are created only if overridden, but their contents
+          (tool functions and handoff objects) are shared with the original.
+        - To modify these independently, pass new lists when calling `clone()`.
+        Example:
+        ```python
+        new_agent = agent.clone(instructions="New instructions")
+        ```
         """
         return dataclasses.replace(self, **kwargs)
 
@@ -289,30 +298,3 @@ class Agent(AgentBase, Generic[TContext]):
     ) -> ResponsePromptParam | None:
         """Get the prompt for the agent."""
         return await PromptUtil.to_model_input(self.prompt, run_context, self)
-
-    async def get_mcp_tools(self, run_context: RunContextWrapper[TContext]) -> list[Tool]:
-        """Fetches the available tools from the MCP servers."""
-        convert_schemas_to_strict = self.mcp_config.get("convert_schemas_to_strict", False)
-        return await MCPUtil.get_all_function_tools(
-            self.mcp_servers, convert_schemas_to_strict, run_context, self
-        )
-
-    async def get_all_tools(self, run_context: RunContextWrapper[Any]) -> list[Tool]:
-        """All agent tools, including MCP tools and function tools."""
-        mcp_tools = await self.get_mcp_tools(run_context)
-
-        async def _check_tool_enabled(tool: Tool) -> bool:
-            if not isinstance(tool, FunctionTool):
-                return True
-
-            attr = tool.is_enabled
-            if isinstance(attr, bool):
-                return attr
-            res = attr(run_context, self)
-            if inspect.isawaitable(res):
-                return bool(await res)
-            return bool(res)
-
-        results = await asyncio.gather(*(_check_tool_enabled(t) for t in self.tools))
-        enabled: list[Tool] = [t for t, ok in zip(self.tools, results) if ok]
-        return [*mcp_tools, *enabled]
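To make the shallow-copy caveat in the updated `clone()` docstring concrete, a small hedged sketch (agent names are illustrative and reuse the `get_weather` stub from the earlier example):

```python
base = Agent(name="Base agent", tools=[get_weather])

# Without an override, the clone shares the *same* tools list object, so
# mutating one agent's list is visible on the other.
alias = base.clone(name="Alias")
assert alias.tools is base.tools

# Passing a fresh list gives each agent its own list; the tool objects
# themselves are still shared, which is usually fine.
independent = base.clone(name="Independent", tools=[get_weather])
assert independent.tools is not base.tools
```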
agents/agent_output.py CHANGED
@@ -116,7 +116,7 @@ class AgentOutputSchema(AgentOutputSchemaBase):
             raise UserError(
                 "Strict JSON schema is enabled, but the output type is not valid. "
                 "Either make the output type strict, "
-                "or wrap your type with AgentOutputSchema(your_type, strict_json_schema=False)"
+                "or wrap your type with AgentOutputSchema(YourType, strict_json_schema=False)"
             ) from e
 
     def is_plain_text(self) -> bool:
@@ -29,6 +29,7 @@ def remove_all_tools(handoff_input_data: HandoffInputData) -> HandoffInputData:
         input_history=filtered_history,
         pre_handoff_items=filtered_pre_handoff_items,
         new_items=filtered_new_items,
+        run_context=handoff_input_data.run_context,
     )
 
 
@@ -45,6 +45,14 @@ from ...tracing.spans import Span
 from ...usage import Usage
 
 
+class InternalChatCompletionMessage(ChatCompletionMessage):
+    """
+    An internal subclass to carry reasoning_content without modifying the original model.
+    """
+
+    reasoning_content: str
+
+
 class LitellmModel(Model):
     """This class enables using any model via LiteLLM. LiteLLM allows you to acess OpenAPI,
     Anthropic, Gemini, Mistral, and many other models.
@@ -364,13 +372,18 @@ class LitellmConverter:
             provider_specific_fields.get("refusal", None) if provider_specific_fields else None
         )
 
-        return ChatCompletionMessage(
+        reasoning_content = ""
+        if hasattr(message, "reasoning_content") and message.reasoning_content:
+            reasoning_content = message.reasoning_content
+
+        return InternalChatCompletionMessage(
             content=message.content,
             refusal=refusal,
             role="assistant",
             annotations=cls.convert_annotations_to_openai(message),
             audio=message.get("audio", None),  # litellm deletes audio if not present
             tool_calls=tool_calls,
+            reasoning_content=reasoning_content,
         )
 
     @classmethod
@@ -71,6 +71,12 @@ def get_all_nodes(
             f"fillcolor=lightgreen, width=0.5, height=0.3];"
         )
 
+    for mcp_server in agent.mcp_servers:
+        parts.append(
+            f'"{mcp_server.name}" [label="{mcp_server.name}", shape=box, style=filled, '
+            f"fillcolor=lightgrey, width=1, height=0.5];"
+        )
+
     for handoff in agent.handoffs:
         if isinstance(handoff, Handoff):
             parts.append(
@@ -119,6 +125,11 @@ def get_all_edges(
         "{agent.name}" -> "{tool.name}" [style=dotted, penwidth=1.5];
         "{tool.name}" -> "{agent.name}" [style=dotted, penwidth=1.5];""")
 
+    for mcp_server in agent.mcp_servers:
+        parts.append(f"""
+        "{agent.name}" -> "{mcp_server.name}" [style=dashed, penwidth=1.5];
+        "{mcp_server.name}" -> "{agent.name}" [style=dashed, penwidth=1.5];""")
+
     for handoff in agent.handoffs:
         if isinstance(handoff, Handoff):
             parts.append(f"""
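The two visualization hunks above register each MCP server as a grey box node and connect it to its agent with dashed edges. A hedged usage sketch, assuming `draw_graph` remains the public entry point and the optional graphviz dependency is installed (`my_mcp_server` is illustrative):

```python
from agents import Agent
from agents.extensions.visualization import draw_graph

agent = Agent(
    name="Assistant",
    mcp_servers=[my_mcp_server],  # any configured MCP server instance
)

# MCP servers now show up in the rendered graph as grey boxes with dashed edges.
draw_graph(agent, filename="agent_graph")
```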
agents/function_schema.py CHANGED
@@ -76,7 +76,7 @@ class FuncSchema:
 
 @dataclass
 class FuncDocumentation:
-    """Contains metadata about a python function, extracted from its docstring."""
+    """Contains metadata about a Python function, extracted from its docstring."""
 
     name: str
     """The name of the function, via `__name__`."""
@@ -194,7 +194,7 @@ def function_schema(
     strict_json_schema: bool = True,
 ) -> FuncSchema:
     """
-    Given a python function, extracts a `FuncSchema` from it, capturing the name, description,
+    Given a Python function, extracts a `FuncSchema` from it, capturing the name, description,
     parameter descriptions, and other metadata.
 
     Args:
@@ -208,7 +208,7 @@ def function_schema(
             descriptions.
         strict_json_schema: Whether the JSON schema is in strict mode. If True, we'll ensure that
             the schema adheres to the "strict" standard the OpenAI API expects. We **strongly**
-            recommend setting this to True, as it increases the likelihood of the LLM providing
+            recommend setting this to True, as it increases the likelihood of the LLM producing
            correct JSON input.

    Returns:
agents/guardrail.py CHANGED
@@ -78,8 +78,9 @@ class InputGuardrail(Generic[TContext]):
     You can use the `@input_guardrail()` decorator to turn a function into an `InputGuardrail`, or
     create an `InputGuardrail` manually.
 
-    Guardrails return a `GuardrailResult`. If `result.tripwire_triggered` is `True`, the agent
-    execution will immediately stop and a `InputGuardrailTripwireTriggered` exception will be raised
+    Guardrails return a `GuardrailResult`. If `result.tripwire_triggered` is `True`,
+    the agent's execution will immediately stop, and
+    an `InputGuardrailTripwireTriggered` exception will be raised
     """
 
     guardrail_function: Callable[
@@ -132,7 +133,7 @@ class OutputGuardrail(Generic[TContext]):
     You can use the `@output_guardrail()` decorator to turn a function into an `OutputGuardrail`,
     or create an `OutputGuardrail` manually.
 
-    Guardrails return a `GuardrailResult`. If `result.tripwire_triggered` is `True`, a
+    Guardrails return a `GuardrailResult`. If `result.tripwire_triggered` is `True`, an
     `OutputGuardrailTripwireTriggered` exception will be raised.
     """
 
@@ -316,7 +317,7 @@ def output_guardrail(
 ) -> OutputGuardrail[TContext_co]:
     return OutputGuardrail(
         guardrail_function=f,
-        # Guardrail name defaults to function name when not specified (None).
+        # Guardrail name defaults to function's name when not specified (None).
         name=name if name else f.__name__,
     )
 
agents/handoffs.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
 import inspect
 import json
 from collections.abc import Awaitable
-from dataclasses import dataclass
+from dataclasses import dataclass, replace as dataclasses_replace
 from typing import TYPE_CHECKING, Any, Callable, Generic, cast, overload
 
 from pydantic import TypeAdapter
@@ -49,8 +49,24 @@ class HandoffInputData:
     handoff and the tool output message representing the response from the handoff output.
     """
 
+    run_context: RunContextWrapper[Any] | None = None
+    """
+    The run context at the time the handoff was invoked.
+    Note that, since this property was added later on, it's optional for backwards compatibility.
+    """
+
+    def clone(self, **kwargs: Any) -> HandoffInputData:
+        """
+        Make a copy of the handoff input data, with the given arguments changed. For example, you
+        could do:
+        ```
+        new_handoff_input_data = handoff_input_data.clone(new_items=())
+        ```
+        """
+        return dataclasses_replace(self, **kwargs)
 
-HandoffInputFilter: TypeAlias = Callable[[HandoffInputData], HandoffInputData]
+
+HandoffInputFilter: TypeAlias = Callable[[HandoffInputData], MaybeAwaitable[HandoffInputData]]
 """A function that filters the input data passed to the next agent."""
 
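Since `HandoffInputFilter` may now return an awaitable (the runner awaits it via the `inspect.isawaitable` check in `_run_impl.py` above), an async filter built on the new `clone()` helper could look like this hedged sketch (`billing_agent` is illustrative):

```python
from agents import handoff
from agents.handoffs import HandoffInputData

async def drop_new_items(data: HandoffInputData) -> HandoffInputData:
    # clone() copies the dataclass and overrides only the fields passed in;
    # the new run_context field (when set) is carried over unchanged.
    return data.clone(new_items=())

transfer_to_billing = handoff(billing_agent, input_filter=drop_new_items)
```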
 
@@ -103,9 +119,9 @@ class Handoff(Generic[TContext, TAgent]):
     True, as it increases the likelihood of correct JSON input.
     """
 
-    is_enabled: bool | Callable[[RunContextWrapper[Any], AgentBase[Any]], MaybeAwaitable[bool]] = (
-        True
-    )
+    is_enabled: bool | Callable[
+        [RunContextWrapper[Any], AgentBase[Any]], MaybeAwaitable[bool]
+    ] = True
     """Whether the handoff is enabled. Either a bool or a Callable that takes the run context and
     agent and returns whether the handoff is enabled. You can use this to dynamically enable/disable
     a handoff based on your context/state."""
@@ -248,7 +264,7 @@ def handoff(
     async def _is_enabled(ctx: RunContextWrapper[Any], agent_base: AgentBase[Any]) -> bool:
         from .agent import Agent
 
-        assert callable(is_enabled), "is_enabled must be non-null here"
+        assert callable(is_enabled), "is_enabled must be callable here"
         assert isinstance(agent_base, Agent), "Can't handoff to a non-Agent"
         result = is_enabled(ctx, agent_base)
 
agents/items.py CHANGED
@@ -66,7 +66,7 @@ class RunItemBase(Generic[T], abc.ABC):
     """The agent whose run caused this item to be generated."""
 
     raw_item: T
-    """The raw Responses item from the run. This will always be a either an output item (i.e.
+    """The raw Responses item from the run. This will always be either an output item (i.e.
     `openai.types.responses.ResponseOutputItem` or an input item
     (i.e. `openai.types.responses.ResponseInputItemParam`).
     """
@@ -243,6 +243,8 @@ class ItemHelpers:
         if not isinstance(message, ResponseOutputMessage):
             return ""
 
+        if not message.content:
+            return ""
         last_content = message.content[-1]
         if isinstance(last_content, ResponseOutputText):
             return last_content.text
@@ -255,6 +257,8 @@ class ItemHelpers:
     def extract_last_text(cls, message: TResponseOutputItem) -> str | None:
         """Extracts the last text content from a message, if any. Ignores refusals."""
         if isinstance(message, ResponseOutputMessage):
+            if not message.content:
+                return None
             last_content = message.content[-1]
             if isinstance(last_content, ResponseOutputText):
                 return last_content.text
agents/mcp/util.py CHANGED
@@ -194,23 +194,21 @@ class MCPUtil:
         else:
             logger.debug(f"MCP tool {tool.name} returned {result}")
 
-        # The MCP tool result is a list of content items, whereas OpenAI tool outputs are a single
-        # string. We'll try to convert.
-        if len(result.content) == 1:
-            tool_output = result.content[0].model_dump_json()
-            # Append structured content if it exists and we're using it.
-            if server.use_structured_content and result.structuredContent:
-                tool_output = f"{tool_output}\n{json.dumps(result.structuredContent)}"
-        elif len(result.content) > 1:
-            tool_results = [item.model_dump(mode="json") for item in result.content]
-            if server.use_structured_content and result.structuredContent:
-                tool_results.append(result.structuredContent)
-            tool_output = json.dumps(tool_results)
-        elif server.use_structured_content and result.structuredContent:
+        # If structured content is requested and available, use it exclusively
+        if server.use_structured_content and result.structuredContent:
             tool_output = json.dumps(result.structuredContent)
         else:
-            # Empty content is a valid result (e.g., "no results found")
-            tool_output = "[]"
+            # Fall back to regular text content processing
+            # The MCP tool result is a list of content items, whereas OpenAI tool
+            # outputs are a single string. We'll try to convert.
+            if len(result.content) == 1:
+                tool_output = result.content[0].model_dump_json()
+            elif len(result.content) > 1:
+                tool_results = [item.model_dump(mode="json") for item in result.content]
+                tool_output = json.dumps(tool_results)
+            else:
+                # Empty content is a valid result (e.g., "no results found")
+                tool_output = "[]"
 
         current_span = get_current_span()
         if current_span:
@@ -36,6 +36,7 @@ from openai.types.responses import (
     ResponseOutputRefusal,
     ResponseOutputText,
     ResponseReasoningItem,
+    ResponseReasoningItemParam,
 )
 from openai.types.responses.response_input_param import FunctionCallOutput, ItemReference, Message
 from openai.types.responses.response_reasoning_item import Summary
@@ -210,6 +211,12 @@ class Converter:
             return cast(ResponseOutputMessageParam, item)
         return None
 
+    @classmethod
+    def maybe_reasoning_message(cls, item: Any) -> ResponseReasoningItemParam | None:
+        if isinstance(item, dict) and item.get("type") == "reasoning":
+            return cast(ResponseReasoningItemParam, item)
+        return None
+
     @classmethod
     def extract_text_content(
         cls, content: str | Iterable[ResponseInputContentParam]
@@ -459,7 +466,11 @@ class Converter:
                     f"Encountered an item_reference, which is not supported: {item_ref}"
                 )
 
-            # 7) If we haven't recognized it => fail or ignore
+            # 7) reasoning message => not handled
+            elif cls.maybe_reasoning_message(item):
+                pass
+
+            # 8) If we haven't recognized it => fail or ignore
             else:
                 raise UserError(f"Unhandled item type or structure: {item}")
 
@@ -198,6 +198,7 @@ class ChatCmplStreamHandler:
                     is not None,  # fixed 0 -> 0 or 1
                     type="response.output_text.delta",
                     sequence_number=sequence_number.get_and_increment(),
+                    logprobs=[],
                 )
                 # Accumulate the text into the response part
                 state.text_content_index_and_output[1].text += delta.content
@@ -288,10 +289,11 @@ class ChatCmplStreamHandler:
                 function_call = state.function_calls[tc_delta.index]
 
                 # Start streaming as soon as we have function name and call_id
-                if (not state.function_call_streaming[tc_delta.index] and
-                    function_call.name and
-                    function_call.call_id):
-
+                if (
+                    not state.function_call_streaming[tc_delta.index]
+                    and function_call.name
+                    and function_call.call_id
+                ):
                     # Calculate the output index for this function call
                     function_call_starting_index = 0
                     if state.reasoning_content_index_and_output:
@@ -308,9 +310,9 @@ class ChatCmplStreamHandler:
 
                     # Mark this function call as streaming and store its output index
                     state.function_call_streaming[tc_delta.index] = True
-                    state.function_call_output_idx[
-                        tc_delta.index
-                    ] = function_call_starting_index
+                    state.function_call_output_idx[tc_delta.index] = (
+                        function_call_starting_index
+                    )
 
                     # Send initial function call added event
                     yield ResponseOutputItemAddedEvent(
@@ -327,10 +329,11 @@ class ChatCmplStreamHandler:
                 )
 
                 # Stream arguments if we've started streaming this function call
-                if (state.function_call_streaming.get(tc_delta.index, False) and
-                    tc_function and
-                    tc_function.arguments):
-
+                if (
+                    state.function_call_streaming.get(tc_delta.index, False)
+                    and tc_function
+                    and tc_function.arguments
+                ):
                     output_index = state.function_call_output_idx[tc_delta.index]
                     yield ResponseFunctionCallArgumentsDeltaEvent(
                         delta=tc_function.arguments,
@@ -3,7 +3,7 @@ from __future__ import annotations
 import json
 import time
 from collections.abc import AsyncIterator
-from typing import TYPE_CHECKING, Any, Literal, cast, overload
+from typing import TYPE_CHECKING, Any, Literal, overload
 
 from openai import NOT_GIVEN, AsyncOpenAI, AsyncStream
 from openai.types import ChatModel
@@ -28,6 +28,7 @@ from .chatcmpl_helpers import HEADERS, ChatCmplHelpers
 from .chatcmpl_stream_handler import ChatCmplStreamHandler
 from .fake_id import FAKE_RESPONSES_ID
 from .interface import Model, ModelTracing
+from .openai_responses import Converter as OpenAIResponsesConverter
 
 if TYPE_CHECKING:
     from ..model_settings import ModelSettings
@@ -296,15 +297,27 @@ class OpenAIChatCompletionsModel(Model):
         if isinstance(ret, ChatCompletion):
             return ret
 
+        responses_tool_choice = OpenAIResponsesConverter.convert_tool_choice(
+            model_settings.tool_choice
+        )
+        if responses_tool_choice is None or responses_tool_choice == NOT_GIVEN:
+            # For Responses API data compatibility with Chat Completions patterns,
+            # we need to set "none" if tool_choice is absent.
+            # Without this fix, you'll get the following error:
+            # pydantic_core._pydantic_core.ValidationError: 4 validation errors for Response
+            # tool_choice.literal['none','auto','required']
+            #   Input should be 'none', 'auto' or 'required'
+            #   [type=literal_error, input_value=NOT_GIVEN, input_type=NotGiven]
+            # see also: https://github.com/openai/openai-agents-python/issues/980
+            responses_tool_choice = "auto"
+
         response = Response(
             id=FAKE_RESPONSES_ID,
             created_at=time.time(),
             model=self.model,
            object="response",
            output=[],
-            tool_choice=cast(Literal["auto", "required", "none"], tool_choice)
-            if tool_choice != NOT_GIVEN
-            else "auto",
+            tool_choice=responses_tool_choice,  # type: ignore[arg-type]
            top_p=model_settings.top_p,
            temperature=model_settings.temperature,
            tools=[],
@@ -47,6 +47,8 @@ from .model import (
     RealtimeModel,
     RealtimeModelConfig,
     RealtimeModelListener,
+    RealtimePlaybackState,
+    RealtimePlaybackTracker,
 )
 from .model_events import (
     RealtimeConnectionStatus,
@@ -139,6 +141,8 @@ __all__ = [
     "RealtimeModel",
     "RealtimeModelConfig",
     "RealtimeModelListener",
+    "RealtimePlaybackTracker",
+    "RealtimePlaybackState",
     # Model Events
     "RealtimeConnectionStatus",
     "RealtimeModelAudioDoneEvent",
@@ -0,0 +1,47 @@
+from __future__ import annotations
+
+from dataclasses import dataclass
+from datetime import datetime
+
+from ._util import calculate_audio_length_ms
+from .config import RealtimeAudioFormat
+
+
+@dataclass
+class ModelAudioState:
+    initial_received_time: datetime
+    audio_length_ms: float
+
+
+class ModelAudioTracker:
+    def __init__(self) -> None:
+        # (item_id, item_content_index) -> ModelAudioState
+        self._states: dict[tuple[str, int], ModelAudioState] = {}
+        self._last_audio_item: tuple[str, int] | None = None
+
+    def set_audio_format(self, format: RealtimeAudioFormat) -> None:
+        """Called when the model wants to set the audio format."""
+        self._format = format
+
+    def on_audio_delta(self, item_id: str, item_content_index: int, audio_bytes: bytes) -> None:
+        """Called when an audio delta is received from the model."""
+        ms = calculate_audio_length_ms(self._format, audio_bytes)
+        new_key = (item_id, item_content_index)
+
+        self._last_audio_item = new_key
+        if new_key not in self._states:
+            self._states[new_key] = ModelAudioState(datetime.now(), ms)
+        else:
+            self._states[new_key].audio_length_ms += ms
+
+    def on_interrupted(self) -> None:
+        """Called when the audio playback has been interrupted."""
+        self._last_audio_item = None
+
+    def get_state(self, item_id: str, item_content_index: int) -> ModelAudioState | None:
+        """Called when the model wants to get the current playback state."""
+        return self._states.get((item_id, item_content_index))
+
+    def get_last_audio_item(self) -> tuple[str, int] | None:
+        """Called when the model wants to get the last audio item ID and content index."""
+        return self._last_audio_item
@@ -0,0 +1,9 @@
+from __future__ import annotations
+
+from .config import RealtimeAudioFormat
+
+
+def calculate_audio_length_ms(format: RealtimeAudioFormat | None, audio_bytes: bytes) -> float:
+    if format and format.startswith("g711"):
+        return (len(audio_bytes) / 8000) * 1000
+    return (len(audio_bytes) / 24 / 2) * 1000
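A small sanity check of the new helper as written above: the G.711 branch treats the payload as 8,000 one-byte samples per second, while the default branch applies the 24 kHz PCM16 constants (24 and 2 bytes per sample) exactly as shown. The format string below is illustrative; anything starting with "g711" takes the first branch.

```python
# One second of G.711 audio is 8000 bytes -> (8000 / 8000) * 1000 == 1000.0 ms
assert calculate_audio_length_ms("g711_ulaw", b"\x00" * 8000) == 1000.0

# The default branch evaluates (len / 24 / 2) * 1000 exactly as written above.
assert calculate_audio_length_ms(None, b"\x00" * 48) == (48 / 24 / 2) * 1000
```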
agents/realtime/config.py CHANGED
@@ -94,6 +94,9 @@ class RealtimeSessionModelSettings(TypedDict):
     voice: NotRequired[str]
     """The voice to use for audio output."""
 
+    speed: NotRequired[float]
+    """The speed of the model's responses."""
+
     input_audio_format: NotRequired[RealtimeAudioFormat]
     """The format for input audio streams."""