openai-agents 0.2.10__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to their public registries. It is provided for informational purposes only and reflects the changes between the two package versions as published.


agents/_debug.py CHANGED
@@ -1,17 +1,28 @@
  import os
 
 
- def _debug_flag_enabled(flag: str) -> bool:
+ def _debug_flag_enabled(flag: str, default: bool = False) -> bool:
      flag_value = os.getenv(flag)
-     return flag_value is not None and (flag_value == "1" or flag_value.lower() == "true")
+     if flag_value is None:
+         return default
+     else:
+         return flag_value == "1" or flag_value.lower() == "true"
 
 
- DONT_LOG_MODEL_DATA = _debug_flag_enabled("OPENAI_AGENTS_DONT_LOG_MODEL_DATA")
+ def _load_dont_log_model_data() -> bool:
+     return _debug_flag_enabled("OPENAI_AGENTS_DONT_LOG_MODEL_DATA", default=True)
+
+
+ def _load_dont_log_tool_data() -> bool:
+     return _debug_flag_enabled("OPENAI_AGENTS_DONT_LOG_TOOL_DATA", default=True)
+
+
+ DONT_LOG_MODEL_DATA = _load_dont_log_model_data()
  """By default we don't log LLM inputs/outputs, to prevent exposing sensitive information. Set this
  flag to enable logging them.
  """
 
- DONT_LOG_TOOL_DATA = _debug_flag_enabled("OPENAI_AGENTS_DONT_LOG_TOOL_DATA")
+ DONT_LOG_TOOL_DATA = _load_dont_log_tool_data()
  """By default we don't log tool call inputs/outputs, to prevent exposing sensitive information. Set
  this flag to enable logging them.
  """
agents/_run_impl.py CHANGED
@@ -330,43 +330,40 @@ class RunImpl:
      ItemHelpers.extract_last_text(message_items[-1].raw_item) if message_items else None
  )
 
- # There are two possibilities that lead to a final output:
- # 1. Structured output schema => always leads to a final output
- # 2. Plain text output schema => only leads to a final output if there are no tool calls
- if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
-     final_output = output_schema.validate_json(potential_final_output_text)
-     return await cls.execute_final_output(
-         agent=agent,
-         original_input=original_input,
-         new_response=new_response,
-         pre_step_items=pre_step_items,
-         new_step_items=new_step_items,
-         final_output=final_output,
-         hooks=hooks,
-         context_wrapper=context_wrapper,
-     )
- elif (
-     not output_schema or output_schema.is_plain_text()
- ) and not processed_response.has_tools_or_approvals_to_run():
-     return await cls.execute_final_output(
-         agent=agent,
-         original_input=original_input,
-         new_response=new_response,
-         pre_step_items=pre_step_items,
-         new_step_items=new_step_items,
-         final_output=potential_final_output_text or "",
-         hooks=hooks,
-         context_wrapper=context_wrapper,
-     )
- else:
-     # If there's no final output, we can just run again
-     return SingleStepResult(
-         original_input=original_input,
-         model_response=new_response,
-         pre_step_items=pre_step_items,
-         new_step_items=new_step_items,
-         next_step=NextStepRunAgain(),
-     )
+ # Generate final output only when there are no pending tool calls or approval requests.
+ if not processed_response.has_tools_or_approvals_to_run():
+     if output_schema and not output_schema.is_plain_text() and potential_final_output_text:
+         final_output = output_schema.validate_json(potential_final_output_text)
+         return await cls.execute_final_output(
+             agent=agent,
+             original_input=original_input,
+             new_response=new_response,
+             pre_step_items=pre_step_items,
+             new_step_items=new_step_items,
+             final_output=final_output,
+             hooks=hooks,
+             context_wrapper=context_wrapper,
+         )
+     elif not output_schema or output_schema.is_plain_text():
+         return await cls.execute_final_output(
+             agent=agent,
+             original_input=original_input,
+             new_response=new_response,
+             pre_step_items=pre_step_items,
+             new_step_items=new_step_items,
+             final_output=potential_final_output_text or "",
+             hooks=hooks,
+             context_wrapper=context_wrapper,
+         )
+
+ # If there's no final output, we can just run again
+ return SingleStepResult(
+     original_input=original_input,
+     model_response=new_response,
+     pre_step_items=pre_step_items,
+     new_step_items=new_step_items,
+     next_step=NextStepRunAgain(),
+ )
 
  @classmethod
  def maybe_reset_tool_choice(
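In effect, the restructured block changes the precedence: pending tool calls or approval requests now always defer the final output, even when a structured output schema has already produced parseable text. A simplified, standalone sketch of the new decision order (illustrative only, not the SDK's actual signatures):

def decide_next_step(has_pending_work: bool, is_structured_schema: bool, output_text: str | None) -> str:
    # Pending tool calls / approvals always win now.
    if not has_pending_work:
        if is_structured_schema and output_text:
            return "final_output_from_schema"
        if not is_structured_schema:
            return "final_output_plain_text"
    return "run_again"

# Before this change, a structured schema with output text produced a final output
# even while tool calls were still pending; now the turn runs again first.
assert decide_next_step(True, True, '{"done": true}') == "run_again"
assert decide_next_step(False, True, '{"done": true}') == "final_output_from_schema"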
@@ -48,6 +48,7 @@ from ...tracing import generation_span
  from ...tracing.span_data import GenerationSpanData
  from ...tracing.spans import Span
  from ...usage import Usage
+ from ...util._json import _to_dump_compatible
 
 
  class InternalChatCompletionMessage(ChatCompletionMessage):
@@ -265,6 +266,8 @@ class LitellmModel(Model):
          "role": "system",
      },
  )
+ converted_messages = _to_dump_compatible(converted_messages)
+
  if tracing.include_data():
      span.span_data.input = converted_messages
 
@@ -283,13 +286,25 @@ class LitellmModel(Model):
  for handoff in handoffs:
      converted_tools.append(Converter.convert_handoff_tool(handoff))
 
+ converted_tools = _to_dump_compatible(converted_tools)
+
  if _debug.DONT_LOG_MODEL_DATA:
      logger.debug("Calling LLM")
  else:
+     messages_json = json.dumps(
+         converted_messages,
+         indent=2,
+         ensure_ascii=False,
+     )
+     tools_json = json.dumps(
+         converted_tools,
+         indent=2,
+         ensure_ascii=False,
+     )
      logger.debug(
          f"Calling Litellm model: {self.model}\n"
-         f"{json.dumps(converted_messages, indent=2, ensure_ascii=False)}\n"
-         f"Tools:\n{json.dumps(converted_tools, indent=2, ensure_ascii=False)}\n"
+         f"{messages_json}\n"
+         f"Tools:\n{tools_json}\n"
          f"Stream: {stream}\n"
          f"Tool choice: {tool_choice}\n"
          f"Response format: {response_format}\n"
@@ -369,9 +384,9 @@ class LitellmConverter:
  if message.role != "assistant":
      raise ModelBehaviorError(f"Unsupported role: {message.role}")
 
- tool_calls: list[
-     ChatCompletionMessageFunctionToolCall | ChatCompletionMessageCustomToolCall
- ] | None = (
+ tool_calls: (
+     list[ChatCompletionMessageFunctionToolCall | ChatCompletionMessageCustomToolCall] | None
+ ) = (
      [LitellmConverter.convert_tool_call_to_openai(tool) for tool in message.tool_calls]
      if message.tool_calls
      else None
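The _to_dump_compatible calls added above normalize the converted payloads (which may contain Pydantic models) into plain JSON-serializable structures before they are logged and sent. The helper's implementation is not part of this diff; the snippet below is only a hypothetical stand-in for what such a normalization step typically does.

from typing import Any

from pydantic import BaseModel


def to_dump_compatible(value: Any) -> Any:
    # Hypothetical stand-in: recursively turn Pydantic models into dicts so the
    # payload survives json.dumps(); the SDK's real helper may differ in detail.
    if isinstance(value, BaseModel):
        return value.model_dump(exclude_unset=True)
    if isinstance(value, dict):
        return {k: to_dump_compatible(v) for k, v in value.items()}
    if isinstance(value, (list, tuple)):
        return [to_dump_compatible(v) for v in value]
    return value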
agents/memory/__init__.py CHANGED
@@ -1,10 +1,12 @@
  from .openai_conversations_session import OpenAIConversationsSession
  from .session import Session, SessionABC
  from .sqlite_session import SQLiteSession
+ from .util import SessionInputCallback
 
  __all__ = [
      "Session",
      "SessionABC",
+     "SessionInputCallback",
      "SQLiteSession",
      "OpenAIConversationsSession",
  ]
@@ -19,9 +19,6 @@ async def start_openai_conversations_session(openai_client: AsyncOpenAI | None =
      return response.id
 
 
- _EMPTY_SESSION_ID = ""
-
-
  class OpenAIConversationsSession(SessionABC):
      def __init__(
          self,
agents/memory/util.py ADDED
@@ -0,0 +1,20 @@
+ from __future__ import annotations
+
+ from typing import Callable
+
+ from ..items import TResponseInputItem
+ from ..util._types import MaybeAwaitable
+
+ SessionInputCallback = Callable[
+     [list[TResponseInputItem], list[TResponseInputItem]],
+     MaybeAwaitable[list[TResponseInputItem]],
+ ]
+ """A function that combines session history with new input items.
+
+ Args:
+     history_items: The list of items from the session history.
+     new_items: The list of new input items for the current turn.
+
+ Returns:
+     A list of combined items to be used as input for the agent. Can be sync or async.
+ """
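A SessionInputCallback is any callable (sync or async) that receives the stored history and the current turn's items and returns the combined list the agent should see. A minimal sketch of a custom callback that trims old history; how the callback is wired into a run is not shown in this hunk, so only the contract above is assumed:

from agents.items import TResponseInputItem


def keep_last_five_history_items(
    history_items: list[TResponseInputItem],
    new_items: list[TResponseInputItem],
) -> list[TResponseInputItem]:
    # Keep the prompt small: retain only the most recent history, then append the new turn.
    return history_items[-5:] + new_items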
@@ -23,6 +23,7 @@ from ..tracing import generation_span
  from ..tracing.span_data import GenerationSpanData
  from ..tracing.spans import Span
  from ..usage import Usage
+ from ..util._json import _to_dump_compatible
  from .chatcmpl_converter import Converter
  from .chatcmpl_helpers import HEADERS, ChatCmplHelpers
  from .chatcmpl_stream_handler import ChatCmplStreamHandler
@@ -237,6 +238,8 @@ class OpenAIChatCompletionsModel(Model):
          "role": "system",
      },
  )
+ converted_messages = _to_dump_compatible(converted_messages)
+
  if tracing.include_data():
      span.span_data.input = converted_messages
 
@@ -255,12 +258,24 @@
  for handoff in handoffs:
      converted_tools.append(Converter.convert_handoff_tool(handoff))
 
+ converted_tools = _to_dump_compatible(converted_tools)
+
  if _debug.DONT_LOG_MODEL_DATA:
      logger.debug("Calling LLM")
  else:
+     messages_json = json.dumps(
+         converted_messages,
+         indent=2,
+         ensure_ascii=False,
+     )
+     tools_json = json.dumps(
+         converted_tools,
+         indent=2,
+         ensure_ascii=False,
+     )
      logger.debug(
-         f"{json.dumps(converted_messages, indent=2, ensure_ascii=False)}\n"
-         f"Tools:\n{json.dumps(converted_tools, indent=2, ensure_ascii=False)}\n"
+         f"{messages_json}\n"
+         f"Tools:\n{tools_json}\n"
          f"Stream: {stream}\n"
          f"Tool choice: {tool_choice}\n"
          f"Response format: {response_format}\n"
@@ -38,6 +38,7 @@ from ..tool import (
  )
  from ..tracing import SpanError, response_span
  from ..usage import Usage
+ from ..util._json import _to_dump_compatible
  from ..version import __version__
  from .interface import Model, ModelTracing
 
@@ -240,6 +241,7 @@ class OpenAIResponsesModel(Model):
      prompt: ResponsePromptParam | None = None,
  ) -> Response | AsyncStream[ResponseStreamEvent]:
      list_input = ItemHelpers.input_to_new_input_list(input)
+     list_input = _to_dump_compatible(list_input)
 
      parallel_tool_calls = (
          True
@@ -251,6 +253,7 @@
 
  tool_choice = Converter.convert_tool_choice(model_settings.tool_choice)
  converted_tools = Converter.convert_tools(tools, handoffs)
+ converted_tools_payload = _to_dump_compatible(converted_tools.tools)
  response_format = Converter.get_response_format(output_schema)
 
  include_set: set[str] = set(converted_tools.includes)
@@ -263,10 +266,20 @@
  if _debug.DONT_LOG_MODEL_DATA:
      logger.debug("Calling LLM")
  else:
+     input_json = json.dumps(
+         list_input,
+         indent=2,
+         ensure_ascii=False,
+     )
+     tools_json = json.dumps(
+         converted_tools_payload,
+         indent=2,
+         ensure_ascii=False,
+     )
      logger.debug(
          f"Calling LLM {self.model} with input:\n"
-         f"{json.dumps(list_input, indent=2, ensure_ascii=False)}\n"
-         f"Tools:\n{json.dumps(converted_tools.tools, indent=2, ensure_ascii=False)}\n"
+         f"{input_json}\n"
+         f"Tools:\n{tools_json}\n"
          f"Stream: {stream}\n"
          f"Tool choice: {tool_choice}\n"
          f"Response format: {response_format}\n"
@@ -290,7 +303,7 @@
      model=self.model,
      input=list_input,
      include=include,
-     tools=converted_tools.tools,
+     tools=converted_tools_payload,
      prompt=self._non_null_or_not_given(prompt),
      temperature=self._non_null_or_not_given(model_settings.temperature),
      top_p=self._non_null_or_not_given(model_settings.top_p),
@@ -433,7 +446,7 @@
  converted_tool = {
      "type": "web_search",
      "filters": tool.filters.model_dump() if tool.filters is not None else None, # type: ignore [typeddict-item]
-     "user_location": tool.user_location, # type: ignore [typeddict-item]
+     "user_location": tool.user_location,
      "search_context_size": tool.search_context_size,
  }
  includes = None
agents/realtime/_util.py CHANGED
@@ -4,6 +4,6 @@ from .config import RealtimeAudioFormat
 
 
  def calculate_audio_length_ms(format: RealtimeAudioFormat | None, audio_bytes: bytes) -> float:
-     if format and format.startswith("g711"):
+     if format and isinstance(format, str) and format.startswith("g711"):
          return (len(audio_bytes) / 8000) * 1000
      return (len(audio_bytes) / 24 / 2) * 1000
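The added isinstance check matters because the format can now also be a structured OpenAI audio-format object (see the config change below), which has no startswith method. A small sketch of the guard, using the AudioPCM construction shown later in this diff:

from openai.types.realtime.realtime_audio_formats import AudioPCM


def is_g711(format_value: object) -> bool:
    # Mirrors the new guard: only string formats are inspected with startswith().
    return isinstance(format_value, str) and format_value.startswith("g711")


assert is_g711("g711_ulaw") is True
assert is_g711(AudioPCM(type="audio/pcm", rate=24000)) is False  # object form: skip the string check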
agents/realtime/agent.py CHANGED
@@ -6,6 +6,8 @@ from collections.abc import Awaitable
  from dataclasses import dataclass, field
  from typing import Any, Callable, Generic, cast
 
+ from agents.prompts import Prompt
+
  from ..agent import AgentBase
  from ..guardrail import OutputGuardrail
  from ..handoffs import Handoff
@@ -55,6 +57,11 @@ class RealtimeAgent(AgentBase, Generic[TContext]):
      return a string.
      """
 
+     prompt: Prompt | None = None
+     """A prompt object. Prompts allow you to dynamically configure the instructions, tools
+     and other config for an agent outside of your code. Only usable with OpenAI models.
+     """
+
      handoffs: list[RealtimeAgent[Any] | Handoff[TContext, RealtimeAgent[Any]]] = field(
          default_factory=list
      )
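A rough sketch of supplying a prompt to a realtime agent. The exact shape of Prompt is not shown in this diff; the dict literal below assumes the id/version/variables fields used elsewhere in the SDK, so verify against agents.prompts before relying on it.

from agents.prompts import Prompt
from agents.realtime.agent import RealtimeAgent

# Assumed Prompt shape: a server-stored prompt referenced by id, with optional
# version and template variables. The id below is a hypothetical placeholder.
support_prompt: Prompt = {
    "id": "pmpt_123",
    "version": "2",
    "variables": {"tone": "friendly"},
}

agent = RealtimeAgent(
    name="Support agent",
    prompt=support_prompt,
)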
@@ -0,0 +1,29 @@
+ from __future__ import annotations
+
+ from openai.types.realtime.realtime_audio_formats import (
+     AudioPCM,
+     AudioPCMA,
+     AudioPCMU,
+     RealtimeAudioFormats,
+ )
+
+ from ..logger import logger
+
+
+ def to_realtime_audio_format(
+     input_audio_format: str | RealtimeAudioFormats | None,
+ ) -> RealtimeAudioFormats | None:
+     format: RealtimeAudioFormats | None = None
+     if input_audio_format is not None:
+         if isinstance(input_audio_format, str):
+             if input_audio_format in ["pcm16", "audio/pcm", "pcm"]:
+                 format = AudioPCM(type="audio/pcm", rate=24000)
+             elif input_audio_format in ["g711_ulaw", "audio/pcmu", "pcmu"]:
+                 format = AudioPCMU(type="audio/pcmu")
+             elif input_audio_format in ["g711_alaw", "audio/pcma", "pcma"]:
+                 format = AudioPCMA(type="audio/pcma")
+             else:
+                 logger.debug(f"Unknown input_audio_format: {input_audio_format}")
+         else:
+             format = input_audio_format
+     return format
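The new helper accepts both legacy string names and structured OpenAI format objects. A quick sketch of the mapping it performs; note that the added file's path is not shown in this diff, so the helper's import below is an assumption:

from openai.types.realtime.realtime_audio_formats import AudioPCM, AudioPCMA, AudioPCMU

# Assumed module path for the new helper; verify where the added file actually lives.
from agents.realtime.audio_formats import to_realtime_audio_format

assert isinstance(to_realtime_audio_format("pcm16"), AudioPCM)       # 24 kHz PCM
assert isinstance(to_realtime_audio_format("g711_ulaw"), AudioPCMU)  # mu-law
assert isinstance(to_realtime_audio_format("g711_alaw"), AudioPCMA)  # A-law
assert to_realtime_audio_format(None) is None

# Structured objects pass through unchanged.
pcm = AudioPCM(type="audio/pcm", rate=24000)
assert to_realtime_audio_format(pcm) is pcm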
agents/realtime/config.py CHANGED
@@ -6,8 +6,13 @@ from typing import (
      Union,
  )
 
+ from openai.types.realtime.realtime_audio_formats import (
+     RealtimeAudioFormats as OpenAIRealtimeAudioFormats,
+ )
  from typing_extensions import NotRequired, TypeAlias, TypedDict
 
+ from agents.prompts import Prompt
+
  from ..guardrail import OutputGuardrail
  from ..handoffs import Handoff
  from ..model_settings import ToolChoice
@@ -15,6 +20,8 @@ from ..tool import Tool
 
  RealtimeModelName: TypeAlias = Union[
      Literal[
+         "gpt-realtime",
+         "gpt-realtime-2025-08-28",
          "gpt-4o-realtime-preview",
          "gpt-4o-mini-realtime-preview",
          "gpt-4o-realtime-preview-2025-06-03",
@@ -91,6 +98,9 @@ class RealtimeSessionModelSettings(TypedDict):
      instructions: NotRequired[str]
      """System instructions for the model."""
 
+     prompt: NotRequired[Prompt]
+     """The prompt to use for the model."""
+
      modalities: NotRequired[list[Literal["text", "audio"]]]
      """The modalities the model should support."""
 
@@ -100,10 +110,10 @@ class RealtimeSessionModelSettings(TypedDict):
      speed: NotRequired[float]
      """The speed of the model's responses."""
 
-     input_audio_format: NotRequired[RealtimeAudioFormat]
+     input_audio_format: NotRequired[RealtimeAudioFormat | OpenAIRealtimeAudioFormats]
      """The format for input audio streams."""
 
-     output_audio_format: NotRequired[RealtimeAudioFormat]
+     output_audio_format: NotRequired[RealtimeAudioFormat | OpenAIRealtimeAudioFormats]
      """The format for output audio streams."""
 
      input_audio_transcription: NotRequired[RealtimeInputAudioTranscriptionConfig]
@@ -177,6 +187,14 @@ class RealtimeUserInputText(TypedDict):
      """The text content from the user."""
 
 
+ class RealtimeUserInputImage(TypedDict, total=False):
+     """An image input from the user (Realtime)."""
+
+     type: Literal["input_image"]
+     image_url: str
+     detail: NotRequired[Literal["auto", "low", "high"] | str]
+
+
  class RealtimeUserInputMessage(TypedDict):
      """A message input from the user."""
 
@@ -186,8 +204,8 @@ class RealtimeUserInputMessage(TypedDict):
      role: Literal["user"]
      """The role identifier for user messages."""
 
-     content: list[RealtimeUserInputText]
-     """List of text content items in the message."""
+     content: list[RealtimeUserInputText | RealtimeUserInputImage]
+     """List of content items (text and image) in the message."""
 
 
  RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
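With the widened content type, a realtime user message can now mix text and image items. A minimal sketch of such a message as a plain dict matching these TypedDicts; the text item's keys and the message's "type" key are assumed from the surrounding context rather than shown in full here:

from agents.realtime.config import RealtimeUserInputMessage

# The "input_text" item keys and the top-level "type" key are assumed; verify
# against RealtimeUserInputText / RealtimeUserInputMessage in agents/realtime/config.py.
message: RealtimeUserInputMessage = {
    "type": "message",
    "role": "user",
    "content": [
        {"type": "input_text", "text": "What is in this image?"},
        {
            "type": "input_image",
            "image_url": "data:image/png;base64,...",  # data: or https: URL (placeholder)
            "detail": "auto",
        },
    ],
}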
agents/realtime/items.py CHANGED
@@ -34,6 +34,22 @@ class InputAudio(BaseModel):
      model_config = ConfigDict(extra="allow")
 
 
+ class InputImage(BaseModel):
+     """Image input content for realtime messages."""
+
+     type: Literal["input_image"] = "input_image"
+     """The type identifier for image input."""
+
+     image_url: str | None = None
+     """Data/remote URL string (data:... or https:...)."""
+
+     detail: str | None = None
+     """Optional detail hint (e.g., 'auto', 'high', 'low')."""
+
+     # Allow extra data (e.g., `detail`)
+     model_config = ConfigDict(extra="allow")
+
+
  class AssistantText(BaseModel):
      """Text content from the assistant in realtime responses."""
 
@@ -100,7 +116,7 @@ class UserMessageItem(BaseModel):
      role: Literal["user"] = "user"
      """The role identifier for user messages."""
 
-     content: list[Annotated[InputText | InputAudio, Field(discriminator="type")]]
+     content: list[Annotated[InputText | InputAudio | InputImage, Field(discriminator="type")]]
      """List of content items, can be text or audio."""
 
      # Allow extra data
agents/realtime/model.py CHANGED
@@ -118,6 +118,12 @@ class RealtimeModelConfig(TypedDict):
      the OpenAI Realtime model will use the default OpenAI WebSocket URL.
      """
 
+     headers: NotRequired[dict[str, str]]
+     """The headers to use when connecting. If unset, the model will use a sane default.
+     Note that, when you set this, authorization header won't be set under the hood.
+     e.g., {"api-key": "your api key here"} for Azure OpenAI Realtime WebSocket connections.
+     """
+
      initial_model_settings: NotRequired[RealtimeSessionModelSettings]
      """The initial model settings to use when connecting."""
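A brief sketch of what the new headers option enables, using the Azure case from the docstring. The "url" key and the endpoint shape below are assumptions for illustration, not part of this diff:

import os

from agents.realtime.model import RealtimeModelConfig

# Hypothetical Azure endpoint; substitute your own resource and deployment values.
model_config: RealtimeModelConfig = {
    "url": "wss://your-resource.openai.azure.com/openai/realtime?deployment=gpt-realtime",
    # With custom headers set, the SDK no longer injects the Authorization header,
    # so authentication must be supplied here (e.g., Azure's api-key header).
    "headers": {"api-key": os.environ["AZURE_OPENAI_API_KEY"]},
}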
 
@@ -24,12 +24,26 @@ class RealtimeModelInputTextContent(TypedDict):
      text: str
 
 
+ class RealtimeModelInputImageContent(TypedDict, total=False):
+     """An image to be sent to the model.
+
+     The Realtime API expects `image_url` to be a string data/remote URL.
+     """
+
+     type: Literal["input_image"]
+     image_url: str
+     """String URL (data:... or https:...)."""
+
+     detail: NotRequired[str]
+     """Optional detail hint such as 'high', 'low', or 'auto'."""
+
+
  class RealtimeModelUserInputMessage(TypedDict):
      """A message to be sent to the model."""
 
      type: Literal["message"]
      role: Literal["user"]
-     content: list[RealtimeModelInputTextContent]
+     content: list[RealtimeModelInputTextContent | RealtimeModelInputImageContent]
 
 
  RealtimeModelUserInput: TypeAlias = Union[str, RealtimeModelUserInputMessage]