openai-agents 0.2.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of openai-agents might be problematic. Click here for more details.

agents/realtime/config.py CHANGED
@@ -9,6 +9,7 @@ from typing import (
9
9
  from typing_extensions import NotRequired, TypeAlias, TypedDict
10
10
 
11
11
  from ..guardrail import OutputGuardrail
12
+ from ..handoffs import Handoff
12
13
  from ..model_settings import ToolChoice
13
14
  from ..tool import Tool
14
15
 
@@ -27,52 +28,95 @@ RealtimeModelName: TypeAlias = Union[
27
28
 
28
29
 
29
30
  RealtimeAudioFormat: TypeAlias = Union[Literal["pcm16", "g711_ulaw", "g711_alaw"], str]
31
+ """The audio format for realtime audio streams."""
30
32
 
31
33
 
32
34
  class RealtimeClientMessage(TypedDict):
33
35
  """A raw message to be sent to the model."""
34
36
 
35
37
  type: str # explicitly required
38
+ """The type of the message."""
39
+
36
40
  other_data: NotRequired[dict[str, Any]]
37
41
  """Merged into the message body."""
38
42
 
39
43
 
40
44
  class RealtimeInputAudioTranscriptionConfig(TypedDict):
45
+ """Configuration for audio transcription in realtime sessions."""
46
+
41
47
  language: NotRequired[str]
48
+ """The language code for transcription."""
49
+
42
50
  model: NotRequired[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"] | str]
51
+ """The transcription model to use."""
52
+
43
53
  prompt: NotRequired[str]
54
+ """An optional prompt to guide transcription."""
44
55
 
45
56
 
46
57
  class RealtimeTurnDetectionConfig(TypedDict):
47
58
  """Turn detection config. Allows extra vendor keys if needed."""
48
59
 
49
60
  type: NotRequired[Literal["semantic_vad", "server_vad"]]
61
+ """The type of voice activity detection to use."""
62
+
50
63
  create_response: NotRequired[bool]
64
+ """Whether to create a response when a turn is detected."""
65
+
51
66
  eagerness: NotRequired[Literal["auto", "low", "medium", "high"]]
67
+ """How eagerly to detect turn boundaries."""
68
+
52
69
  interrupt_response: NotRequired[bool]
70
+ """Whether to allow interrupting the assistant's response."""
71
+
53
72
  prefix_padding_ms: NotRequired[int]
73
+ """Padding time in milliseconds before turn detection."""
74
+
54
75
  silence_duration_ms: NotRequired[int]
76
+ """Duration of silence in milliseconds to trigger turn detection."""
77
+
55
78
  threshold: NotRequired[float]
79
+ """The threshold for voice activity detection."""
56
80
 
57
81
 
58
82
  class RealtimeSessionModelSettings(TypedDict):
59
83
  """Model settings for a realtime model session."""
60
84
 
61
85
  model_name: NotRequired[RealtimeModelName]
86
+ """The name of the realtime model to use."""
62
87
 
63
88
  instructions: NotRequired[str]
89
+ """System instructions for the model."""
90
+
64
91
  modalities: NotRequired[list[Literal["text", "audio"]]]
92
+ """The modalities the model should support."""
93
+
65
94
  voice: NotRequired[str]
95
+ """The voice to use for audio output."""
66
96
 
67
97
  input_audio_format: NotRequired[RealtimeAudioFormat]
98
+ """The format for input audio streams."""
99
+
68
100
  output_audio_format: NotRequired[RealtimeAudioFormat]
101
+ """The format for output audio streams."""
102
+
69
103
  input_audio_transcription: NotRequired[RealtimeInputAudioTranscriptionConfig]
104
+ """Configuration for transcribing input audio."""
105
+
70
106
  turn_detection: NotRequired[RealtimeTurnDetectionConfig]
107
+ """Configuration for detecting conversation turns."""
71
108
 
72
109
  tool_choice: NotRequired[ToolChoice]
110
+ """How the model should choose which tools to call."""
111
+
73
112
  tools: NotRequired[list[Tool]]
113
+ """List of tools available to the model."""
114
+
115
+ handoffs: NotRequired[list[Handoff]]
116
+ """List of handoff configurations."""
74
117
 
75
118
  tracing: NotRequired[RealtimeModelTracingConfig | None]
119
+ """Configuration for request tracing."""
76
120
 
77
121
 
78
122
  class RealtimeGuardrailsSettings(TypedDict):
@@ -100,7 +144,10 @@ class RealtimeModelTracingConfig(TypedDict):
100
144
 
101
145
 
102
146
  class RealtimeRunConfig(TypedDict):
147
+ """Configuration for running a realtime agent session."""
148
+
103
149
  model_settings: NotRequired[RealtimeSessionModelSettings]
150
+ """Settings for the realtime model session."""
104
151
 
105
152
  output_guardrails: NotRequired[list[OutputGuardrail[Any]]]
106
153
  """List of output guardrails to run on the agent's responses."""
@@ -115,14 +162,27 @@ class RealtimeRunConfig(TypedDict):
115
162
 
116
163
 
117
164
  class RealtimeUserInputText(TypedDict):
165
+ """A text input from the user."""
166
+
118
167
  type: Literal["input_text"]
168
+ """The type identifier for text input."""
169
+
119
170
  text: str
171
+ """The text content from the user."""
120
172
 
121
173
 
122
174
  class RealtimeUserInputMessage(TypedDict):
175
+ """A message input from the user."""
176
+
123
177
  type: Literal["message"]
178
+ """The type identifier for message inputs."""
179
+
124
180
  role: Literal["user"]
181
+ """The role identifier for user messages."""
182
+
125
183
  content: list[RealtimeUserInputText]
184
+ """List of text content items in the message."""
126
185
 
127
186
 
128
187
  RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
188
+ """User input that can be a string or structured message."""
@@ -0,0 +1,165 @@
1
+ from __future__ import annotations
2
+
3
+ import inspect
4
+ from typing import TYPE_CHECKING, Any, Callable, cast, overload
5
+
6
+ from pydantic import TypeAdapter
7
+ from typing_extensions import TypeVar
8
+
9
+ from ..exceptions import ModelBehaviorError, UserError
10
+ from ..handoffs import Handoff
11
+ from ..run_context import RunContextWrapper, TContext
12
+ from ..strict_schema import ensure_strict_json_schema
13
+ from ..tracing.spans import SpanError
14
+ from ..util import _error_tracing, _json
15
+ from ..util._types import MaybeAwaitable
16
+
17
+ if TYPE_CHECKING:
18
+ from ..agent import AgentBase
19
+ from . import RealtimeAgent
20
+
21
+
22
+ # The handoff input type is the type of data passed when the agent is called via a handoff.
23
+ THandoffInput = TypeVar("THandoffInput", default=Any)
24
+
25
+ OnHandoffWithInput = Callable[[RunContextWrapper[Any], THandoffInput], Any]
26
+ OnHandoffWithoutInput = Callable[[RunContextWrapper[Any]], Any]
27
+
28
+
29
@overload
def realtime_handoff(
    agent: RealtimeAgent[TContext],
    *,
    tool_name_override: str | None = None,
    tool_description_override: str | None = None,
    is_enabled: bool
    | Callable[[RunContextWrapper[Any], RealtimeAgent[Any]], MaybeAwaitable[bool]] = True,
) -> Handoff[TContext, RealtimeAgent[TContext]]: ...


@overload
def realtime_handoff(
    agent: RealtimeAgent[TContext],
    *,
    on_handoff: OnHandoffWithInput[THandoffInput],
    input_type: type[THandoffInput],
    tool_description_override: str | None = None,
    tool_name_override: str | None = None,
    is_enabled: bool
    | Callable[[RunContextWrapper[Any], RealtimeAgent[Any]], MaybeAwaitable[bool]] = True,
) -> Handoff[TContext, RealtimeAgent[TContext]]: ...


@overload
def realtime_handoff(
    agent: RealtimeAgent[TContext],
    *,
    on_handoff: OnHandoffWithoutInput,
    tool_description_override: str | None = None,
    tool_name_override: str | None = None,
    is_enabled: bool
    | Callable[[RunContextWrapper[Any], RealtimeAgent[Any]], MaybeAwaitable[bool]] = True,
) -> Handoff[TContext, RealtimeAgent[TContext]]: ...


def realtime_handoff(
    agent: RealtimeAgent[TContext],
    tool_name_override: str | None = None,
    tool_description_override: str | None = None,
    on_handoff: OnHandoffWithInput[THandoffInput] | OnHandoffWithoutInput | None = None,
    input_type: type[THandoffInput] | None = None,
    is_enabled: bool
    | Callable[[RunContextWrapper[Any], RealtimeAgent[Any]], MaybeAwaitable[bool]] = True,
) -> Handoff[TContext, RealtimeAgent[TContext]]:
    """Create a handoff to a RealtimeAgent.

    Three call shapes are supported: no callback at all, a callback taking only the
    run context, or a callback taking the run context plus an input validated against
    ``input_type``.

    Args:
        agent: The RealtimeAgent to hand off to. Its ``name`` is recorded on the handoff
            and the agent itself is returned when the handoff is invoked.
        tool_name_override: Optional override for the name of the tool that represents the
            handoff.
        tool_description_override: Optional override for the description of the tool that
            represents the handoff.
        on_handoff: A function that runs when the handoff is invoked. It must take one
            argument (context) when ``input_type`` is omitted, or two arguments (context
            and input) when ``input_type`` is given. If it returns an awaitable, the
            awaitable is awaited.
        input_type: The type of the input to the handoff. If provided, the input will be
            validated against this type. Requires ``on_handoff``.
        is_enabled: Whether the handoff is enabled. Can be a bool or a callable that takes the
            run context and agent and returns whether the handoff is enabled. Disabled
            handoffs are hidden from the LLM at runtime.

    Returns:
        A ``Handoff`` whose invocation runs ``on_handoff`` (if any) and yields ``agent``.

    Raises:
        UserError: If ``on_handoff`` does not take the number of arguments implied by
            ``input_type``.
        AssertionError: If ``input_type`` is given without a callable ``on_handoff``.

    Note: input_filter is not supported for RealtimeAgent handoffs.
    """
    # `on_handoff` alone is valid (context-only callback); `input_type` alone is not,
    # because there would be nothing to receive the validated input.
    assert input_type is None or on_handoff is not None, (
        "You must provide on_handoff when input_type is given"
    )

    type_adapter: TypeAdapter[Any] | None
    input_json_schema: dict[str, Any]
    if input_type is not None:
        assert callable(on_handoff), "on_handoff must be callable"
        sig = inspect.signature(on_handoff)
        if len(sig.parameters) != 2:
            raise UserError("on_handoff must take two arguments: context and input")

        type_adapter = TypeAdapter(input_type)
        input_json_schema = type_adapter.json_schema()
    else:
        type_adapter = None
        input_json_schema = {}
        if on_handoff is not None:
            sig = inspect.signature(on_handoff)
            if len(sig.parameters) != 1:
                raise UserError("on_handoff must take one argument: context")

    async def _invoke_handoff(
        ctx: RunContextWrapper[Any], input_json: str | None = None
    ) -> RealtimeAgent[TContext]:
        """Run the user callback (validating input first if required), then return the agent."""
        if input_type is not None and type_adapter is not None:
            if input_json is None:
                _error_tracing.attach_error_to_current_span(
                    SpanError(
                        message="Handoff function expected non-null input, but got None",
                        data={"details": "input_json is None"},
                    )
                )
                raise ModelBehaviorError("Handoff function expected non-null input, but got None")

            validated_input = _json.validate_json(
                json_str=input_json,
                type_adapter=type_adapter,
                partial=False,
            )
            input_func = cast(OnHandoffWithInput[THandoffInput], on_handoff)
            # Check the returned value rather than the function object so that callable
            # instances with an async __call__ are awaited too.
            outcome = input_func(ctx, validated_input)
            if inspect.isawaitable(outcome):
                await outcome
        elif on_handoff is not None:
            no_input_func = cast(OnHandoffWithoutInput, on_handoff)
            outcome = no_input_func(ctx)
            if inspect.isawaitable(outcome):
                await outcome

        return agent

    tool_name = tool_name_override or Handoff.default_tool_name(agent)
    tool_description = tool_description_override or Handoff.default_tool_description(agent)

    # Always ensure the input JSON schema is in strict mode
    # If there is a need, we can make this configurable in the future
    input_json_schema = ensure_strict_json_schema(input_json_schema)

    async def _is_enabled(ctx: RunContextWrapper[Any], agent_base: AgentBase[Any]) -> bool:
        """Adapt a user-supplied `is_enabled` callable, awaiting its result if needed."""
        # The module-level import of RealtimeAgent is TYPE_CHECKING-only, so the name is
        # undefined at runtime; import it here for the isinstance check below.
        # NOTE(review): presumably kept out of module scope to avoid an import cycle - confirm.
        from . import RealtimeAgent

        assert callable(is_enabled), "is_enabled must be non-null here"
        assert isinstance(agent_base, RealtimeAgent), "Can't handoff to a non-RealtimeAgent"
        result = is_enabled(ctx, agent_base)
        if inspect.isawaitable(result):
            return await result
        return result

    return Handoff(
        tool_name=tool_name,
        tool_description=tool_description,
        input_json_schema=input_json_schema,
        on_invoke_handoff=_invoke_handoff,
        input_filter=None,  # Not supported for RealtimeAgent handoffs
        agent_name=agent.name,
        is_enabled=_is_enabled if callable(is_enabled) else is_enabled,
    )
agents/realtime/items.py CHANGED
@@ -6,59 +6,127 @@ from pydantic import BaseModel, ConfigDict, Field
6
6
 
7
7
 
8
8
  class InputText(BaseModel):
9
+ """Text input content for realtime messages."""
10
+
9
11
  type: Literal["input_text"] = "input_text"
12
+ """The type identifier for text input."""
13
+
10
14
  text: str | None = None
15
+ """The text content."""
11
16
 
12
17
  # Allow extra data
13
18
  model_config = ConfigDict(extra="allow")
14
19
 
15
20
 
16
21
  class InputAudio(BaseModel):
22
+ """Audio input content for realtime messages."""
23
+
17
24
  type: Literal["input_audio"] = "input_audio"
25
+ """The type identifier for audio input."""
26
+
18
27
  audio: str | None = None
28
+ """The base64-encoded audio data."""
29
+
19
30
  transcript: str | None = None
31
+ """The transcript of the audio, if available."""
20
32
 
21
33
  # Allow extra data
22
34
  model_config = ConfigDict(extra="allow")
23
35
 
24
36
 
25
37
  class AssistantText(BaseModel):
38
+ """Text content from the assistant in realtime responses."""
39
+
26
40
  type: Literal["text"] = "text"
41
+ """The type identifier for text content."""
42
+
27
43
  text: str | None = None
44
+ """The text content from the assistant."""
45
+
46
+ # Allow extra data
47
+ model_config = ConfigDict(extra="allow")
48
+
49
+
50
+ class AssistantAudio(BaseModel):
51
+ """Audio content from the assistant in realtime responses."""
52
+
53
+ type: Literal["audio"] = "audio"
54
+ """The type identifier for audio content."""
55
+
56
+ audio: str | None = None
57
+ """The base64-encoded audio data from the assistant."""
58
+
59
+ transcript: str | None = None
60
+ """The transcript of the audio response."""
28
61
 
29
62
  # Allow extra data
30
63
  model_config = ConfigDict(extra="allow")
31
64
 
32
65
 
33
66
  class SystemMessageItem(BaseModel):
67
+ """A system message item in realtime conversations."""
68
+
34
69
  item_id: str
70
+ """Unique identifier for this message item."""
71
+
35
72
  previous_item_id: str | None = None
73
+ """ID of the previous item in the conversation."""
74
+
36
75
  type: Literal["message"] = "message"
76
+ """The type identifier for message items."""
77
+
37
78
  role: Literal["system"] = "system"
79
+ """The role identifier for system messages."""
80
+
38
81
  content: list[InputText]
82
+ """List of text content for the system message."""
39
83
 
40
84
  # Allow extra data
41
85
  model_config = ConfigDict(extra="allow")
42
86
 
43
87
 
44
88
  class UserMessageItem(BaseModel):
89
+ """A user message item in realtime conversations."""
90
+
45
91
  item_id: str
92
+ """Unique identifier for this message item."""
93
+
46
94
  previous_item_id: str | None = None
95
+ """ID of the previous item in the conversation."""
96
+
47
97
  type: Literal["message"] = "message"
98
+ """The type identifier for message items."""
99
+
48
100
  role: Literal["user"] = "user"
101
+ """The role identifier for user messages."""
102
+
49
103
  content: list[Annotated[InputText | InputAudio, Field(discriminator="type")]]
104
+ """List of content items, can be text or audio."""
50
105
 
51
106
  # Allow extra data
52
107
  model_config = ConfigDict(extra="allow")
53
108
 
54
109
 
55
110
  class AssistantMessageItem(BaseModel):
111
+ """An assistant message item in realtime conversations."""
112
+
56
113
  item_id: str
114
+ """Unique identifier for this message item."""
115
+
57
116
  previous_item_id: str | None = None
117
+ """ID of the previous item in the conversation."""
118
+
58
119
  type: Literal["message"] = "message"
120
+ """The type identifier for message items."""
121
+
59
122
  role: Literal["assistant"] = "assistant"
123
+ """The role identifier for assistant messages."""
124
+
60
125
  status: Literal["in_progress", "completed", "incomplete"] | None = None
61
- content: list[AssistantText]
126
+ """The status of the assistant's response."""
127
+
128
+ content: list[Annotated[AssistantText | AssistantAudio, Field(discriminator="type")]]
129
+ """List of content items from the assistant, can be text or audio."""
62
130
 
63
131
  # Allow extra data
64
132
  model_config = ConfigDict(extra="allow")
@@ -68,24 +136,49 @@ RealtimeMessageItem = Annotated[
68
136
  Union[SystemMessageItem, UserMessageItem, AssistantMessageItem],
69
137
  Field(discriminator="role"),
70
138
  ]
139
+ """A message item that can be from system, user, or assistant."""
71
140
 
72
141
 
73
142
  class RealtimeToolCallItem(BaseModel):
143
+ """A tool call item in realtime conversations."""
144
+
74
145
  item_id: str
146
+ """Unique identifier for this tool call item."""
147
+
75
148
  previous_item_id: str | None = None
149
+ """ID of the previous item in the conversation."""
150
+
151
+ call_id: str | None
152
+ """The call ID for this tool invocation."""
153
+
76
154
  type: Literal["function_call"] = "function_call"
155
+ """The type identifier for function call items."""
156
+
77
157
  status: Literal["in_progress", "completed"]
158
+ """The status of the tool call execution."""
159
+
78
160
  arguments: str
161
+ """The JSON string arguments passed to the tool."""
162
+
79
163
  name: str
164
+ """The name of the tool being called."""
165
+
80
166
  output: str | None = None
167
+ """The output result from the tool execution."""
81
168
 
82
169
  # Allow extra data
83
170
  model_config = ConfigDict(extra="allow")
84
171
 
85
172
 
86
173
  RealtimeItem = Union[RealtimeMessageItem, RealtimeToolCallItem]
174
+ """A realtime item that can be a message or tool call."""
87
175
 
88
176
 
89
177
  class RealtimeResponse(BaseModel):
178
+ """A response from the realtime model."""
179
+
90
180
  id: str
181
+ """Unique identifier for this response."""
182
+
91
183
  output: list[RealtimeMessageItem]
184
+ """List of message items in the response."""