openai-agents 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openai-agents might be problematic. Click here for more details.
- agents/__init__.py +5 -1
- agents/_run_impl.py +5 -1
- agents/agent.py +61 -29
- agents/function_schema.py +11 -1
- agents/guardrail.py +5 -1
- agents/lifecycle.py +26 -17
- agents/mcp/server.py +43 -11
- agents/mcp/util.py +5 -6
- agents/memory/__init__.py +3 -0
- agents/memory/session.py +369 -0
- agents/model_settings.py +15 -7
- agents/models/chatcmpl_converter.py +19 -2
- agents/models/chatcmpl_stream_handler.py +1 -1
- agents/models/openai_responses.py +11 -4
- agents/realtime/README.md +3 -0
- agents/realtime/__init__.py +174 -0
- agents/realtime/agent.py +80 -0
- agents/realtime/config.py +128 -0
- agents/realtime/events.py +216 -0
- agents/realtime/items.py +91 -0
- agents/realtime/model.py +69 -0
- agents/realtime/model_events.py +159 -0
- agents/realtime/model_inputs.py +100 -0
- agents/realtime/openai_realtime.py +584 -0
- agents/realtime/runner.py +118 -0
- agents/realtime/session.py +502 -0
- agents/run.py +106 -4
- agents/tool.py +6 -7
- agents/tool_context.py +16 -3
- agents/voice/models/openai_stt.py +1 -1
- agents/voice/pipeline.py +6 -0
- agents/voice/workflow.py +8 -0
- {openai_agents-0.1.0.dist-info → openai_agents-0.2.0.dist-info}/METADATA +120 -3
- {openai_agents-0.1.0.dist-info → openai_agents-0.2.0.dist-info}/RECORD +36 -22
- {openai_agents-0.1.0.dist-info → openai_agents-0.2.0.dist-info}/WHEEL +0 -0
- {openai_agents-0.1.0.dist-info → openai_agents-0.2.0.dist-info}/licenses/LICENSE +0 -0
agents/realtime/agent.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import dataclasses
|
|
4
|
+
import inspect
|
|
5
|
+
from collections.abc import Awaitable
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from typing import Any, Callable, Generic, cast
|
|
8
|
+
|
|
9
|
+
from ..agent import AgentBase
|
|
10
|
+
from ..lifecycle import AgentHooksBase, RunHooksBase
|
|
11
|
+
from ..logger import logger
|
|
12
|
+
from ..run_context import RunContextWrapper, TContext
|
|
13
|
+
from ..util._types import MaybeAwaitable
|
|
14
|
+
|
|
15
|
+
RealtimeAgentHooks = AgentHooksBase[TContext, "RealtimeAgent[TContext]"]
"""`AgentHooksBase` parameterized for `RealtimeAgent` (named as a string forward reference)."""

RealtimeRunHooks = RunHooksBase[TContext, "RealtimeAgent[TContext]"]
"""`RunHooksBase` parameterized for `RealtimeAgent` (named as a string forward reference)."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class RealtimeAgent(AgentBase, Generic[TContext]):
    """A specialized agent instance that is meant to be used within a `RealtimeSession` to build
    voice agents. Due to the nature of this agent, some configuration options are not supported
    that are supported by regular `Agent` instances. For example:
    - `model` choice is not supported, as all RealtimeAgents will be handled by the same model
      within a `RealtimeSession`.
    - `model_settings` is not supported, as all RealtimeAgents will be handled by the same model
      within a `RealtimeSession`.
    - `output_type` is not supported, as RealtimeAgents do not support structured outputs.
    - `tool_use_behavior` is not supported, as all RealtimeAgents will be handled by the same
      model within a `RealtimeSession`.
    - `voice` can be configured on an `Agent` level; however, it cannot be changed after the first
      agent within a `RealtimeSession` has spoken.

    See `AgentBase` for base parameters that are shared with `Agent`s.
    """

    instructions: (
        str
        | Callable[
            [RunContextWrapper[TContext], RealtimeAgent[TContext]],
            MaybeAwaitable[str],
        ]
        | None
    ) = None
    """The instructions for the agent. Will be used as the "system prompt" when this agent is
    invoked. Describes what the agent should do, and how it responds.

    Can either be a string, or a function that dynamically generates instructions for the agent. If
    you provide a function, it will be called with the context and the agent instance. It must
    return a string (or an awaitable that resolves to a string).
    """

    hooks: RealtimeAgentHooks | None = None
    """A class that receives callbacks on various lifecycle events for this agent.
    """

    def clone(self, **kwargs: Any) -> RealtimeAgent[TContext]:
        """Make a copy of the agent, with the given arguments changed. For example, you could do:
        ```
        new_agent = agent.clone(instructions="New instructions")
        ```

        Args:
            **kwargs: Dataclass field names mapped to their replacement values.

        Returns:
            A new `RealtimeAgent` produced by `dataclasses.replace`.
        """
        return dataclasses.replace(self, **kwargs)

    async def get_system_prompt(self, run_context: RunContextWrapper[TContext]) -> str | None:
        """Get the system prompt for the agent.

        Args:
            run_context: The run context handed to a callable `instructions`.

        Returns:
            The instruction string, the (awaited) result of a callable `instructions`, or `None`
            when no instructions are set or they have an unsupported type.
        """
        instructions = self.instructions
        if instructions is None:
            return None

        if isinstance(instructions, str):
            return instructions

        if callable(instructions):
            # Call first, then inspect the result: `inspect.iscoroutinefunction` misses callables
            # that return an awaitable without being coroutine functions themselves (for example
            # an object whose `__call__` is `async def`), which would leak an un-awaited coroutine.
            result = instructions(run_context, self)
            if inspect.isawaitable(result):
                return await cast(Awaitable[str], result)
            return cast(str, result)

        logger.error(f"Instructions must be a string or a function, got {self.instructions}")
        return None
|
|
agents/realtime/config.py
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import (
|
|
4
|
+
Any,
|
|
5
|
+
Literal,
|
|
6
|
+
Union,
|
|
7
|
+
)
|
|
8
|
+
|
|
9
|
+
from typing_extensions import NotRequired, TypeAlias, TypedDict
|
|
10
|
+
|
|
11
|
+
from ..guardrail import OutputGuardrail
|
|
12
|
+
from ..model_settings import ToolChoice
|
|
13
|
+
from ..tool import Tool
|
|
14
|
+
|
|
15
|
+
RealtimeModelName: TypeAlias = Union[
|
|
16
|
+
Literal[
|
|
17
|
+
"gpt-4o-realtime-preview",
|
|
18
|
+
"gpt-4o-mini-realtime-preview",
|
|
19
|
+
"gpt-4o-realtime-preview-2025-06-03",
|
|
20
|
+
"gpt-4o-realtime-preview-2024-12-17",
|
|
21
|
+
"gpt-4o-realtime-preview-2024-10-01",
|
|
22
|
+
"gpt-4o-mini-realtime-preview-2024-12-17",
|
|
23
|
+
],
|
|
24
|
+
str,
|
|
25
|
+
]
|
|
26
|
+
"""The name of a realtime model."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
RealtimeAudioFormat: TypeAlias = Union[Literal["pcm16", "g711_ulaw", "g711_alaw"], str]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class RealtimeClientMessage(TypedDict):
|
|
33
|
+
"""A raw message to be sent to the model."""
|
|
34
|
+
|
|
35
|
+
type: str # explicitly required
|
|
36
|
+
other_data: NotRequired[dict[str, Any]]
|
|
37
|
+
"""Merged into the message body."""
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class RealtimeInputAudioTranscriptionConfig(TypedDict):
|
|
41
|
+
language: NotRequired[str]
|
|
42
|
+
model: NotRequired[Literal["gpt-4o-transcribe", "gpt-4o-mini-transcribe", "whisper-1"] | str]
|
|
43
|
+
prompt: NotRequired[str]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class RealtimeTurnDetectionConfig(TypedDict):
|
|
47
|
+
"""Turn detection config. Allows extra vendor keys if needed."""
|
|
48
|
+
|
|
49
|
+
type: NotRequired[Literal["semantic_vad", "server_vad"]]
|
|
50
|
+
create_response: NotRequired[bool]
|
|
51
|
+
eagerness: NotRequired[Literal["auto", "low", "medium", "high"]]
|
|
52
|
+
interrupt_response: NotRequired[bool]
|
|
53
|
+
prefix_padding_ms: NotRequired[int]
|
|
54
|
+
silence_duration_ms: NotRequired[int]
|
|
55
|
+
threshold: NotRequired[float]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class RealtimeSessionModelSettings(TypedDict):
|
|
59
|
+
"""Model settings for a realtime model session."""
|
|
60
|
+
|
|
61
|
+
model_name: NotRequired[RealtimeModelName]
|
|
62
|
+
|
|
63
|
+
instructions: NotRequired[str]
|
|
64
|
+
modalities: NotRequired[list[Literal["text", "audio"]]]
|
|
65
|
+
voice: NotRequired[str]
|
|
66
|
+
|
|
67
|
+
input_audio_format: NotRequired[RealtimeAudioFormat]
|
|
68
|
+
output_audio_format: NotRequired[RealtimeAudioFormat]
|
|
69
|
+
input_audio_transcription: NotRequired[RealtimeInputAudioTranscriptionConfig]
|
|
70
|
+
turn_detection: NotRequired[RealtimeTurnDetectionConfig]
|
|
71
|
+
|
|
72
|
+
tool_choice: NotRequired[ToolChoice]
|
|
73
|
+
tools: NotRequired[list[Tool]]
|
|
74
|
+
|
|
75
|
+
tracing: NotRequired[RealtimeModelTracingConfig | None]
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class RealtimeGuardrailsSettings(TypedDict):
|
|
79
|
+
"""Settings for output guardrails in realtime sessions."""
|
|
80
|
+
|
|
81
|
+
debounce_text_length: NotRequired[int]
|
|
82
|
+
"""
|
|
83
|
+
The minimum number of characters to accumulate before running guardrails on transcript
|
|
84
|
+
deltas. Defaults to 100. Guardrails run every time the accumulated text reaches
|
|
85
|
+
1x, 2x, 3x, etc. times this threshold.
|
|
86
|
+
"""
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class RealtimeModelTracingConfig(TypedDict):
|
|
90
|
+
"""Configuration for tracing in realtime model sessions."""
|
|
91
|
+
|
|
92
|
+
workflow_name: NotRequired[str]
|
|
93
|
+
"""The workflow name to use for tracing."""
|
|
94
|
+
|
|
95
|
+
group_id: NotRequired[str]
|
|
96
|
+
"""A group identifier to use for tracing, to link multiple traces together."""
|
|
97
|
+
|
|
98
|
+
metadata: NotRequired[dict[str, Any]]
|
|
99
|
+
"""Additional metadata to include with the trace."""
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class RealtimeRunConfig(TypedDict):
|
|
103
|
+
model_settings: NotRequired[RealtimeSessionModelSettings]
|
|
104
|
+
|
|
105
|
+
output_guardrails: NotRequired[list[OutputGuardrail[Any]]]
|
|
106
|
+
"""List of output guardrails to run on the agent's responses."""
|
|
107
|
+
|
|
108
|
+
guardrails_settings: NotRequired[RealtimeGuardrailsSettings]
|
|
109
|
+
"""Settings for guardrail execution."""
|
|
110
|
+
|
|
111
|
+
tracing_disabled: NotRequired[bool]
|
|
112
|
+
"""Whether tracing is disabled for this run."""
|
|
113
|
+
|
|
114
|
+
# TODO (rm) Add history audio storage config
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class RealtimeUserInputText(TypedDict):
|
|
118
|
+
type: Literal["input_text"]
|
|
119
|
+
text: str
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class RealtimeUserInputMessage(TypedDict):
|
|
123
|
+
type: Literal["message"]
|
|
124
|
+
role: Literal["user"]
|
|
125
|
+
content: list[RealtimeUserInputText]
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
# User input is either a plain string or a structured user message.
RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
|
|
agents/realtime/events.py
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from typing import Any, Literal, Union
|
|
5
|
+
|
|
6
|
+
from typing_extensions import TypeAlias
|
|
7
|
+
|
|
8
|
+
from ..guardrail import OutputGuardrailResult
|
|
9
|
+
from ..run_context import RunContextWrapper
|
|
10
|
+
from ..tool import Tool
|
|
11
|
+
from .agent import RealtimeAgent
|
|
12
|
+
from .items import RealtimeItem
|
|
13
|
+
from .model_events import RealtimeModelAudioEvent, RealtimeModelEvent
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
@dataclass
class RealtimeEventInfo:
    """Information shared by every realtime session event."""

    context: RunContextWrapper
    """The run context associated with the event."""
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class RealtimeAgentStartEvent:
    """Emitted when a new agent has started."""

    agent: RealtimeAgent
    """The agent that started."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["agent_start"] = "agent_start"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class RealtimeAgentEndEvent:
    """Emitted when an agent has ended."""

    agent: RealtimeAgent
    """The agent that finished."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["agent_end"] = "agent_end"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class RealtimeHandoffEvent:
    """Emitted when one agent has handed off to another agent."""

    from_agent: RealtimeAgent
    """The agent that performed the handoff."""

    to_agent: RealtimeAgent
    """The agent that received the handoff."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["handoff"] = "handoff"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class RealtimeToolStart:
    """An agent is starting a tool call."""

    agent: RealtimeAgent
    """The agent that is starting the tool call."""

    tool: Tool
    """The tool that is being called."""

    info: RealtimeEventInfo
    """Common info for all events, such as the context."""

    type: Literal["tool_start"] = "tool_start"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
@dataclass
class RealtimeToolEnd:
    """Emitted when an agent has finished a tool call."""

    agent: RealtimeAgent
    """The agent whose tool call ended."""

    tool: Tool
    """The tool that was invoked."""

    output: Any
    """What the tool call produced."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["tool_end"] = "tool_end"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
@dataclass
class RealtimeRawModelEvent:
    """Wrapper that forwards a raw event from the model layer."""

    data: RealtimeModelEvent
    """The raw model-layer event being forwarded."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["raw_model_event"] = "raw_model_event"
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
@dataclass
class RealtimeAudioEnd:
    """Emitted when the agent stops generating audio."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["audio_end"] = "audio_end"
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
@dataclass
class RealtimeAudio:
    """Emitted when the agent generates new audio to be played."""

    audio: RealtimeModelAudioEvent
    """The model-layer audio event."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["audio"] = "audio"
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
@dataclass
class RealtimeAudioInterrupted:
    """Emitted when the agent is interrupted. Listeners can use it to stop audio playback or to
    show a visual indicator to the user.
    """

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["audio_interrupted"] = "audio_interrupted"
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
@dataclass
class RealtimeError:
    """Emitted when an error has occurred."""

    error: Any
    """The error payload; no particular type is enforced."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["error"] = "error"
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
@dataclass
class RealtimeHistoryUpdated:
    """Emitted when the history has been updated; carries the full session history."""

    history: list[RealtimeItem]
    """The complete history of the session."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["history_updated"] = "history_updated"
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@dataclass
class RealtimeHistoryAdded:
    """Emitted when a new item has been added to the history."""

    item: RealtimeItem
    """The item that was just added to the history."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["history_added"] = "history_added"
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
@dataclass
class RealtimeGuardrailTripped:
    """Emitted when a guardrail has been tripped and the agent has been interrupted."""

    guardrail_results: list[OutputGuardrailResult]
    """Results from every guardrail that was triggered."""

    message: str
    """The message that was being generated at the moment the guardrail triggered."""

    info: RealtimeEventInfo
    """Info common to all events, such as the context."""

    type: Literal["guardrail_tripped"] = "guardrail_tripped"
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
RealtimeSessionEvent: TypeAlias = Union[
    RealtimeAgentStartEvent,
    RealtimeAgentEndEvent,
    RealtimeHandoffEvent,
    RealtimeToolStart,
    RealtimeToolEnd,
    RealtimeRawModelEvent,
    RealtimeAudioEnd,
    RealtimeAudio,
    RealtimeAudioInterrupted,
    RealtimeError,
    RealtimeHistoryUpdated,
    RealtimeHistoryAdded,
    RealtimeGuardrailTripped,
]
"""Union of every event type that the realtime session emits."""
|
agents/realtime/items.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Annotated, Literal, Union
|
|
4
|
+
|
|
5
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class InputText(BaseModel):
    """Text content part with the fixed type tag "input_text"."""

    # Unknown fields are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["input_text"] = "input_text"
    text: Union[str, None] = None
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class InputAudio(BaseModel):
    """Audio content part with the fixed type tag "input_audio"."""

    # Unknown fields are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["input_audio"] = "input_audio"
    audio: Union[str, None] = None
    transcript: Union[str, None] = None
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class AssistantText(BaseModel):
    """Text content part with the fixed type tag "text"."""

    # Unknown fields are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    type: Literal["text"] = "text"
    text: Union[str, None] = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class SystemMessageItem(BaseModel):
    """Message item with the "system" role; its content is a list of `InputText` parts."""

    # Unknown fields are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    item_id: str
    previous_item_id: Union[str, None] = None
    type: Literal["message"] = "message"
    role: Literal["system"] = "system"
    content: list[InputText]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class UserMessageItem(BaseModel):
    """Message item with the "user" role; its content mixes text and audio parts."""

    # Unknown fields are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    item_id: str
    previous_item_id: Union[str, None] = None
    type: Literal["message"] = "message"
    role: Literal["user"] = "user"
    # Each part is told apart by its `type` tag.
    content: list[Annotated[Union[InputText, InputAudio], Field(discriminator="type")]]
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class AssistantMessageItem(BaseModel):
    """Message item with the "assistant" role; its content is a list of `AssistantText` parts."""

    # Unknown fields are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    item_id: str
    previous_item_id: Union[str, None] = None
    type: Literal["message"] = "message"
    role: Literal["assistant"] = "assistant"
    status: Union[Literal["in_progress", "completed", "incomplete"], None] = None
    content: list[AssistantText]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
# Any of the three message items; the `role` field decides which model applies.
RealtimeMessageItem = Annotated[
    Union[SystemMessageItem, UserMessageItem, AssistantMessageItem],
    Field(discriminator="role"),
]
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class RealtimeToolCallItem(BaseModel):
    """History item with the fixed type tag "function_call"."""

    # Unknown fields are kept rather than rejected.
    model_config = ConfigDict(extra="allow")

    item_id: str
    previous_item_id: Union[str, None] = None
    type: Literal["function_call"] = "function_call"
    status: Literal["in_progress", "completed"]
    arguments: str
    name: str
    output: Union[str, None] = None
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
# A history item is either a message item or a tool-call item.
RealtimeItem = Union[RealtimeMessageItem, RealtimeToolCallItem]
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
class RealtimeResponse(BaseModel):
    """A response identified by `id` whose output is a list of message items."""

    id: str
    output: list[RealtimeMessageItem]
|
agents/realtime/model.py
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import abc
|
|
4
|
+
from typing import Callable
|
|
5
|
+
|
|
6
|
+
from typing_extensions import NotRequired, TypedDict
|
|
7
|
+
|
|
8
|
+
from ..util._types import MaybeAwaitable
|
|
9
|
+
from .config import (
|
|
10
|
+
RealtimeSessionModelSettings,
|
|
11
|
+
)
|
|
12
|
+
from .model_events import RealtimeModelEvent
|
|
13
|
+
from .model_inputs import RealtimeModelSendEvent
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class RealtimeModelListener(abc.ABC):
    """Abstract listener that receives realtime transport events."""

    @abc.abstractmethod
    async def on_event(self, event: RealtimeModelEvent) -> None:
        """Handle one event emitted by the realtime transport.

        Args:
            event: The event that was emitted.
        """
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class RealtimeModelConfig(TypedDict):
|
|
26
|
+
"""Options for connecting to a realtime model."""
|
|
27
|
+
|
|
28
|
+
api_key: NotRequired[str | Callable[[], MaybeAwaitable[str]]]
|
|
29
|
+
"""The API key (or function that returns a key) to use when connecting. If unset, the model will
|
|
30
|
+
try to use a sane default. For example, the OpenAI Realtime model will try to use the
|
|
31
|
+
`OPENAI_API_KEY` environment variable.
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
url: NotRequired[str]
|
|
35
|
+
"""The URL to use when connecting. If unset, the model will use a sane default. For example,
|
|
36
|
+
the OpenAI Realtime model will use the default OpenAI WebSocket URL.
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
initial_model_settings: NotRequired[RealtimeSessionModelSettings]
|
|
40
|
+
"""The initial model settings to use when connecting."""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class RealtimeModel(abc.ABC):
    """Abstract interface for connecting to a realtime model and exchanging events with it."""

    @abc.abstractmethod
    async def connect(self, options: RealtimeModelConfig) -> None:
        """Establish a connection to the model and keep it alive.

        Args:
            options: Connection options.
        """

    @abc.abstractmethod
    def add_listener(self, listener: RealtimeModelListener) -> None:
        """Register `listener` with the model."""

    @abc.abstractmethod
    def remove_listener(self, listener: RealtimeModelListener) -> None:
        """Unregister `listener` from the model."""

    @abc.abstractmethod
    async def send_event(self, event: RealtimeModelSendEvent) -> None:
        """Send `event` to the model."""

    @abc.abstractmethod
    async def close(self) -> None:
        """Close the session."""
|