openai-agents 0.2.3__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openai-agents might be problematic. Click here for more details.
- agents/__init__.py +10 -1
- agents/_run_impl.py +14 -3
- agents/agent.py +19 -37
- agents/agent_output.py +1 -1
- agents/extensions/handoff_filters.py +1 -0
- agents/extensions/models/litellm_model.py +14 -1
- agents/extensions/visualization.py +11 -0
- agents/function_schema.py +3 -3
- agents/guardrail.py +5 -4
- agents/handoffs.py +22 -6
- agents/items.py +5 -1
- agents/mcp/util.py +13 -15
- agents/models/chatcmpl_converter.py +12 -1
- agents/models/chatcmpl_stream_handler.py +14 -11
- agents/models/openai_chatcompletions.py +17 -4
- agents/realtime/__init__.py +4 -0
- agents/realtime/_default_tracker.py +47 -0
- agents/realtime/_util.py +9 -0
- agents/realtime/config.py +3 -0
- agents/realtime/events.py +18 -0
- agents/realtime/model.py +94 -0
- agents/realtime/model_events.py +28 -0
- agents/realtime/openai_realtime.py +100 -29
- agents/realtime/session.py +41 -10
- agents/run.py +53 -6
- agents/tracing/create.py +1 -2
- agents/tracing/processors.py +9 -7
- agents/tracing/provider.py +25 -7
- agents/tracing/traces.py +1 -1
- {openai_agents-0.2.3.dist-info → openai_agents-0.2.5.dist-info}/METADATA +7 -2
- {openai_agents-0.2.3.dist-info → openai_agents-0.2.5.dist-info}/RECORD +33 -31
- {openai_agents-0.2.3.dist-info → openai_agents-0.2.5.dist-info}/WHEEL +0 -0
- {openai_agents-0.2.3.dist-info → openai_agents-0.2.5.dist-info}/licenses/LICENSE +0 -0
agents/realtime/events.py
CHANGED
|
@@ -115,6 +115,12 @@ class RealtimeAudioEnd:
|
|
|
115
115
|
info: RealtimeEventInfo
|
|
116
116
|
"""Common info for all events, such as the context."""
|
|
117
117
|
|
|
118
|
+
item_id: str
|
|
119
|
+
"""The ID of the item containing audio."""
|
|
120
|
+
|
|
121
|
+
content_index: int
|
|
122
|
+
"""The index of the audio content in `item.content`"""
|
|
123
|
+
|
|
118
124
|
type: Literal["audio_end"] = "audio_end"
|
|
119
125
|
|
|
120
126
|
|
|
@@ -125,6 +131,12 @@ class RealtimeAudio:
|
|
|
125
131
|
audio: RealtimeModelAudioEvent
|
|
126
132
|
"""The audio event from the model layer."""
|
|
127
133
|
|
|
134
|
+
item_id: str
|
|
135
|
+
"""The ID of the item containing audio."""
|
|
136
|
+
|
|
137
|
+
content_index: int
|
|
138
|
+
"""The index of the audio content in `item.content`"""
|
|
139
|
+
|
|
128
140
|
info: RealtimeEventInfo
|
|
129
141
|
"""Common info for all events, such as the context."""
|
|
130
142
|
|
|
@@ -140,6 +152,12 @@ class RealtimeAudioInterrupted:
|
|
|
140
152
|
info: RealtimeEventInfo
|
|
141
153
|
"""Common info for all events, such as the context."""
|
|
142
154
|
|
|
155
|
+
item_id: str
|
|
156
|
+
"""The ID of the item containing audio."""
|
|
157
|
+
|
|
158
|
+
content_index: int
|
|
159
|
+
"""The index of the audio content in `item.content`"""
|
|
160
|
+
|
|
143
161
|
type: Literal["audio_interrupted"] = "audio_interrupted"
|
|
144
162
|
|
|
145
163
|
|
agents/realtime/model.py
CHANGED
|
@@ -6,13 +6,95 @@ from typing import Callable
|
|
|
6
6
|
from typing_extensions import NotRequired, TypedDict
|
|
7
7
|
|
|
8
8
|
from ..util._types import MaybeAwaitable
|
|
9
|
+
from ._util import calculate_audio_length_ms
|
|
9
10
|
from .config import (
|
|
11
|
+
RealtimeAudioFormat,
|
|
10
12
|
RealtimeSessionModelSettings,
|
|
11
13
|
)
|
|
12
14
|
from .model_events import RealtimeModelEvent
|
|
13
15
|
from .model_inputs import RealtimeModelSendEvent
|
|
14
16
|
|
|
15
17
|
|
|
18
|
+
class RealtimePlaybackState(TypedDict):
|
|
19
|
+
current_item_id: str | None
|
|
20
|
+
"""The item ID of the current item being played."""
|
|
21
|
+
|
|
22
|
+
current_item_content_index: int | None
|
|
23
|
+
"""The index of the current item content being played."""
|
|
24
|
+
|
|
25
|
+
elapsed_ms: float | None
|
|
26
|
+
"""The number of milliseconds of audio that have been played."""
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class RealtimePlaybackTracker:
|
|
30
|
+
"""If you have custom playback logic or expect that audio is played with delays or at different
|
|
31
|
+
speeds, create an instance of RealtimePlaybackTracker and pass it to the session. You are
|
|
32
|
+
responsible for tracking the audio playback progress and calling `on_play_bytes` or
|
|
33
|
+
`on_play_ms` when the user has played some audio."""
|
|
34
|
+
|
|
35
|
+
def __init__(self) -> None:
|
|
36
|
+
self._format: RealtimeAudioFormat | None = None
|
|
37
|
+
# (item_id, item_content_index)
|
|
38
|
+
self._current_item: tuple[str, int] | None = None
|
|
39
|
+
self._elapsed_ms: float | None = None
|
|
40
|
+
|
|
41
|
+
def on_play_bytes(self, item_id: str, item_content_index: int, bytes: bytes) -> None:
|
|
42
|
+
"""Called by you when you have played some audio.
|
|
43
|
+
|
|
44
|
+
Args:
|
|
45
|
+
item_id: The item ID of the audio being played.
|
|
46
|
+
item_content_index: The index of the audio content in `item.content`
|
|
47
|
+
bytes: The audio bytes that have been fully played.
|
|
48
|
+
"""
|
|
49
|
+
ms = calculate_audio_length_ms(self._format, bytes)
|
|
50
|
+
self.on_play_ms(item_id, item_content_index, ms)
|
|
51
|
+
|
|
52
|
+
def on_play_ms(self, item_id: str, item_content_index: int, ms: float) -> None:
|
|
53
|
+
"""Called by you when you have played some audio.
|
|
54
|
+
|
|
55
|
+
Args:
|
|
56
|
+
item_id: The item ID of the audio being played.
|
|
57
|
+
item_content_index: The index of the audio content in `item.content`
|
|
58
|
+
ms: The number of milliseconds of audio that have been played.
|
|
59
|
+
"""
|
|
60
|
+
if self._current_item != (item_id, item_content_index):
|
|
61
|
+
self._current_item = (item_id, item_content_index)
|
|
62
|
+
self._elapsed_ms = ms
|
|
63
|
+
else:
|
|
64
|
+
assert self._elapsed_ms is not None
|
|
65
|
+
self._elapsed_ms += ms
|
|
66
|
+
|
|
67
|
+
def on_interrupted(self) -> None:
|
|
68
|
+
"""Called by the model when the audio playback has been interrupted."""
|
|
69
|
+
self._current_item = None
|
|
70
|
+
self._elapsed_ms = None
|
|
71
|
+
|
|
72
|
+
def set_audio_format(self, format: RealtimeAudioFormat) -> None:
|
|
73
|
+
"""Will be called by the model to set the audio format.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
format: The audio format to use.
|
|
77
|
+
"""
|
|
78
|
+
self._format = format
|
|
79
|
+
|
|
80
|
+
def get_state(self) -> RealtimePlaybackState:
|
|
81
|
+
"""Will be called by the model to get the current playback state."""
|
|
82
|
+
if self._current_item is None:
|
|
83
|
+
return {
|
|
84
|
+
"current_item_id": None,
|
|
85
|
+
"current_item_content_index": None,
|
|
86
|
+
"elapsed_ms": None,
|
|
87
|
+
}
|
|
88
|
+
assert self._elapsed_ms is not None
|
|
89
|
+
|
|
90
|
+
item_id, item_content_index = self._current_item
|
|
91
|
+
return {
|
|
92
|
+
"current_item_id": item_id,
|
|
93
|
+
"current_item_content_index": item_content_index,
|
|
94
|
+
"elapsed_ms": self._elapsed_ms,
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
|
|
16
98
|
class RealtimeModelListener(abc.ABC):
|
|
17
99
|
"""A listener for realtime transport events."""
|
|
18
100
|
|
|
@@ -39,6 +121,18 @@ class RealtimeModelConfig(TypedDict):
|
|
|
39
121
|
initial_model_settings: NotRequired[RealtimeSessionModelSettings]
|
|
40
122
|
"""The initial model settings to use when connecting."""
|
|
41
123
|
|
|
124
|
+
playback_tracker: NotRequired[RealtimePlaybackTracker]
|
|
125
|
+
"""The playback tracker to use when tracking audio playback progress. If not set, the model will
|
|
126
|
+
use a default implementation that assumes audio is played immediately, at realtime speed.
|
|
127
|
+
|
|
128
|
+
A playback tracker is useful for interruptions. The model generates audio much faster than
|
|
129
|
+
realtime playback speed. So if there's an interruption, it's useful for the model to know how
|
|
130
|
+
much of the audio has been played by the user. In low-latency scenarios, it's fine to assume
|
|
131
|
+
that audio is played back immediately at realtime speed. But in scenarios like phone calls or
|
|
132
|
+
other remote interactions, you can set a playback tracker that lets the model know when audio
|
|
133
|
+
is played to the user.
|
|
134
|
+
"""
|
|
135
|
+
|
|
42
136
|
|
|
43
137
|
class RealtimeModel(abc.ABC):
|
|
44
138
|
"""Interface for connecting to a realtime model and sending/receiving events."""
|
agents/realtime/model_events.py
CHANGED
|
@@ -40,6 +40,12 @@ class RealtimeModelAudioEvent:
|
|
|
40
40
|
data: bytes
|
|
41
41
|
response_id: str
|
|
42
42
|
|
|
43
|
+
item_id: str
|
|
44
|
+
"""The ID of the item containing audio."""
|
|
45
|
+
|
|
46
|
+
content_index: int
|
|
47
|
+
"""The index of the audio content in `item.content`"""
|
|
48
|
+
|
|
43
49
|
type: Literal["audio"] = "audio"
|
|
44
50
|
|
|
45
51
|
|
|
@@ -47,6 +53,12 @@ class RealtimeModelAudioEvent:
|
|
|
47
53
|
class RealtimeModelAudioInterruptedEvent:
|
|
48
54
|
"""Audio interrupted."""
|
|
49
55
|
|
|
56
|
+
item_id: str
|
|
57
|
+
"""The ID of the item containing audio."""
|
|
58
|
+
|
|
59
|
+
content_index: int
|
|
60
|
+
"""The index of the audio content in `item.content`"""
|
|
61
|
+
|
|
50
62
|
type: Literal["audio_interrupted"] = "audio_interrupted"
|
|
51
63
|
|
|
52
64
|
|
|
@@ -54,6 +66,12 @@ class RealtimeModelAudioInterruptedEvent:
|
|
|
54
66
|
class RealtimeModelAudioDoneEvent:
|
|
55
67
|
"""Audio done."""
|
|
56
68
|
|
|
69
|
+
item_id: str
|
|
70
|
+
"""The ID of the item containing audio."""
|
|
71
|
+
|
|
72
|
+
content_index: int
|
|
73
|
+
"""The index of the audio content in `item.content`"""
|
|
74
|
+
|
|
57
75
|
type: Literal["audio_done"] = "audio_done"
|
|
58
76
|
|
|
59
77
|
|
|
@@ -138,6 +156,15 @@ class RealtimeModelExceptionEvent:
|
|
|
138
156
|
type: Literal["exception"] = "exception"
|
|
139
157
|
|
|
140
158
|
|
|
159
|
+
@dataclass
|
|
160
|
+
class RealtimeModelRawServerEvent:
|
|
161
|
+
"""Raw events forwarded from the server."""
|
|
162
|
+
|
|
163
|
+
data: Any
|
|
164
|
+
|
|
165
|
+
type: Literal["raw_server_event"] = "raw_server_event"
|
|
166
|
+
|
|
167
|
+
|
|
141
168
|
# TODO (rm) Add usage events
|
|
142
169
|
|
|
143
170
|
|
|
@@ -156,4 +183,5 @@ RealtimeModelEvent: TypeAlias = Union[
|
|
|
156
183
|
RealtimeModelTurnEndedEvent,
|
|
157
184
|
RealtimeModelOtherEvent,
|
|
158
185
|
RealtimeModelExceptionEvent,
|
|
186
|
+
RealtimeModelRawServerEvent,
|
|
159
187
|
]
|
|
@@ -57,6 +57,7 @@ from typing_extensions import assert_never
|
|
|
57
57
|
from websockets.asyncio.client import ClientConnection
|
|
58
58
|
|
|
59
59
|
from agents.handoffs import Handoff
|
|
60
|
+
from agents.realtime._default_tracker import ModelAudioTracker
|
|
60
61
|
from agents.tool import FunctionTool, Tool
|
|
61
62
|
from agents.util._types import MaybeAwaitable
|
|
62
63
|
|
|
@@ -72,6 +73,8 @@ from .model import (
|
|
|
72
73
|
RealtimeModel,
|
|
73
74
|
RealtimeModelConfig,
|
|
74
75
|
RealtimeModelListener,
|
|
76
|
+
RealtimePlaybackState,
|
|
77
|
+
RealtimePlaybackTracker,
|
|
75
78
|
)
|
|
76
79
|
from .model_events import (
|
|
77
80
|
RealtimeModelAudioDoneEvent,
|
|
@@ -83,6 +86,7 @@ from .model_events import (
|
|
|
83
86
|
RealtimeModelInputAudioTranscriptionCompletedEvent,
|
|
84
87
|
RealtimeModelItemDeletedEvent,
|
|
85
88
|
RealtimeModelItemUpdatedEvent,
|
|
89
|
+
RealtimeModelRawServerEvent,
|
|
86
90
|
RealtimeModelToolCallEvent,
|
|
87
91
|
RealtimeModelTranscriptDeltaEvent,
|
|
88
92
|
RealtimeModelTurnEndedEvent,
|
|
@@ -133,11 +137,11 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
|
|
|
133
137
|
self._websocket_task: asyncio.Task[None] | None = None
|
|
134
138
|
self._listeners: list[RealtimeModelListener] = []
|
|
135
139
|
self._current_item_id: str | None = None
|
|
136
|
-
self.
|
|
137
|
-
self._audio_length_ms: float = 0.0
|
|
140
|
+
self._audio_state_tracker: ModelAudioTracker = ModelAudioTracker()
|
|
138
141
|
self._ongoing_response: bool = False
|
|
139
|
-
self._current_audio_content_index: int | None = None
|
|
140
142
|
self._tracing_config: RealtimeModelTracingConfig | Literal["auto"] | None = None
|
|
143
|
+
self._playback_tracker: RealtimePlaybackTracker | None = None
|
|
144
|
+
self._created_session: OpenAISessionObject | None = None
|
|
141
145
|
|
|
142
146
|
async def connect(self, options: RealtimeModelConfig) -> None:
|
|
143
147
|
"""Establish a connection to the model and keep it alive."""
|
|
@@ -146,6 +150,8 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
|
|
|
146
150
|
|
|
147
151
|
model_settings: RealtimeSessionModelSettings = options.get("initial_model_settings", {})
|
|
148
152
|
|
|
153
|
+
self._playback_tracker = options.get("playback_tracker", None)
|
|
154
|
+
|
|
149
155
|
self.model = model_settings.get("model_name", self.model)
|
|
150
156
|
api_key = await get_api_key(options.get("api_key"))
|
|
151
157
|
|
|
@@ -220,7 +226,7 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
|
|
|
220
226
|
|
|
221
227
|
except websockets.exceptions.ConnectionClosedOK:
|
|
222
228
|
# Normal connection closure - no exception event needed
|
|
223
|
-
logger.
|
|
229
|
+
logger.debug("WebSocket connection closed normally")
|
|
224
230
|
except websockets.exceptions.ConnectionClosed as e:
|
|
225
231
|
await self._emit_event(
|
|
226
232
|
RealtimeModelExceptionEvent(
|
|
@@ -294,26 +300,76 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
|
|
|
294
300
|
if event.start_response:
|
|
295
301
|
await self._send_raw_message(OpenAIResponseCreateEvent(type="response.create"))
|
|
296
302
|
|
|
303
|
+
def _get_playback_state(self) -> RealtimePlaybackState:
|
|
304
|
+
if self._playback_tracker:
|
|
305
|
+
return self._playback_tracker.get_state()
|
|
306
|
+
|
|
307
|
+
if last_audio_item_id := self._audio_state_tracker.get_last_audio_item():
|
|
308
|
+
item_id, item_content_index = last_audio_item_id
|
|
309
|
+
audio_state = self._audio_state_tracker.get_state(item_id, item_content_index)
|
|
310
|
+
if audio_state:
|
|
311
|
+
elapsed_ms = (
|
|
312
|
+
datetime.now() - audio_state.initial_received_time
|
|
313
|
+
).total_seconds() * 1000
|
|
314
|
+
return {
|
|
315
|
+
"current_item_id": item_id,
|
|
316
|
+
"current_item_content_index": item_content_index,
|
|
317
|
+
"elapsed_ms": elapsed_ms,
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
return {
|
|
321
|
+
"current_item_id": None,
|
|
322
|
+
"current_item_content_index": None,
|
|
323
|
+
"elapsed_ms": None,
|
|
324
|
+
}
|
|
325
|
+
|
|
297
326
|
async def _send_interrupt(self, event: RealtimeModelSendInterrupt) -> None:
|
|
298
|
-
|
|
327
|
+
playback_state = self._get_playback_state()
|
|
328
|
+
current_item_id = playback_state.get("current_item_id")
|
|
329
|
+
current_item_content_index = playback_state.get("current_item_content_index")
|
|
330
|
+
elapsed_ms = playback_state.get("elapsed_ms")
|
|
331
|
+
if current_item_id is None or elapsed_ms is None:
|
|
332
|
+
logger.debug(
|
|
333
|
+
"Skipping interrupt. "
|
|
334
|
+
f"Item id: {current_item_id}, "
|
|
335
|
+
f"elapsed ms: {elapsed_ms}, "
|
|
336
|
+
f"content index: {current_item_content_index}"
|
|
337
|
+
)
|
|
299
338
|
return
|
|
300
339
|
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
340
|
+
current_item_content_index = current_item_content_index or 0
|
|
341
|
+
if elapsed_ms > 0:
|
|
342
|
+
await self._emit_event(
|
|
343
|
+
RealtimeModelAudioInterruptedEvent(
|
|
344
|
+
item_id=current_item_id,
|
|
345
|
+
content_index=current_item_content_index,
|
|
346
|
+
)
|
|
347
|
+
)
|
|
306
348
|
converted = _ConversionHelper.convert_interrupt(
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
int(
|
|
349
|
+
current_item_id,
|
|
350
|
+
current_item_content_index,
|
|
351
|
+
int(elapsed_ms),
|
|
310
352
|
)
|
|
311
353
|
await self._send_raw_message(converted)
|
|
354
|
+
else:
|
|
355
|
+
logger.debug(
|
|
356
|
+
"Didn't interrupt bc elapsed ms is < 0. "
|
|
357
|
+
f"Item id: {current_item_id}, "
|
|
358
|
+
f"elapsed ms: {elapsed_ms}, "
|
|
359
|
+
f"content index: {current_item_content_index}"
|
|
360
|
+
)
|
|
361
|
+
|
|
362
|
+
automatic_response_cancellation_enabled = (
|
|
363
|
+
self._created_session
|
|
364
|
+
and self._created_session.turn_detection
|
|
365
|
+
and self._created_session.turn_detection.interrupt_response
|
|
366
|
+
)
|
|
367
|
+
if not automatic_response_cancellation_enabled:
|
|
368
|
+
await self._cancel_response()
|
|
312
369
|
|
|
313
|
-
self.
|
|
314
|
-
self.
|
|
315
|
-
|
|
316
|
-
self._current_audio_content_index = None
|
|
370
|
+
self._audio_state_tracker.on_interrupted()
|
|
371
|
+
if self._playback_tracker:
|
|
372
|
+
self._playback_tracker.on_interrupted()
|
|
317
373
|
|
|
318
374
|
async def _send_session_update(self, event: RealtimeModelSendSessionUpdate) -> None:
|
|
319
375
|
"""Send a session update to the model."""
|
|
@@ -321,23 +377,21 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
|
|
|
321
377
|
|
|
322
378
|
async def _handle_audio_delta(self, parsed: ResponseAudioDeltaEvent) -> None:
|
|
323
379
|
"""Handle audio delta events and update audio tracking state."""
|
|
324
|
-
self._current_audio_content_index = parsed.content_index
|
|
325
380
|
self._current_item_id = parsed.item_id
|
|
326
|
-
if self._audio_start_time is None:
|
|
327
|
-
self._audio_start_time = datetime.now()
|
|
328
|
-
self._audio_length_ms = 0.0
|
|
329
381
|
|
|
330
382
|
audio_bytes = base64.b64decode(parsed.delta)
|
|
331
|
-
|
|
332
|
-
self.
|
|
383
|
+
|
|
384
|
+
self._audio_state_tracker.on_audio_delta(parsed.item_id, parsed.content_index, audio_bytes)
|
|
385
|
+
|
|
333
386
|
await self._emit_event(
|
|
334
|
-
RealtimeModelAudioEvent(
|
|
387
|
+
RealtimeModelAudioEvent(
|
|
388
|
+
data=audio_bytes,
|
|
389
|
+
response_id=parsed.response_id,
|
|
390
|
+
item_id=parsed.item_id,
|
|
391
|
+
content_index=parsed.content_index,
|
|
392
|
+
)
|
|
335
393
|
)
|
|
336
394
|
|
|
337
|
-
def _calculate_audio_length_ms(self, audio_bytes: bytes) -> float:
|
|
338
|
-
"""Calculate audio length in milliseconds for 24KHz PCM16LE format."""
|
|
339
|
-
return len(audio_bytes) / 24 / 2
|
|
340
|
-
|
|
341
395
|
async def _handle_output_item(self, item: ConversationItem) -> None:
|
|
342
396
|
"""Handle response output item events (function calls and messages)."""
|
|
343
397
|
if item.type == "function_call" and item.status == "completed":
|
|
@@ -401,6 +455,7 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
|
|
|
401
455
|
self._ongoing_response = False
|
|
402
456
|
|
|
403
457
|
async def _handle_ws_event(self, event: dict[str, Any]):
|
|
458
|
+
await self._emit_event(RealtimeModelRawServerEvent(data=event))
|
|
404
459
|
try:
|
|
405
460
|
if "previous_item_id" in event and event["previous_item_id"] is None:
|
|
406
461
|
event["previous_item_id"] = "" # TODO (rm) remove
|
|
@@ -429,7 +484,12 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
|
|
|
429
484
|
if parsed.type == "response.audio.delta":
|
|
430
485
|
await self._handle_audio_delta(parsed)
|
|
431
486
|
elif parsed.type == "response.audio.done":
|
|
432
|
-
await self._emit_event(
|
|
487
|
+
await self._emit_event(
|
|
488
|
+
RealtimeModelAudioDoneEvent(
|
|
489
|
+
item_id=parsed.item_id,
|
|
490
|
+
content_index=parsed.content_index,
|
|
491
|
+
)
|
|
492
|
+
)
|
|
433
493
|
elif parsed.type == "input_audio_buffer.speech_started":
|
|
434
494
|
await self._send_interrupt(RealtimeModelSendInterrupt())
|
|
435
495
|
elif parsed.type == "response.created":
|
|
@@ -440,6 +500,9 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
|
|
|
440
500
|
await self._emit_event(RealtimeModelTurnEndedEvent())
|
|
441
501
|
elif parsed.type == "session.created":
|
|
442
502
|
await self._send_tracing_config(self._tracing_config)
|
|
503
|
+
self._update_created_session(parsed.session) # type: ignore
|
|
504
|
+
elif parsed.type == "session.updated":
|
|
505
|
+
self._update_created_session(parsed.session) # type: ignore
|
|
443
506
|
elif parsed.type == "error":
|
|
444
507
|
await self._emit_event(RealtimeModelErrorEvent(error=parsed.error))
|
|
445
508
|
elif parsed.type == "conversation.item.deleted":
|
|
@@ -489,6 +552,13 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
|
|
|
489
552
|
):
|
|
490
553
|
await self._handle_output_item(parsed.item)
|
|
491
554
|
|
|
555
|
+
def _update_created_session(self, session: OpenAISessionObject) -> None:
|
|
556
|
+
self._created_session = session
|
|
557
|
+
if session.output_audio_format:
|
|
558
|
+
self._audio_state_tracker.set_audio_format(session.output_audio_format)
|
|
559
|
+
if self._playback_tracker:
|
|
560
|
+
self._playback_tracker.set_audio_format(session.output_audio_format)
|
|
561
|
+
|
|
492
562
|
async def _update_session_config(self, model_settings: RealtimeSessionModelSettings) -> None:
|
|
493
563
|
session_config = self._get_session_config(model_settings)
|
|
494
564
|
await self._send_raw_message(
|
|
@@ -506,6 +576,7 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
|
|
|
506
576
|
or DEFAULT_MODEL_SETTINGS.get("model_name")
|
|
507
577
|
),
|
|
508
578
|
voice=model_settings.get("voice", DEFAULT_MODEL_SETTINGS.get("voice")),
|
|
579
|
+
speed=model_settings.get("speed", None),
|
|
509
580
|
modalities=model_settings.get("modalities", DEFAULT_MODEL_SETTINGS.get("modalities")),
|
|
510
581
|
input_audio_format=model_settings.get(
|
|
511
582
|
"input_audio_format",
|
agents/realtime/session.py
CHANGED
|
@@ -180,6 +180,19 @@ class RealtimeSession(RealtimeModelListener):
|
|
|
180
180
|
"""Interrupt the model."""
|
|
181
181
|
await self._model.send_event(RealtimeModelSendInterrupt())
|
|
182
182
|
|
|
183
|
+
async def update_agent(self, agent: RealtimeAgent) -> None:
|
|
184
|
+
"""Update the active agent for this session and apply its settings to the model."""
|
|
185
|
+
self._current_agent = agent
|
|
186
|
+
|
|
187
|
+
updated_settings = await self._get_updated_model_settings_from_agent(
|
|
188
|
+
starting_settings=None,
|
|
189
|
+
agent=self._current_agent,
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
await self._model.send_event(
|
|
193
|
+
RealtimeModelSendSessionUpdate(session_settings=updated_settings)
|
|
194
|
+
)
|
|
195
|
+
|
|
183
196
|
async def on_event(self, event: RealtimeModelEvent) -> None:
|
|
184
197
|
await self._put_event(RealtimeRawModelEvent(data=event, info=self._event_info))
|
|
185
198
|
|
|
@@ -188,11 +201,26 @@ class RealtimeSession(RealtimeModelListener):
|
|
|
188
201
|
elif event.type == "function_call":
|
|
189
202
|
await self._handle_tool_call(event)
|
|
190
203
|
elif event.type == "audio":
|
|
191
|
-
await self._put_event(
|
|
204
|
+
await self._put_event(
|
|
205
|
+
RealtimeAudio(
|
|
206
|
+
info=self._event_info,
|
|
207
|
+
audio=event,
|
|
208
|
+
item_id=event.item_id,
|
|
209
|
+
content_index=event.content_index,
|
|
210
|
+
)
|
|
211
|
+
)
|
|
192
212
|
elif event.type == "audio_interrupted":
|
|
193
|
-
await self._put_event(
|
|
213
|
+
await self._put_event(
|
|
214
|
+
RealtimeAudioInterrupted(
|
|
215
|
+
info=self._event_info, item_id=event.item_id, content_index=event.content_index
|
|
216
|
+
)
|
|
217
|
+
)
|
|
194
218
|
elif event.type == "audio_done":
|
|
195
|
-
await self._put_event(
|
|
219
|
+
await self._put_event(
|
|
220
|
+
RealtimeAudioEnd(
|
|
221
|
+
info=self._event_info, item_id=event.item_id, content_index=event.content_index
|
|
222
|
+
)
|
|
223
|
+
)
|
|
196
224
|
elif event.type == "input_audio_transcription_completed":
|
|
197
225
|
self._history = RealtimeSession._get_new_history(self._history, event)
|
|
198
226
|
await self._put_event(
|
|
@@ -259,6 +287,8 @@ class RealtimeSession(RealtimeModelListener):
|
|
|
259
287
|
self._stored_exception = event.exception
|
|
260
288
|
elif event.type == "other":
|
|
261
289
|
pass
|
|
290
|
+
elif event.type == "raw_server_event":
|
|
291
|
+
pass
|
|
262
292
|
else:
|
|
263
293
|
assert_never(event)
|
|
264
294
|
|
|
@@ -344,19 +374,20 @@ class RealtimeSession(RealtimeModelListener):
|
|
|
344
374
|
)
|
|
345
375
|
)
|
|
346
376
|
|
|
347
|
-
#
|
|
377
|
+
# First, send the session update so the model receives the new instructions
|
|
378
|
+
await self._model.send_event(
|
|
379
|
+
RealtimeModelSendSessionUpdate(session_settings=updated_settings)
|
|
380
|
+
)
|
|
381
|
+
|
|
382
|
+
# Then send tool output to complete the handoff (this triggers a new response)
|
|
383
|
+
transfer_message = handoff.get_transfer_message(result)
|
|
348
384
|
await self._model.send_event(
|
|
349
385
|
RealtimeModelSendToolOutput(
|
|
350
386
|
tool_call=event,
|
|
351
|
-
output=
|
|
387
|
+
output=transfer_message,
|
|
352
388
|
start_response=True,
|
|
353
389
|
)
|
|
354
390
|
)
|
|
355
|
-
|
|
356
|
-
# Send session update to model
|
|
357
|
-
await self._model.send_event(
|
|
358
|
-
RealtimeModelSendSessionUpdate(session_settings=updated_settings)
|
|
359
|
-
)
|
|
360
391
|
else:
|
|
361
392
|
raise ModelBehaviorError(f"Tool {event.name} not found")
|
|
362
393
|
|
agents/run.py
CHANGED
|
@@ -904,10 +904,9 @@ class AgentRunner:
|
|
|
904
904
|
raise ModelBehaviorError("Model did not produce a final response!")
|
|
905
905
|
|
|
906
906
|
# 3. Now, we can process the turn as we do in the non-streaming case
|
|
907
|
-
|
|
907
|
+
return await cls._get_single_step_result_from_streamed_response(
|
|
908
908
|
agent=agent,
|
|
909
|
-
|
|
910
|
-
pre_step_items=streamed_result.new_items,
|
|
909
|
+
streamed_result=streamed_result,
|
|
911
910
|
new_response=final_response,
|
|
912
911
|
output_schema=output_schema,
|
|
913
912
|
all_tools=all_tools,
|
|
@@ -918,9 +917,6 @@ class AgentRunner:
|
|
|
918
917
|
tool_use_tracker=tool_use_tracker,
|
|
919
918
|
)
|
|
920
919
|
|
|
921
|
-
RunImpl.stream_step_result_to_queue(single_step_result, streamed_result._event_queue)
|
|
922
|
-
return single_step_result
|
|
923
|
-
|
|
924
920
|
@classmethod
|
|
925
921
|
async def _run_single_turn(
|
|
926
922
|
cls,
|
|
@@ -1023,6 +1019,57 @@ class AgentRunner:
|
|
|
1023
1019
|
run_config=run_config,
|
|
1024
1020
|
)
|
|
1025
1021
|
|
|
1022
|
+
@classmethod
|
|
1023
|
+
async def _get_single_step_result_from_streamed_response(
|
|
1024
|
+
cls,
|
|
1025
|
+
*,
|
|
1026
|
+
agent: Agent[TContext],
|
|
1027
|
+
all_tools: list[Tool],
|
|
1028
|
+
streamed_result: RunResultStreaming,
|
|
1029
|
+
new_response: ModelResponse,
|
|
1030
|
+
output_schema: AgentOutputSchemaBase | None,
|
|
1031
|
+
handoffs: list[Handoff],
|
|
1032
|
+
hooks: RunHooks[TContext],
|
|
1033
|
+
context_wrapper: RunContextWrapper[TContext],
|
|
1034
|
+
run_config: RunConfig,
|
|
1035
|
+
tool_use_tracker: AgentToolUseTracker,
|
|
1036
|
+
) -> SingleStepResult:
|
|
1037
|
+
|
|
1038
|
+
original_input = streamed_result.input
|
|
1039
|
+
pre_step_items = streamed_result.new_items
|
|
1040
|
+
event_queue = streamed_result._event_queue
|
|
1041
|
+
|
|
1042
|
+
processed_response = RunImpl.process_model_response(
|
|
1043
|
+
agent=agent,
|
|
1044
|
+
all_tools=all_tools,
|
|
1045
|
+
response=new_response,
|
|
1046
|
+
output_schema=output_schema,
|
|
1047
|
+
handoffs=handoffs,
|
|
1048
|
+
)
|
|
1049
|
+
new_items_processed_response = processed_response.new_items
|
|
1050
|
+
tool_use_tracker.add_tool_use(agent, processed_response.tools_used)
|
|
1051
|
+
RunImpl.stream_step_items_to_queue(new_items_processed_response, event_queue)
|
|
1052
|
+
|
|
1053
|
+
single_step_result = await RunImpl.execute_tools_and_side_effects(
|
|
1054
|
+
agent=agent,
|
|
1055
|
+
original_input=original_input,
|
|
1056
|
+
pre_step_items=pre_step_items,
|
|
1057
|
+
new_response=new_response,
|
|
1058
|
+
processed_response=processed_response,
|
|
1059
|
+
output_schema=output_schema,
|
|
1060
|
+
hooks=hooks,
|
|
1061
|
+
context_wrapper=context_wrapper,
|
|
1062
|
+
run_config=run_config,
|
|
1063
|
+
)
|
|
1064
|
+
new_step_items = [
|
|
1065
|
+
item
|
|
1066
|
+
for item in single_step_result.new_step_items
|
|
1067
|
+
if item not in new_items_processed_response
|
|
1068
|
+
]
|
|
1069
|
+
RunImpl.stream_step_items_to_queue(new_step_items, event_queue)
|
|
1070
|
+
|
|
1071
|
+
return single_step_result
|
|
1072
|
+
|
|
1026
1073
|
@classmethod
|
|
1027
1074
|
async def _run_input_guardrails(
|
|
1028
1075
|
cls,
|
agents/tracing/create.py
CHANGED
|
@@ -50,8 +50,7 @@ def trace(
|
|
|
50
50
|
group_id: Optional grouping identifier to link multiple traces from the same conversation
|
|
51
51
|
or process. For instance, you might use a chat thread ID.
|
|
52
52
|
metadata: Optional dictionary of additional metadata to attach to the trace.
|
|
53
|
-
disabled: If True, we will return a Trace but the Trace will not be recorded.
|
|
54
|
-
not be checked if there's an existing trace and `even_if_trace_running` is True.
|
|
53
|
+
disabled: If True, we will return a Trace but the Trace will not be recorded.
|
|
55
54
|
|
|
56
55
|
Returns:
|
|
57
56
|
The newly created trace object.
|
agents/tracing/processors.py
CHANGED
|
@@ -22,7 +22,7 @@ class ConsoleSpanExporter(TracingExporter):
|
|
|
22
22
|
def export(self, items: list[Trace | Span[Any]]) -> None:
|
|
23
23
|
for item in items:
|
|
24
24
|
if isinstance(item, Trace):
|
|
25
|
-
print(f"[Exporter] Export trace_id={item.trace_id}, name={item.name}
|
|
25
|
+
print(f"[Exporter] Export trace_id={item.trace_id}, name={item.name}")
|
|
26
26
|
else:
|
|
27
27
|
print(f"[Exporter] Export span: {item.export()}")
|
|
28
28
|
|
|
@@ -69,9 +69,12 @@ class BackendSpanExporter(TracingExporter):
|
|
|
69
69
|
api_key: The OpenAI API key to use. This is the same key used by the OpenAI Python
|
|
70
70
|
client.
|
|
71
71
|
"""
|
|
72
|
-
#
|
|
72
|
+
# Clear the cached property if it exists
|
|
73
|
+
if 'api_key' in self.__dict__:
|
|
74
|
+
del self.__dict__['api_key']
|
|
75
|
+
|
|
76
|
+
# Update the private attribute
|
|
73
77
|
self._api_key = api_key
|
|
74
|
-
self.api_key = api_key
|
|
75
78
|
|
|
76
79
|
@cached_property
|
|
77
80
|
def api_key(self):
|
|
@@ -121,7 +124,7 @@ class BackendSpanExporter(TracingExporter):
|
|
|
121
124
|
logger.debug(f"Exported {len(items)} items")
|
|
122
125
|
return
|
|
123
126
|
|
|
124
|
-
# If the response is a client error (4xx), we
|
|
127
|
+
# If the response is a client error (4xx), we won't retry
|
|
125
128
|
if 400 <= response.status_code < 500:
|
|
126
129
|
logger.error(
|
|
127
130
|
f"[non-fatal] Tracing client error {response.status_code}: {response.text}"
|
|
@@ -183,7 +186,7 @@ class BatchTraceProcessor(TracingProcessor):
|
|
|
183
186
|
self._shutdown_event = threading.Event()
|
|
184
187
|
|
|
185
188
|
# The queue size threshold at which we export immediately.
|
|
186
|
-
self._export_trigger_size = int(max_queue_size * export_trigger_ratio)
|
|
189
|
+
self._export_trigger_size = max(1, int(max_queue_size * export_trigger_ratio))
|
|
187
190
|
|
|
188
191
|
# Track when we next *must* perform a scheduled export
|
|
189
192
|
self._next_export_time = time.time() + self._schedule_delay
|
|
@@ -269,8 +272,7 @@ class BatchTraceProcessor(TracingProcessor):
|
|
|
269
272
|
|
|
270
273
|
def _export_batches(self, force: bool = False):
|
|
271
274
|
"""Drains the queue and exports in batches. If force=True, export everything.
|
|
272
|
-
Otherwise, export up to `max_batch_size` repeatedly until the queue is empty
|
|
273
|
-
certain threshold.
|
|
275
|
+
Otherwise, export up to `max_batch_size` repeatedly until the queue is completely empty.
|
|
274
276
|
"""
|
|
275
277
|
while True:
|
|
276
278
|
items_to_export: list[Span[Any] | Trace] = []
|