openai-agents 0.0.19__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of openai-agents might be problematic. Click here for more details.

Files changed (43)
  1. agents/__init__.py +5 -2
  2. agents/_run_impl.py +35 -1
  3. agents/agent.py +65 -29
  4. agents/extensions/models/litellm_model.py +7 -3
  5. agents/function_schema.py +11 -1
  6. agents/guardrail.py +5 -1
  7. agents/handoffs.py +14 -0
  8. agents/lifecycle.py +26 -17
  9. agents/mcp/__init__.py +13 -1
  10. agents/mcp/server.py +173 -16
  11. agents/mcp/util.py +89 -6
  12. agents/memory/__init__.py +3 -0
  13. agents/memory/session.py +369 -0
  14. agents/model_settings.py +60 -6
  15. agents/models/chatcmpl_converter.py +31 -2
  16. agents/models/chatcmpl_stream_handler.py +128 -16
  17. agents/models/openai_chatcompletions.py +12 -10
  18. agents/models/openai_responses.py +25 -8
  19. agents/realtime/README.md +3 -0
  20. agents/realtime/__init__.py +174 -0
  21. agents/realtime/agent.py +80 -0
  22. agents/realtime/config.py +128 -0
  23. agents/realtime/events.py +216 -0
  24. agents/realtime/items.py +91 -0
  25. agents/realtime/model.py +69 -0
  26. agents/realtime/model_events.py +159 -0
  27. agents/realtime/model_inputs.py +100 -0
  28. agents/realtime/openai_realtime.py +584 -0
  29. agents/realtime/runner.py +118 -0
  30. agents/realtime/session.py +502 -0
  31. agents/repl.py +1 -4
  32. agents/run.py +131 -10
  33. agents/tool.py +30 -6
  34. agents/tool_context.py +16 -3
  35. agents/tracing/__init__.py +1 -2
  36. agents/tracing/processor_interface.py +1 -1
  37. agents/voice/models/openai_stt.py +1 -1
  38. agents/voice/pipeline.py +6 -0
  39. agents/voice/workflow.py +8 -0
  40. {openai_agents-0.0.19.dist-info → openai_agents-0.2.0.dist-info}/METADATA +133 -8
  41. {openai_agents-0.0.19.dist-info → openai_agents-0.2.0.dist-info}/RECORD +43 -29
  42. {openai_agents-0.0.19.dist-info → openai_agents-0.2.0.dist-info}/WHEEL +0 -0
  43. {openai_agents-0.0.19.dist-info → openai_agents-0.2.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,100 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Any, Literal, Union
5
+
6
+ from typing_extensions import NotRequired, TypeAlias, TypedDict
7
+
8
+ from .config import RealtimeSessionModelSettings
9
+ from .model_events import RealtimeModelToolCallEvent
10
+
11
+
12
class RealtimeModelRawClientMessage(TypedDict):
    """A raw message to be sent to the model."""

    # Wire-level event type understood by the realtime API (e.g. "session.update").
    type: str  # explicitly required
    other_data: NotRequired[dict[str, Any]]
    """Merged into the message body."""


class RealtimeModelInputTextContent(TypedDict):
    """A piece of text to be sent to the model."""

    type: Literal["input_text"]
    text: str


class RealtimeModelUserInputMessage(TypedDict):
    """A message to be sent to the model."""

    type: Literal["message"]
    role: Literal["user"]
    # Ordered list of text parts making up the message.
    content: list[RealtimeModelInputTextContent]


RealtimeModelUserInput: TypeAlias = Union[str, RealtimeModelUserInputMessage]
"""A user input to be sent to the model."""
37
+
38
+
39
# Model messages


@dataclass
class RealtimeModelSendRawMessage:
    """Send a raw message to the model."""

    message: RealtimeModelRawClientMessage
    """The message to send."""


@dataclass
class RealtimeModelSendUserInput:
    """Send a user input to the model."""

    user_input: RealtimeModelUserInput
    """The user input to send."""


@dataclass
class RealtimeModelSendAudio:
    """Send audio to the model."""

    # Raw audio bytes to append to the model's input audio buffer.
    audio: bytes
    # When True, also commit the buffer so the audio is treated as a finished turn.
    commit: bool = False


@dataclass
class RealtimeModelSendToolOutput:
    """Send tool output to the model."""

    tool_call: RealtimeModelToolCallEvent
    """The tool call to send."""

    output: str
    """The output to send."""

    start_response: bool
    """Whether to start a response."""


@dataclass
class RealtimeModelSendInterrupt:
    """Send an interrupt to the model."""


@dataclass
class RealtimeModelSendSessionUpdate:
    """Send a session update to the model."""

    session_settings: RealtimeSessionModelSettings
    """The updated session settings to send."""


# Closed union of every client->model event; consumers dispatch on the concrete type.
RealtimeModelSendEvent: TypeAlias = Union[
    RealtimeModelSendRawMessage,
    RealtimeModelSendUserInput,
    RealtimeModelSendAudio,
    RealtimeModelSendToolOutput,
    RealtimeModelSendInterrupt,
    RealtimeModelSendSessionUpdate,
]
@@ -0,0 +1,584 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import base64
5
+ import inspect
6
+ import json
7
+ import os
8
+ from datetime import datetime
9
+ from typing import Any, Callable, Literal
10
+
11
+ import pydantic
12
+ import websockets
13
+ from openai.types.beta.realtime.conversation_item import ConversationItem
14
+ from openai.types.beta.realtime.realtime_server_event import (
15
+ RealtimeServerEvent as OpenAIRealtimeServerEvent,
16
+ )
17
+ from openai.types.beta.realtime.response_audio_delta_event import ResponseAudioDeltaEvent
18
+ from openai.types.beta.realtime.session_update_event import (
19
+ Session as OpenAISessionObject,
20
+ SessionTool as OpenAISessionTool,
21
+ )
22
+ from pydantic import TypeAdapter
23
+ from typing_extensions import assert_never
24
+ from websockets.asyncio.client import ClientConnection
25
+
26
+ from agents.tool import FunctionTool, Tool
27
+ from agents.util._types import MaybeAwaitable
28
+
29
+ from ..exceptions import UserError
30
+ from ..logger import logger
31
+ from .config import (
32
+ RealtimeModelTracingConfig,
33
+ RealtimeSessionModelSettings,
34
+ )
35
+ from .items import RealtimeMessageItem, RealtimeToolCallItem
36
+ from .model import (
37
+ RealtimeModel,
38
+ RealtimeModelConfig,
39
+ RealtimeModelListener,
40
+ )
41
+ from .model_events import (
42
+ RealtimeModelAudioDoneEvent,
43
+ RealtimeModelAudioEvent,
44
+ RealtimeModelAudioInterruptedEvent,
45
+ RealtimeModelErrorEvent,
46
+ RealtimeModelEvent,
47
+ RealtimeModelExceptionEvent,
48
+ RealtimeModelInputAudioTranscriptionCompletedEvent,
49
+ RealtimeModelItemDeletedEvent,
50
+ RealtimeModelItemUpdatedEvent,
51
+ RealtimeModelToolCallEvent,
52
+ RealtimeModelTranscriptDeltaEvent,
53
+ RealtimeModelTurnEndedEvent,
54
+ RealtimeModelTurnStartedEvent,
55
+ )
56
+ from .model_inputs import (
57
+ RealtimeModelSendAudio,
58
+ RealtimeModelSendEvent,
59
+ RealtimeModelSendInterrupt,
60
+ RealtimeModelSendRawMessage,
61
+ RealtimeModelSendSessionUpdate,
62
+ RealtimeModelSendToolOutput,
63
+ RealtimeModelSendUserInput,
64
+ )
65
+
66
# Fallback session settings applied when the caller does not override a field
# (see _get_session_config, which reads these as per-key defaults).
DEFAULT_MODEL_SETTINGS: RealtimeSessionModelSettings = {
    "voice": "ash",
    "modalities": ["text", "audio"],
    "input_audio_format": "pcm16",
    "output_audio_format": "pcm16",
    "input_audio_transcription": {
        "model": "gpt-4o-mini-transcribe",
    },
    "turn_detection": {"type": "semantic_vad"},
}
76
+
77
+
78
async def get_api_key(key: str | Callable[[], MaybeAwaitable[str]] | None) -> str | None:
    """Resolve an API key.

    Accepts a literal key, a zero-arg factory (sync or async), or None.
    Anything else falls through to the OPENAI_API_KEY environment variable;
    returns None if that variable is unset.
    """
    if isinstance(key, str):
        return key
    if callable(key):
        # The factory may hand back the key directly or an awaitable of it.
        produced = key()
        return await produced if inspect.isawaitable(produced) else produced
    return os.getenv("OPENAI_API_KEY")
88
+
89
+
90
class OpenAIRealtimeWebSocketModel(RealtimeModel):
    """A model that uses OpenAI's WebSocket API."""

    def __init__(self) -> None:
        self.model = "gpt-4o-realtime-preview"  # Default model
        self._websocket: ClientConnection | None = None
        # Background task running _listen_for_messages; lives for the connection.
        self._websocket_task: asyncio.Task[None] | None = None
        self._listeners: list[RealtimeModelListener] = []
        # Conversation item currently producing audio output, if any.
        self._current_item_id: str | None = None
        # Wall-clock instant the current audio playback started (None when idle).
        self._audio_start_time: datetime | None = None
        # Total audio received for the current item, in milliseconds.
        self._audio_length_ms: float = 0.0
        # True between "response.created" and "response.done" server events.
        self._ongoing_response: bool = False
        self._current_audio_content_index: int | None = None
        # Tracing config sent once the session is created; "auto" unless overridden.
        self._tracing_config: RealtimeModelTracingConfig | Literal["auto"] | None = None
104
+
105
    async def connect(self, options: RealtimeModelConfig) -> None:
        """Establish a connection to the model and keep it alive."""
        assert self._websocket is None, "Already connected"
        assert self._websocket_task is None, "Already connected"

        model_settings: RealtimeSessionModelSettings = options.get("initial_model_settings", {})

        self.model = model_settings.get("model_name", self.model)
        api_key = await get_api_key(options.get("api_key"))

        # Tracing defaults to "auto" unless the caller supplied a value;
        # an explicit None in model_settings disables the tracing update entirely
        # (see _send_tracing_config).
        if "tracing" in model_settings:
            self._tracing_config = model_settings["tracing"]
        else:
            self._tracing_config = "auto"

        if not api_key:
            raise UserError("API key is required but was not provided.")

        url = options.get("url", f"wss://api.openai.com/v1/realtime?model={self.model}")

        headers = {
            "Authorization": f"Bearer {api_key}",
            "OpenAI-Beta": "realtime=v1",
        }
        self._websocket = await websockets.connect(url, additional_headers=headers)
        # Pump server events in the background for the life of the connection.
        self._websocket_task = asyncio.create_task(self._listen_for_messages())
        await self._update_session_config(model_settings)
132
+
133
    async def _send_tracing_config(
        self, tracing_config: RealtimeModelTracingConfig | Literal["auto"] | None
    ) -> None:
        """Update tracing configuration via session.update event."""
        # None means tracing was explicitly disabled; send nothing in that case.
        if tracing_config is not None:
            await self._send_raw_message(
                RealtimeModelSendRawMessage(
                    message={
                        "type": "session.update",
                        "other_data": {"session": {"tracing": tracing_config}},
                    }
                )
            )
146
+
147
+ def add_listener(self, listener: RealtimeModelListener) -> None:
148
+ """Add a listener to the model."""
149
+ if listener not in self._listeners:
150
+ self._listeners.append(listener)
151
+
152
+ def remove_listener(self, listener: RealtimeModelListener) -> None:
153
+ """Remove a listener from the model."""
154
+ if listener in self._listeners:
155
+ self._listeners.remove(listener)
156
+
157
    async def _emit_event(self, event: RealtimeModelEvent) -> None:
        """Emit an event to the listeners."""
        # Listeners are awaited sequentially, in registration order.
        for listener in self._listeners:
            await listener.on_event(event)
161
+
162
    async def _listen_for_messages(self) -> None:
        """Pump server messages from the websocket until it closes.

        Per-message failures (bad JSON, handler errors) are surfaced to
        listeners as exception events rather than raised, so one bad event
        cannot kill the listener loop.
        """
        assert self._websocket is not None, "Not connected"

        try:
            async for message in self._websocket:
                try:
                    parsed = json.loads(message)
                    await self._handle_ws_event(parsed)
                except json.JSONDecodeError as e:
                    await self._emit_event(
                        RealtimeModelExceptionEvent(
                            exception=e, context="Failed to parse WebSocket message as JSON"
                        )
                    )
                except Exception as e:
                    await self._emit_event(
                        RealtimeModelExceptionEvent(
                            exception=e, context="Error handling WebSocket event"
                        )
                    )

        # Order matters: ConnectionClosedOK is a subclass of ConnectionClosed.
        except websockets.exceptions.ConnectionClosedOK:
            # Normal connection closure - no exception event needed
            logger.info("WebSocket connection closed normally")
        except websockets.exceptions.ConnectionClosed as e:
            await self._emit_event(
                RealtimeModelExceptionEvent(
                    exception=e, context="WebSocket connection closed unexpectedly"
                )
            )
        except Exception as e:
            await self._emit_event(
                RealtimeModelExceptionEvent(
                    exception=e, context="WebSocket error in message listener"
                )
            )
198
+
199
+ async def send_event(self, event: RealtimeModelSendEvent) -> None:
200
+ """Send an event to the model."""
201
+ if isinstance(event, RealtimeModelSendRawMessage):
202
+ await self._send_raw_message(event)
203
+ elif isinstance(event, RealtimeModelSendUserInput):
204
+ await self._send_user_input(event)
205
+ elif isinstance(event, RealtimeModelSendAudio):
206
+ await self._send_audio(event)
207
+ elif isinstance(event, RealtimeModelSendToolOutput):
208
+ await self._send_tool_output(event)
209
+ elif isinstance(event, RealtimeModelSendInterrupt):
210
+ await self._send_interrupt(event)
211
+ elif isinstance(event, RealtimeModelSendSessionUpdate):
212
+ await self._send_session_update(event)
213
+ else:
214
+ assert_never(event)
215
+ raise ValueError(f"Unknown event type: {type(event)}")
216
+
217
    async def _send_raw_message(self, event: RealtimeModelSendRawMessage) -> None:
        """Send a raw message to the model."""
        assert self._websocket is not None, "Not connected"

        # The wire format is a flat JSON object: the "type" field plus the
        # contents of other_data merged in at the top level.
        converted_event = {
            "type": event.message["type"],
        }

        converted_event.update(event.message.get("other_data", {}))

        await self._websocket.send(json.dumps(converted_event))
228
+
229
    async def _send_user_input(self, event: RealtimeModelSendUserInput) -> None:
        """Create a user message item on the server and request a response."""
        # A plain string is wrapped into a standard user message; a dict is
        # assumed to already be a RealtimeModelUserInputMessage payload.
        message = (
            event.user_input
            if isinstance(event.user_input, dict)
            else {
                "type": "message",
                "role": "user",
                "content": [{"type": "input_text", "text": event.user_input}],
            }
        )
        other_data = {
            "item": message,
        }

        await self._send_raw_message(
            RealtimeModelSendRawMessage(
                message={"type": "conversation.item.create", "other_data": other_data}
            )
        )
        # Immediately ask the model to respond to the newly created item.
        await self._send_raw_message(
            RealtimeModelSendRawMessage(message={"type": "response.create"})
        )
251
+
252
    async def _send_audio(self, event: RealtimeModelSendAudio) -> None:
        """Append audio to the input buffer, optionally committing the turn."""
        # The realtime API carries audio as base64 text inside JSON.
        base64_audio = base64.b64encode(event.audio).decode("utf-8")
        await self._send_raw_message(
            RealtimeModelSendRawMessage(
                message={
                    "type": "input_audio_buffer.append",
                    "other_data": {
                        "audio": base64_audio,
                    },
                }
            )
        )
        if event.commit:
            await self._send_raw_message(
                RealtimeModelSendRawMessage(message={"type": "input_audio_buffer.commit"})
            )
268
+
269
    async def _send_tool_output(self, event: RealtimeModelSendToolOutput) -> None:
        """Send a function call's output back to the model.

        Creates a function_call_output conversation item, mirrors the completed
        tool call to listeners, and optionally kicks off a new response.
        """
        await self._send_raw_message(
            RealtimeModelSendRawMessage(
                message={
                    "type": "conversation.item.create",
                    "other_data": {
                        "item": {
                            "type": "function_call_output",
                            "output": event.output,
                            "call_id": event.tool_call.id,
                        },
                    },
                }
            )
        )

        # Emit the tool call with its output attached so local item state stays
        # in sync with what was sent to the server.
        tool_item = RealtimeToolCallItem(
            item_id=event.tool_call.id or "",
            previous_item_id=event.tool_call.previous_item_id,
            type="function_call",
            status="completed",
            arguments=event.tool_call.arguments,
            name=event.tool_call.name,
            output=event.output,
        )
        await self._emit_event(RealtimeModelItemUpdatedEvent(item=tool_item))

        if event.start_response:
            await self._send_raw_message(
                RealtimeModelSendRawMessage(message={"type": "response.create"})
            )
300
+
301
    async def _send_interrupt(self, event: RealtimeModelSendInterrupt) -> None:
        """Interrupt the model's current audio output.

        Cancels any in-flight response and, when playback was cut off mid-item,
        truncates the item server-side at the elapsed playback position so the
        conversation history matches what was actually heard.
        """
        # Nothing is playing, so there is nothing to interrupt.
        if not self._current_item_id or not self._audio_start_time:
            return

        await self._cancel_response()

        elapsed_time_ms = (datetime.now() - self._audio_start_time).total_seconds() * 1000
        # Only truncate if the interruption landed strictly inside the audio:
        # past the end means playback already finished on its own.
        if elapsed_time_ms > 0 and elapsed_time_ms < self._audio_length_ms:
            await self._emit_event(RealtimeModelAudioInterruptedEvent())
            await self._send_raw_message(
                RealtimeModelSendRawMessage(
                    message={
                        "type": "conversation.item.truncate",
                        "other_data": {
                            "item_id": self._current_item_id,
                            "content_index": self._current_audio_content_index,
                            "audio_end_ms": elapsed_time_ms,
                        },
                    }
                )
            )

        # Reset audio-tracking state for the next response.
        self._current_item_id = None
        self._audio_start_time = None
        self._audio_length_ms = 0.0
        self._current_audio_content_index = None

    async def _send_session_update(self, event: RealtimeModelSendSessionUpdate) -> None:
        """Send a session update to the model."""
        await self._update_session_config(event.session_settings)
331
+
332
    async def _handle_audio_delta(self, parsed: ResponseAudioDeltaEvent) -> None:
        """Handle audio delta events and update audio tracking state."""
        self._current_audio_content_index = parsed.content_index
        self._current_item_id = parsed.item_id
        # First delta of a new audio stream: start the playback clock.
        if self._audio_start_time is None:
            self._audio_start_time = datetime.now()
            self._audio_length_ms = 0.0

        audio_bytes = base64.b64decode(parsed.delta)
        # Calculate audio length in ms using 24KHz pcm16le
        self._audio_length_ms += self._calculate_audio_length_ms(audio_bytes)
        await self._emit_event(
            RealtimeModelAudioEvent(data=audio_bytes, response_id=parsed.response_id)
        )
346
+
347
+ def _calculate_audio_length_ms(self, audio_bytes: bytes) -> float:
348
+ """Calculate audio length in milliseconds for 24KHz PCM16LE format."""
349
+ return len(audio_bytes) / 24 / 2
350
+
351
    async def _handle_output_item(self, item: ConversationItem) -> None:
        """Handle response output item events (function calls and messages)."""
        if item.type == "function_call" and item.status == "completed":
            tool_call = RealtimeToolCallItem(
                item_id=item.id or "",
                previous_item_id=None,
                type="function_call",
                # We use the same item for tool call and output, so it will be completed by the
                # output being added
                status="in_progress",
                arguments=item.arguments or "",
                name=item.name or "",
                output=None,
            )
            await self._emit_event(RealtimeModelItemUpdatedEvent(item=tool_call))
            # Also emit a tool-call event so the session can actually run the tool.
            await self._emit_event(
                RealtimeModelToolCallEvent(
                    call_id=item.id or "",
                    name=item.name or "",
                    arguments=item.arguments or "",
                    id=item.id or "",
                )
            )
        elif item.type == "message":
            # Handle message items from output_item events (no previous_item_id)
            message_item: RealtimeMessageItem = TypeAdapter(RealtimeMessageItem).validate_python(
                {
                    "item_id": item.id or "",
                    "type": item.type,
                    "role": item.role,
                    "content": item.content,
                    "status": "in_progress",
                }
            )
            await self._emit_event(RealtimeModelItemUpdatedEvent(item=message_item))
386
+
387
    async def _handle_conversation_item(
        self, item: ConversationItem, previous_item_id: str | None
    ) -> None:
        """Handle conversation item creation/retrieval events."""
        # Convert the wire object to an SDK item and notify listeners.
        message_item = _ConversionHelper.conversation_item_to_realtime_message_item(
            item, previous_item_id
        )
        await self._emit_event(RealtimeModelItemUpdatedEvent(item=message_item))
395
+
396
+ async def close(self) -> None:
397
+ """Close the session."""
398
+ if self._websocket:
399
+ await self._websocket.close()
400
+ self._websocket = None
401
+ if self._websocket_task:
402
+ self._websocket_task.cancel()
403
+ self._websocket_task = None
404
+
405
+ async def _cancel_response(self) -> None:
406
+ if self._ongoing_response:
407
+ await self._send_raw_message(
408
+ RealtimeModelSendRawMessage(message={"type": "response.cancel"})
409
+ )
410
+ self._ongoing_response = False
411
+
412
    async def _handle_ws_event(self, event: dict[str, Any]) -> None:
        """Validate a raw server event and dispatch it to the right handler.

        Validation failures are reported to listeners (as error or exception
        events) instead of raising, so the listener loop keeps running.
        """
        try:
            if "previous_item_id" in event and event["previous_item_id"] is None:
                event["previous_item_id"] = ""  # TODO (rm) remove
            parsed: OpenAIRealtimeServerEvent = TypeAdapter(
                OpenAIRealtimeServerEvent
            ).validate_python(event)
        except pydantic.ValidationError as e:
            logger.error(f"Failed to validate server event: {event}", exc_info=True)
            await self._emit_event(
                RealtimeModelErrorEvent(
                    error=e,
                )
            )
            return
        except Exception as e:
            event_type = event.get("type", "unknown") if isinstance(event, dict) else "unknown"
            logger.error(f"Failed to validate server event: {event}", exc_info=True)
            await self._emit_event(
                RealtimeModelExceptionEvent(
                    exception=e,
                    context=f"Failed to validate server event: {event_type}",
                )
            )
            return

        # Dispatch on the parsed event type.
        if parsed.type == "response.audio.delta":
            await self._handle_audio_delta(parsed)
        elif parsed.type == "response.audio.done":
            await self._emit_event(RealtimeModelAudioDoneEvent())
        elif parsed.type == "input_audio_buffer.speech_started":
            # User started talking: treat as an interruption of model audio.
            await self._send_interrupt(RealtimeModelSendInterrupt())
        elif parsed.type == "response.created":
            self._ongoing_response = True
            await self._emit_event(RealtimeModelTurnStartedEvent())
        elif parsed.type == "response.done":
            self._ongoing_response = False
            await self._emit_event(RealtimeModelTurnEndedEvent())
        elif parsed.type == "session.created":
            # First chance to configure tracing for the new session.
            await self._send_tracing_config(self._tracing_config)
        elif parsed.type == "error":
            await self._emit_event(RealtimeModelErrorEvent(error=parsed.error))
        elif parsed.type == "conversation.item.deleted":
            await self._emit_event(RealtimeModelItemDeletedEvent(item_id=parsed.item_id))
        elif (
            parsed.type == "conversation.item.created"
            or parsed.type == "conversation.item.retrieved"
        ):
            # Only "created" events carry a meaningful previous_item_id.
            previous_item_id = (
                parsed.previous_item_id if parsed.type == "conversation.item.created" else None
            )
            if parsed.item.type == "message":
                await self._handle_conversation_item(parsed.item, previous_item_id)
        elif (
            parsed.type == "conversation.item.input_audio_transcription.completed"
            or parsed.type == "conversation.item.truncated"
        ):
            # Re-fetch the current item so local state reflects the server's copy.
            await self._send_raw_message(
                RealtimeModelSendRawMessage(
                    message={
                        "type": "conversation.item.retrieve",
                        "other_data": {
                            "item_id": self._current_item_id,
                        },
                    }
                )
            )
            if parsed.type == "conversation.item.input_audio_transcription.completed":
                await self._emit_event(
                    RealtimeModelInputAudioTranscriptionCompletedEvent(
                        item_id=parsed.item_id, transcript=parsed.transcript
                    )
                )
        elif parsed.type == "response.audio_transcript.delta":
            await self._emit_event(
                RealtimeModelTranscriptDeltaEvent(
                    item_id=parsed.item_id, delta=parsed.delta, response_id=parsed.response_id
                )
            )
        elif (
            parsed.type == "conversation.item.input_audio_transcription.delta"
            or parsed.type == "response.text.delta"
            or parsed.type == "response.function_call_arguments.delta"
        ):
            # No support for partials yet
            pass
        elif (
            parsed.type == "response.output_item.added"
            or parsed.type == "response.output_item.done"
        ):
            await self._handle_output_item(parsed.item)
504
    async def _update_session_config(self, model_settings: RealtimeSessionModelSettings) -> None:
        """Build a session config from the settings and push it via session.update."""
        session_config = self._get_session_config(model_settings)
        await self._send_raw_message(
            RealtimeModelSendRawMessage(
                message={
                    "type": "session.update",
                    "other_data": {
                        # Drop unset/None fields so server-side defaults survive.
                        "session": session_config.model_dump(exclude_unset=True, exclude_none=True)
                    },
                }
            )
        )
516
+
517
    def _get_session_config(
        self, model_settings: RealtimeSessionModelSettings
    ) -> OpenAISessionObject:
        """Get the session config.

        Each field falls back to DEFAULT_MODEL_SETTINGS when the caller did
        not provide a value; instructions have no default and stay None.
        """
        return OpenAISessionObject(
            instructions=model_settings.get("instructions", None),
            model=(
                model_settings.get("model_name", self.model)  # type: ignore
                or DEFAULT_MODEL_SETTINGS.get("model_name")
            ),
            voice=model_settings.get("voice", DEFAULT_MODEL_SETTINGS.get("voice")),
            modalities=model_settings.get("modalities", DEFAULT_MODEL_SETTINGS.get("modalities")),
            input_audio_format=model_settings.get(
                "input_audio_format",
                DEFAULT_MODEL_SETTINGS.get("input_audio_format"),  # type: ignore
            ),
            output_audio_format=model_settings.get(
                "output_audio_format",
                DEFAULT_MODEL_SETTINGS.get("output_audio_format"),  # type: ignore
            ),
            input_audio_transcription=model_settings.get(
                "input_audio_transcription",
                DEFAULT_MODEL_SETTINGS.get("input_audio_transcription"),  # type: ignore
            ),
            turn_detection=model_settings.get(
                "turn_detection",
                DEFAULT_MODEL_SETTINGS.get("turn_detection"),  # type: ignore
            ),
            tool_choice=model_settings.get(
                "tool_choice",
                DEFAULT_MODEL_SETTINGS.get("tool_choice"),  # type: ignore
            ),
            tools=self._tools_to_session_tools(model_settings.get("tools", [])),
        )
551
+
552
+ def _tools_to_session_tools(self, tools: list[Tool]) -> list[OpenAISessionTool]:
553
+ converted_tools: list[OpenAISessionTool] = []
554
+ for tool in tools:
555
+ if not isinstance(tool, FunctionTool):
556
+ raise UserError(f"Tool {tool.name} is unsupported. Must be a function tool.")
557
+ converted_tools.append(
558
+ OpenAISessionTool(
559
+ name=tool.name,
560
+ description=tool.description,
561
+ parameters=tool.params_json_schema,
562
+ type="function",
563
+ )
564
+ )
565
+ return converted_tools
566
+
567
+
568
class _ConversionHelper:
    """Internal helpers for converting OpenAI wire objects to SDK item types."""

    @classmethod
    def conversation_item_to_realtime_message_item(
        cls, item: ConversationItem, previous_item_id: str | None
    ) -> RealtimeMessageItem:
        """Build a RealtimeMessageItem from a server ConversationItem."""
        if item.content:
            content = [part.model_dump() for part in item.content]
        else:
            content = []
        payload = {
            "item_id": item.id or "",
            "previous_item_id": previous_item_id,
            "type": item.type,
            "role": item.role,
            "content": content,
            "status": "in_progress",
        }
        return TypeAdapter(RealtimeMessageItem).validate_python(payload)