openai-agents 0.2.7__py3-none-any.whl → 0.2.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of openai-agents might be problematic.
- agents/_run_impl.py +4 -1
- agents/agent.py +36 -4
- agents/extensions/memory/__init__.py +15 -0
- agents/extensions/memory/sqlalchemy_session.py +298 -0
- agents/extensions/models/litellm_model.py +4 -2
- agents/extensions/models/litellm_provider.py +3 -1
- agents/function_schema.py +2 -2
- agents/items.py +1 -2
- agents/lifecycle.py +40 -1
- agents/mcp/server.py +59 -8
- agents/model_settings.py +4 -1
- agents/models/__init__.py +13 -0
- agents/models/chatcmpl_converter.py +5 -0
- agents/models/default_models.py +58 -0
- agents/models/openai_provider.py +3 -1
- agents/realtime/config.py +3 -0
- agents/realtime/events.py +11 -0
- agents/realtime/model_events.py +10 -0
- agents/realtime/openai_realtime.py +31 -5
- agents/realtime/session.py +61 -2
- agents/repl.py +7 -3
- agents/run.py +127 -11
- agents/tool.py +5 -1
- {openai_agents-0.2.7.dist-info → openai_agents-0.2.9.dist-info}/METADATA +15 -13
- {openai_agents-0.2.7.dist-info → openai_agents-0.2.9.dist-info}/RECORD +27 -24
- {openai_agents-0.2.7.dist-info → openai_agents-0.2.9.dist-info}/WHEEL +0 -0
- {openai_agents-0.2.7.dist-info → openai_agents-0.2.9.dist-info}/licenses/LICENSE +0 -0
agents/mcp/server.py
CHANGED
@@ -3,10 +3,11 @@ from __future__ import annotations
 import abc
 import asyncio
 import inspect
+from collections.abc import Awaitable
 from contextlib import AbstractAsyncContextManager, AsyncExitStack
 from datetime import timedelta
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Literal,
+from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar
 
 from anyio.streams.memory import MemoryObjectReceiveStream, MemoryObjectSendStream
 from mcp import ClientSession, StdioServerParameters, Tool as MCPTool, stdio_client
@@ -19,7 +20,9 @@ from typing_extensions import NotRequired, TypedDict
 from ..exceptions import UserError
 from ..logger import logger
 from ..run_context import RunContextWrapper
-from .util import ToolFilter,
+from .util import ToolFilter, ToolFilterContext, ToolFilterStatic
+
+T = TypeVar("T")
 
 if TYPE_CHECKING:
     from ..agent import AgentBase
@@ -98,6 +101,8 @@ class _MCPServerWithClientSession(MCPServer, abc.ABC):
         client_session_timeout_seconds: float | None,
         tool_filter: ToolFilter = None,
         use_structured_content: bool = False,
+        max_retry_attempts: int = 0,
+        retry_backoff_seconds_base: float = 1.0,
     ):
         """
         Args:
@@ -115,6 +120,10 @@ class _MCPServerWithClientSession(MCPServer, abc.ABC):
                 include the structured content in the `tool_result.content`, and using it by
                 default will cause duplicate content. You can set this to True if you know the
                 server will not duplicate the structured content in the `tool_result.content`.
+            max_retry_attempts: Number of times to retry failed list_tools/call_tool calls.
+                Defaults to no retries.
+            retry_backoff_seconds_base: The base delay, in seconds, used for exponential
+                backoff between retries.
         """
         super().__init__(use_structured_content=use_structured_content)
         self.session: ClientSession | None = None
@@ -124,6 +133,8 @@ class _MCPServerWithClientSession(MCPServer, abc.ABC):
         self.server_initialize_result: InitializeResult | None = None
 
         self.client_session_timeout_seconds = client_session_timeout_seconds
+        self.max_retry_attempts = max_retry_attempts
+        self.retry_backoff_seconds_base = retry_backoff_seconds_base
 
         # The cache is always dirty at startup, so that we fetch tools at least once
         self._cache_dirty = True
@@ -175,10 +186,10 @@ class _MCPServerWithClientSession(MCPServer, abc.ABC):
     ) -> list[MCPTool]:
         """Apply dynamic tool filtering using a callable filter function."""
 
-        # Ensure we have a callable filter
+        # Ensure we have a callable filter
         if not callable(self.tool_filter):
             raise ValueError("Tool filter must be callable for dynamic filtering")
-        tool_filter_func =
+        tool_filter_func = self.tool_filter
 
         # Create filter context
         filter_context = ToolFilterContext(
@@ -233,6 +244,18 @@ class _MCPServerWithClientSession(MCPServer, abc.ABC):
         """Invalidate the tools cache."""
         self._cache_dirty = True
 
+    async def _run_with_retries(self, func: Callable[[], Awaitable[T]]) -> T:
+        attempts = 0
+        while True:
+            try:
+                return await func()
+            except Exception:
+                attempts += 1
+                if self.max_retry_attempts != -1 and attempts > self.max_retry_attempts:
+                    raise
+                backoff = self.retry_backoff_seconds_base * (2 ** (attempts - 1))
+                await asyncio.sleep(backoff)
+
     async def connect(self):
         """Connect to the server."""
         try:
@@ -267,15 +290,17 @@ class _MCPServerWithClientSession(MCPServer, abc.ABC):
         """List the tools available on the server."""
         if not self.session:
             raise UserError("Server not initialized. Make sure you call `connect()` first.")
+        session = self.session
+        assert session is not None
 
         # Return from cache if caching is enabled, we have tools, and the cache is not dirty
         if self.cache_tools_list and not self._cache_dirty and self._tools_list:
            tools = self._tools_list
         else:
-            # Reset the cache dirty to False
-            self._cache_dirty = False
             # Fetch the tools from the server
-
+            result = await self._run_with_retries(lambda: session.list_tools())
+            self._tools_list = result.tools
+            self._cache_dirty = False
             tools = self._tools_list
 
         # Filter tools based on tool_filter
@@ -290,8 +315,10 @@ class _MCPServerWithClientSession(MCPServer, abc.ABC):
         """Invoke a tool on the server."""
         if not self.session:
             raise UserError("Server not initialized. Make sure you call `connect()` first.")
+        session = self.session
+        assert session is not None
 
-        return await self.session.call_tool(tool_name, arguments)
+        return await self._run_with_retries(lambda: session.call_tool(tool_name, arguments))
 
     async def list_prompts(
         self,
@@ -365,6 +392,8 @@ class MCPServerStdio(_MCPServerWithClientSession):
         client_session_timeout_seconds: float | None = 5,
         tool_filter: ToolFilter = None,
         use_structured_content: bool = False,
+        max_retry_attempts: int = 0,
+        retry_backoff_seconds_base: float = 1.0,
     ):
         """Create a new MCP server based on the stdio transport.
 
@@ -388,12 +417,18 @@ class MCPServerStdio(_MCPServerWithClientSession):
                 include the structured content in the `tool_result.content`, and using it by
                 default will cause duplicate content. You can set this to True if you know the
                 server will not duplicate the structured content in the `tool_result.content`.
+            max_retry_attempts: Number of times to retry failed list_tools/call_tool calls.
+                Defaults to no retries.
+            retry_backoff_seconds_base: The base delay, in seconds, for exponential
+                backoff between retries.
         """
         super().__init__(
             cache_tools_list,
             client_session_timeout_seconds,
             tool_filter,
             use_structured_content,
+            max_retry_attempts,
+            retry_backoff_seconds_base,
         )
 
         self.params = StdioServerParameters(
@@ -455,6 +490,8 @@ class MCPServerSse(_MCPServerWithClientSession):
         client_session_timeout_seconds: float | None = 5,
         tool_filter: ToolFilter = None,
         use_structured_content: bool = False,
+        max_retry_attempts: int = 0,
+        retry_backoff_seconds_base: float = 1.0,
     ):
         """Create a new MCP server based on the HTTP with SSE transport.
 
@@ -480,12 +517,18 @@ class MCPServerSse(_MCPServerWithClientSession):
                 include the structured content in the `tool_result.content`, and using it by
                 default will cause duplicate content. You can set this to True if you know the
                 server will not duplicate the structured content in the `tool_result.content`.
+            max_retry_attempts: Number of times to retry failed list_tools/call_tool calls.
+                Defaults to no retries.
+            retry_backoff_seconds_base: The base delay, in seconds, for exponential
+                backoff between retries.
         """
         super().__init__(
             cache_tools_list,
             client_session_timeout_seconds,
             tool_filter,
             use_structured_content,
+            max_retry_attempts,
+            retry_backoff_seconds_base,
         )
 
         self.params = params
@@ -547,6 +590,8 @@ class MCPServerStreamableHttp(_MCPServerWithClientSession):
         client_session_timeout_seconds: float | None = 5,
         tool_filter: ToolFilter = None,
         use_structured_content: bool = False,
+        max_retry_attempts: int = 0,
+        retry_backoff_seconds_base: float = 1.0,
     ):
         """Create a new MCP server based on the Streamable HTTP transport.
 
@@ -573,12 +618,18 @@ class MCPServerStreamableHttp(_MCPServerWithClientSession):
                 include the structured content in the `tool_result.content`, and using it by
                 default will cause duplicate content. You can set this to True if you know the
                 server will not duplicate the structured content in the `tool_result.content`.
+            max_retry_attempts: Number of times to retry failed list_tools/call_tool calls.
+                Defaults to no retries.
+            retry_backoff_seconds_base: The base delay, in seconds, for exponential
+                backoff between retries.
         """
         super().__init__(
             cache_tools_list,
             client_session_timeout_seconds,
             tool_filter,
             use_structured_content,
+            max_retry_attempts,
+            retry_backoff_seconds_base,
        )
 
         self.params = params
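Usage sketch (illustrative, not part of the diff): enabling the new retry behavior on a stdio MCP server. The server command, script name, and agent wiring below are placeholders; only the max_retry_attempts / retry_backoff_seconds_base parameters come from this release.

import asyncio

from agents import Agent, Runner
from agents.mcp import MCPServerStdio


async def main() -> None:
    # Retry failed list_tools/call_tool calls up to 3 times, sleeping 1s, 2s, 4s
    # between attempts; -1 would retry indefinitely.
    async with MCPServerStdio(
        params={"command": "python", "args": ["my_mcp_server.py"]},  # hypothetical server script
        max_retry_attempts=3,
        retry_backoff_seconds_base=1.0,
    ) as server:
        agent = Agent(name="Assistant", instructions="Use the MCP tools.", mcp_servers=[server])
        result = await Runner.run(agent, "List what you can do.")
        print(result.final_output)


asyncio.run(main())
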
agents/model_settings.py
CHANGED
@@ -92,7 +92,10 @@ class ModelSettings:
     """
 
     truncation: Literal["auto", "disabled"] | None = None
-    """The truncation strategy to use when calling the model.
+    """The truncation strategy to use when calling the model.
+    See [Responses API documentation](https://platform.openai.com/docs/api-reference/responses/create#responses_create-truncation)
+    for more details.
+    """
 
     max_tokens: int | None = None
     """The maximum number of output tokens to generate."""
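For reference, a minimal sketch of setting the documented field; the agent name and instructions are illustrative.

from agents import Agent, ModelSettings

# "auto" lets the Responses API drop older context instead of erroring
# when the conversation exceeds the model's context window.
agent = Agent(
    name="Assistant",
    instructions="Answer briefly.",
    model_settings=ModelSettings(truncation="auto"),
)
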
agents/models/__init__.py
CHANGED
@@ -0,0 +1,13 @@
+from .default_models import (
+    get_default_model,
+    get_default_model_settings,
+    gpt_5_reasoning_settings_required,
+    is_gpt_5_default,
+)
+
+__all__ = [
+    "get_default_model",
+    "get_default_model_settings",
+    "gpt_5_reasoning_settings_required",
+    "is_gpt_5_default",
+]
agents/models/chatcmpl_converter.py
CHANGED

@@ -271,11 +271,16 @@ class Converter:
                     raise UserError(
                         f"Only file_data is supported for input_file {casted_file_param}"
                     )
+                if "filename" not in casted_file_param or not casted_file_param["filename"]:
+                    raise UserError(
+                        f"filename must be provided for input_file {casted_file_param}"
+                    )
                 out.append(
                     File(
                         type="file",
                         file=FileFile(
                             file_data=casted_file_param["file_data"],
+                            filename=casted_file_param["filename"],
                         ),
                     )
                 )
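Sketch of the stricter input shape (illustrative): an input_file content part passed through the Chat Completions converter now needs a filename alongside file_data; the base64 payload below is truncated and hypothetical.

input_items = [
    {
        "role": "user",
        "content": [
            {"type": "input_text", "text": "Summarize this document."},
            {
                "type": "input_file",
                "file_data": "data:application/pdf;base64,JVBERi0...",  # truncated example payload
                "filename": "report.pdf",  # now required; omitting it raises UserError
            },
        ],
    }
]
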
agents/models/default_models.py
ADDED

@@ -0,0 +1,58 @@
+import copy
+import os
+from typing import Optional
+
+from openai.types.shared.reasoning import Reasoning
+
+from agents.model_settings import ModelSettings
+
+OPENAI_DEFAULT_MODEL_ENV_VARIABLE_NAME = "OPENAI_DEFAULT_MODEL"
+
+# discourage directly accessing this constant
+# use the get_default_model and get_default_model_settings() functions instead
+_GPT_5_DEFAULT_MODEL_SETTINGS: ModelSettings = ModelSettings(
+    # We chose "low" instead of "minimal" because some of the built-in tools
+    # (e.g., file search, image generation, etc.) do not support "minimal"
+    # If you want to use "minimal" reasoning effort, you can pass your own model settings
+    reasoning=Reasoning(effort="low"),
+    verbosity="low",
+)
+
+
+def gpt_5_reasoning_settings_required(model_name: str) -> bool:
+    """
+    Returns True if the model name is a GPT-5 model and reasoning settings are required.
+    """
+    if model_name.startswith("gpt-5-chat"):
+        # gpt-5-chat-latest does not require reasoning settings
+        return False
+    # matches any of gpt-5 models
+    return model_name.startswith("gpt-5")
+
+
+def is_gpt_5_default() -> bool:
+    """
+    Returns True if the default model is a GPT-5 model.
+    This is used to determine if the default model settings are compatible with GPT-5 models.
+    If the default model is not a GPT-5 model, the model settings are compatible with other models.
+    """
+    return gpt_5_reasoning_settings_required(get_default_model())
+
+
+def get_default_model() -> str:
+    """
+    Returns the default model name.
+    """
+    return os.getenv(OPENAI_DEFAULT_MODEL_ENV_VARIABLE_NAME, "gpt-4.1").lower()
+
+
+def get_default_model_settings(model: Optional[str] = None) -> ModelSettings:
+    """
+    Returns the default model settings.
+    If the default model is a GPT-5 model, returns the GPT-5 default model settings.
+    Otherwise, returns the legacy default model settings.
+    """
+    _model = model if model is not None else get_default_model()
+    if gpt_5_reasoning_settings_required(_model):
+        return copy.deepcopy(_GPT_5_DEFAULT_MODEL_SETTINGS)
+    return ModelSettings()
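Sketch of how the new helpers behave (illustrative), assuming the environment variable is set before the process resolves a default model:

import os

from agents.models import get_default_model, get_default_model_settings, is_gpt_5_default

os.environ["OPENAI_DEFAULT_MODEL"] = "gpt-5"  # opt in to a GPT-5 default

print(get_default_model())           # "gpt-5"
print(is_gpt_5_default())            # True
print(get_default_model_settings())  # ModelSettings(reasoning=Reasoning(effort="low"), verbosity="low")
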
agents/models/openai_provider.py
CHANGED
@@ -4,10 +4,12 @@ import httpx
 from openai import AsyncOpenAI, DefaultAsyncHttpxClient
 
 from . import _openai_shared
+from .default_models import get_default_model
 from .interface import Model, ModelProvider
 from .openai_chatcompletions import OpenAIChatCompletionsModel
 from .openai_responses import OpenAIResponsesModel
 
+# This is kept for backward compatiblity but using get_default_model() method is recommended.
 DEFAULT_MODEL: str = "gpt-4o"
 
 
@@ -80,7 +82,7 @@ class OpenAIProvider(ModelProvider):
 
     def get_model(self, model_name: str | None) -> Model:
         if model_name is None:
-            model_name =
+            model_name = get_default_model()
 
         client = self._get_client()
 
agents/realtime/config.py
CHANGED
@@ -78,6 +78,9 @@ class RealtimeTurnDetectionConfig(TypedDict):
     threshold: NotRequired[float]
     """The threshold for voice activity detection."""
 
+    idle_timeout_ms: NotRequired[int]
+    """Threshold for server-vad to trigger a response if the user is idle for this duration."""
+
 
 class RealtimeSessionModelSettings(TypedDict):
     """Model settings for a realtime model session."""
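A hedged sketch of passing the new field through a realtime run configuration; the agent instructions and 5-second timeout are illustrative, and the surrounding runner setup is abridged.

from agents.realtime import RealtimeAgent, RealtimeRunner

agent = RealtimeAgent(name="Assistant", instructions="Help the caller.")
runner = RealtimeRunner(
    starting_agent=agent,
    config={
        "model_settings": {
            "turn_detection": {
                "type": "server_vad",
                "idle_timeout_ms": 5000,  # nudge the model to respond after ~5s of user silence
            },
        },
    },
)
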
agents/realtime/events.py
CHANGED
@@ -216,6 +216,16 @@ class RealtimeGuardrailTripped:
     type: Literal["guardrail_tripped"] = "guardrail_tripped"
 
 
+@dataclass
+class RealtimeInputAudioTimeoutTriggered:
+    """Called when the model detects a period of inactivity/silence from the user."""
+
+    info: RealtimeEventInfo
+    """Common info for all events, such as the context."""
+
+    type: Literal["input_audio_timeout_triggered"] = "input_audio_timeout_triggered"
+
+
 RealtimeSessionEvent: TypeAlias = Union[
     RealtimeAgentStartEvent,
     RealtimeAgentEndEvent,
@@ -230,5 +240,6 @@ RealtimeSessionEvent: TypeAlias = Union[
     RealtimeHistoryUpdated,
     RealtimeHistoryAdded,
     RealtimeGuardrailTripped,
+    RealtimeInputAudioTimeoutTriggered,
 ]
 """An event emitted by the realtime session."""
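Consumers can react to the new event from the session's event stream; a minimal sketch, assuming a session obtained from a realtime runner and an idle timeout configured as above.

async def handle_events(session) -> None:
    async for event in session:
        if event.type == "input_audio_timeout_triggered":
            # The user has been silent past the configured idle timeout;
            # e.g. prompt them again or wind down the call.
            print("User idle timeout reached")
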
agents/realtime/model_events.py
CHANGED
@@ -84,6 +84,15 @@ class RealtimeModelInputAudioTranscriptionCompletedEvent:
 
     type: Literal["input_audio_transcription_completed"] = "input_audio_transcription_completed"
 
+@dataclass
+class RealtimeModelInputAudioTimeoutTriggeredEvent:
+    """Input audio timeout triggered."""
+
+    item_id: str
+    audio_start_ms: int
+    audio_end_ms: int
+
+    type: Literal["input_audio_timeout_triggered"] = "input_audio_timeout_triggered"
 
 @dataclass
 class RealtimeModelTranscriptDeltaEvent:
@@ -174,6 +183,7 @@ RealtimeModelEvent: TypeAlias = Union[
     RealtimeModelAudioEvent,
     RealtimeModelAudioInterruptedEvent,
     RealtimeModelAudioDoneEvent,
+    RealtimeModelInputAudioTimeoutTriggeredEvent,
     RealtimeModelInputAudioTranscriptionCompletedEvent,
     RealtimeModelTranscriptDeltaEvent,
     RealtimeModelItemUpdatedEvent,
agents/realtime/openai_realtime.py
CHANGED

@@ -6,7 +6,7 @@ import inspect
 import json
 import os
 from datetime import datetime
-from typing import Any, Callable, Literal
+from typing import Annotated, Any, Callable, Literal, Union
 
 import pydantic
 import websockets
@@ -52,7 +52,7 @@ from openai.types.beta.realtime.session_update_event import (
     SessionTracingTracingConfiguration as OpenAISessionTracingConfiguration,
     SessionUpdateEvent as OpenAISessionUpdateEvent,
 )
-from pydantic import TypeAdapter
+from pydantic import BaseModel, Field, TypeAdapter
 from typing_extensions import assert_never
 from websockets.asyncio.client import ClientConnection
 
@@ -83,6 +83,7 @@ from .model_events import (
     RealtimeModelErrorEvent,
     RealtimeModelEvent,
     RealtimeModelExceptionEvent,
+    RealtimeModelInputAudioTimeoutTriggeredEvent,
     RealtimeModelInputAudioTranscriptionCompletedEvent,
     RealtimeModelItemDeletedEvent,
     RealtimeModelItemUpdatedEvent,
@@ -128,6 +129,22 @@ async def get_api_key(key: str | Callable[[], MaybeAwaitable[str]] | None) -> st
     return os.getenv("OPENAI_API_KEY")
 
 
+class _InputAudioBufferTimeoutTriggeredEvent(BaseModel):
+    type: Literal["input_audio_buffer.timeout_triggered"]
+    event_id: str
+    audio_start_ms: int
+    audio_end_ms: int
+    item_id: str
+
+AllRealtimeServerEvents = Annotated[
+    Union[
+        OpenAIRealtimeServerEvent,
+        _InputAudioBufferTimeoutTriggeredEvent,
+    ],
+    Field(discriminator="type"),
+]
+
+
 class OpenAIRealtimeWebSocketModel(RealtimeModel):
     """A model that uses OpenAI's WebSocket API."""
 
@@ -170,7 +187,10 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
             "OpenAI-Beta": "realtime=v1",
         }
         self._websocket = await websockets.connect(
-            url,
+            url,
+            user_agent_header=_USER_AGENT,
+            additional_headers=headers,
+            max_size=None,  # Allow any size of message
         )
         self._websocket_task = asyncio.create_task(self._listen_for_messages())
         await self._update_session_config(model_settings)
@@ -459,8 +479,8 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
         try:
             if "previous_item_id" in event and event["previous_item_id"] is None:
                 event["previous_item_id"] = ""  # TODO (rm) remove
-            parsed:
-
+            parsed: AllRealtimeServerEvents = TypeAdapter(
+                AllRealtimeServerEvents
             ).validate_python(event)
         except pydantic.ValidationError as e:
             logger.error(f"Failed to validate server event: {event}", exc_info=True)
@@ -551,6 +571,12 @@ class OpenAIRealtimeWebSocketModel(RealtimeModel):
             or parsed.type == "response.output_item.done"
         ):
             await self._handle_output_item(parsed.item)
+        elif parsed.type == "input_audio_buffer.timeout_triggered":
+            await self._emit_event(RealtimeModelInputAudioTimeoutTriggeredEvent(
+                item_id=parsed.item_id,
+                audio_start_ms=parsed.audio_start_ms,
+                audio_end_ms=parsed.audio_end_ms,
+            ))
 
     def _update_created_session(self, session: OpenAISessionObject) -> None:
         self._created_session = session
agents/realtime/session.py
CHANGED
@@ -10,6 +10,7 @@ from typing_extensions import assert_never
 from ..agent import Agent
 from ..exceptions import ModelBehaviorError, UserError
 from ..handoffs import Handoff
+from ..logger import logger
 from ..run_context import RunContextWrapper, TContext
 from ..tool import FunctionTool
 from ..tool_context import ToolContext
@@ -27,13 +28,14 @@ from .events import (
     RealtimeHandoffEvent,
     RealtimeHistoryAdded,
     RealtimeHistoryUpdated,
+    RealtimeInputAudioTimeoutTriggered,
     RealtimeRawModelEvent,
     RealtimeSessionEvent,
     RealtimeToolEnd,
     RealtimeToolStart,
 )
 from .handoffs import realtime_handoff
-from .items import InputAudio, InputText, RealtimeItem
+from .items import AssistantAudio, InputAudio, InputText, RealtimeItem
 from .model import RealtimeModel, RealtimeModelConfig, RealtimeModelListener
 from .model_events import (
     RealtimeModelEvent,
@@ -226,6 +228,12 @@ class RealtimeSession(RealtimeModelListener):
             await self._put_event(
                 RealtimeHistoryUpdated(info=self._event_info, history=self._history)
             )
+        elif event.type == "input_audio_timeout_triggered":
+            await self._put_event(
+                RealtimeInputAudioTimeoutTriggered(
+                    info=self._event_info,
+                )
+            )
         elif event.type == "transcript_delta":
             # Accumulate transcript text for guardrail debouncing per item_id
             item_id = event.item_id
@@ -246,7 +254,58 @@ class RealtimeSession(RealtimeModelListener):
             self._enqueue_guardrail_task(self._item_transcripts[item_id], event.response_id)
         elif event.type == "item_updated":
             is_new = not any(item.item_id == event.item.item_id for item in self._history)
-
+
+            # Preserve previously known transcripts when updating existing items.
+            # This prevents transcripts from disappearing when an item is later
+            # retrieved without transcript fields populated.
+            incoming_item = event.item
+            existing_item = next(
+                (i for i in self._history if i.item_id == incoming_item.item_id), None
+            )
+
+            if (
+                existing_item is not None
+                and existing_item.type == "message"
+                and incoming_item.type == "message"
+            ):
+                try:
+                    # Merge transcripts for matching content indices
+                    existing_content = existing_item.content
+                    new_content = []
+                    for idx, entry in enumerate(incoming_item.content):
+                        # Only attempt to preserve for audio-like content
+                        if entry.type in ("audio", "input_audio"):
+                            # Use tuple form for Python 3.9 compatibility
+                            assert isinstance(entry, (InputAudio, AssistantAudio))
+                            # Determine if transcript is missing/empty on the incoming entry
+                            entry_transcript = entry.transcript
+                            if not entry_transcript:
+                                preserved: str | None = None
+                                # First prefer any transcript from the existing history item
+                                if idx < len(existing_content):
+                                    this_content = existing_content[idx]
+                                    if isinstance(this_content, AssistantAudio) or isinstance(
+                                        this_content, InputAudio
+                                    ):
+                                        preserved = this_content.transcript
+
+                                # If still missing and this is an assistant item, fall back to
+                                # accumulated transcript deltas tracked during the turn.
+                                if not preserved and incoming_item.role == "assistant":
+                                    preserved = self._item_transcripts.get(incoming_item.item_id)
+
+                                if preserved:
+                                    entry = entry.model_copy(update={"transcript": preserved})
+
+                            new_content.append(entry)
+
+                    if new_content:
+                        incoming_item = incoming_item.model_copy(update={"content": new_content})
+                except Exception:
+                    logger.error("Error merging transcripts", exc_info=True)
+                    pass
+
+            self._history = self._get_new_history(self._history, incoming_item)
             if is_new:
                 new_item = next(
                     item for item in self._history if item.item_id == event.item.item_id
agents/repl.py
CHANGED
@@ -8,10 +8,13 @@ from .agent import Agent
 from .items import TResponseInputItem
 from .result import RunResultBase
 from .run import Runner
+from .run_context import TContext
 from .stream_events import AgentUpdatedStreamEvent, RawResponsesStreamEvent, RunItemStreamEvent
 
 
-async def run_demo_loop(
+async def run_demo_loop(
+    agent: Agent[Any], *, stream: bool = True, context: TContext | None = None
+) -> None:
     """Run a simple REPL loop with the given agent.
 
     This utility allows quick manual testing and debugging of an agent from the
@@ -21,6 +24,7 @@ async def run_demo_loop(agent: Agent[Any], *, stream: bool = True) -> None:
     Args:
         agent: The starting agent to run.
         stream: Whether to stream the agent output.
+        context: Additional context information to pass to the runner.
     """
 
     current_agent = agent
@@ -40,7 +44,7 @@ async def run_demo_loop(agent: Agent[Any], *, stream: bool = True) -> None:
 
         result: RunResultBase
         if stream:
-            result = Runner.run_streamed(current_agent, input=input_items)
+            result = Runner.run_streamed(current_agent, input=input_items, context=context)
            async for event in result.stream_events():
                 if isinstance(event, RawResponsesStreamEvent):
                     if isinstance(event.data, ResponseTextDeltaEvent):
@@ -54,7 +58,7 @@ async def run_demo_loop(agent: Agent[Any], *, stream: bool = True) -> None:
                     print(f"\n[Agent updated: {event.new_agent.name}]", flush=True)
             print()
         else:
-            result = await Runner.run(current_agent, input_items)
+            result = await Runner.run(current_agent, input_items, context=context)
         if result.final_output is not None:
            print(result.final_output)
 
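Sketch of the extended REPL helper with a caller-supplied context object (the context dataclass and field are hypothetical; only the new context parameter comes from this release):

import asyncio
from dataclasses import dataclass

from agents import Agent
from agents.repl import run_demo_loop


@dataclass
class AppContext:  # hypothetical user-defined context
    user_name: str


async def main() -> None:
    agent = Agent[AppContext](name="Assistant", instructions="Greet the user by name if asked.")
    # The context is forwarded to Runner.run / Runner.run_streamed on every turn.
    await run_demo_loop(agent, context=AppContext(user_name="Alice"))


asyncio.run(main())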