openai-agents 0.2.11__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of openai-agents has been flagged as possibly problematic by the registry.
- agents/_debug.py +15 -4
- agents/_run_impl.py +34 -37
- agents/agent.py +18 -2
- agents/extensions/handoff_filters.py +2 -0
- agents/extensions/memory/__init__.py +42 -15
- agents/extensions/memory/encrypt_session.py +185 -0
- agents/extensions/models/litellm_model.py +62 -10
- agents/function_schema.py +45 -3
- agents/memory/__init__.py +2 -0
- agents/memory/openai_conversations_session.py +0 -3
- agents/memory/util.py +20 -0
- agents/models/chatcmpl_converter.py +74 -15
- agents/models/chatcmpl_helpers.py +6 -0
- agents/models/chatcmpl_stream_handler.py +29 -1
- agents/models/openai_chatcompletions.py +26 -4
- agents/models/openai_responses.py +30 -4
- agents/realtime/__init__.py +2 -0
- agents/realtime/_util.py +1 -1
- agents/realtime/agent.py +7 -0
- agents/realtime/audio_formats.py +29 -0
- agents/realtime/config.py +32 -4
- agents/realtime/items.py +17 -1
- agents/realtime/model_events.py +2 -0
- agents/realtime/model_inputs.py +15 -1
- agents/realtime/openai_realtime.py +421 -130
- agents/realtime/session.py +167 -14
- agents/result.py +47 -20
- agents/run.py +191 -106
- agents/tool.py +1 -1
- agents/tracing/processor_interface.py +84 -11
- agents/tracing/spans.py +88 -0
- agents/tracing/traces.py +99 -16
- agents/util/_json.py +19 -1
- agents/util/_transforms.py +12 -2
- agents/voice/input.py +5 -4
- agents/voice/models/openai_stt.py +15 -8
- {openai_agents-0.2.11.dist-info → openai_agents-0.3.1.dist-info}/METADATA +4 -2
- {openai_agents-0.2.11.dist-info → openai_agents-0.3.1.dist-info}/RECORD +40 -37
- {openai_agents-0.2.11.dist-info → openai_agents-0.3.1.dist-info}/WHEEL +0 -0
- {openai_agents-0.2.11.dist-info → openai_agents-0.3.1.dist-info}/licenses/LICENSE +0 -0
agents/memory/util.py
ADDED
@@ -0,0 +1,20 @@
+from __future__ import annotations
+
+from typing import Callable
+
+from ..items import TResponseInputItem
+from ..util._types import MaybeAwaitable
+
+SessionInputCallback = Callable[
+    [list[TResponseInputItem], list[TResponseInputItem]],
+    MaybeAwaitable[list[TResponseInputItem]],
+]
+"""A function that combines session history with new input items.
+
+Args:
+    history_items: The list of items from the session history.
+    new_items: The list of new input items for the current turn.
+
+Returns:
+    A list of combined items to be used as input for the agent. Can be sync or async.
+"""
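The new SessionInputCallback hook lets callers decide how stored session history is merged with the current turn's input. A minimal sketch of a conforming callback (the trimming policy below is illustrative, not part of the package):

from agents.items import TResponseInputItem
from agents.memory.util import SessionInputCallback

def keep_recent_history(
    history_items: list[TResponseInputItem],
    new_items: list[TResponseInputItem],
) -> list[TResponseInputItem]:
    # Illustrative policy: keep only the last 20 history items, then append the new turn.
    return history_items[-20:] + new_items

merge: SessionInputCallback = keep_recent_history  # an async def would also satisfy the alias
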
agents/models/chatcmpl_converter.py
CHANGED

@@ -39,7 +39,7 @@ from openai.types.responses import (
     ResponseReasoningItemParam,
 )
 from openai.types.responses.response_input_param import FunctionCallOutput, ItemReference, Message
-from openai.types.responses.response_reasoning_item import Summary
+from openai.types.responses.response_reasoning_item import Content, Summary
 
 from ..agent_output import AgentOutputSchemaBase
 from ..exceptions import AgentsException, UserError
@@ -93,16 +93,41 @@ class Converter:
     def message_to_output_items(cls, message: ChatCompletionMessage) -> list[TResponseOutputItem]:
         items: list[TResponseOutputItem] = []
 
-        #
+        # Check if message is agents.extentions.models.litellm_model.InternalChatCompletionMessage
+        # We can't actually import it here because litellm is an optional dependency
+        # So we use hasattr to check for reasoning_content and thinking_blocks
         if hasattr(message, "reasoning_content") and message.reasoning_content:
-
-
-
-
-                    type="reasoning",
-                )
+            reasoning_item = ResponseReasoningItem(
+                id=FAKE_RESPONSES_ID,
+                summary=[Summary(text=message.reasoning_content, type="summary_text")],
+                type="reasoning",
             )
 
+            # Store thinking blocks for Anthropic compatibility
+            if hasattr(message, "thinking_blocks") and message.thinking_blocks:
+                # Store thinking text in content and signature in encrypted_content
+                reasoning_item.content = []
+                signature = None
+                for block in message.thinking_blocks:
+                    if isinstance(block, dict):
+                        thinking_text = block.get("thinking", "")
+                        if thinking_text:
+                            reasoning_item.content.append(
+                                Content(text=thinking_text, type="reasoning_text")
+                            )
+                        # Store the signature if present
+                        if block.get("signature"):
+                            signature = block.get("signature")
+
+                # Store only the last signature in encrypted_content
+                # If there are multiple thinking blocks, this should be a problem.
+                # In practice, there should only be one signature for the entire reasoning step.
+                # Tested with: claude-sonnet-4-20250514
+                if signature:
+                    reasoning_item.encrypted_content = signature
+
+            items.append(reasoning_item)
+
         message_item = ResponseOutputMessage(
             id=FAKE_RESPONSES_ID,
             content=[],
@@ -272,9 +297,7 @@ class Converter:
                         f"Only file_data is supported for input_file {casted_file_param}"
                     )
                 if "filename" not in casted_file_param or not casted_file_param["filename"]:
-                    raise UserError(
-                        f"filename must be provided for input_file {casted_file_param}"
-                    )
+                    raise UserError(f"filename must be provided for input_file {casted_file_param}")
                 out.append(
                     File(
                         type="file",
@@ -292,10 +315,18 @@ class Converter:
     def items_to_messages(
         cls,
         items: str | Iterable[TResponseInputItem],
+        preserve_thinking_blocks: bool = False,
    ) -> list[ChatCompletionMessageParam]:
        """
        Convert a sequence of 'Item' objects into a list of ChatCompletionMessageParam.
 
+        Args:
+            items: A string or iterable of response input items to convert
+            preserve_thinking_blocks: Whether to preserve thinking blocks in tool calls
+                for reasoning models like Claude 4 Sonnet/Opus which support interleaved
+                thinking. When True, thinking blocks are reconstructed and included in
+                assistant messages with tool calls.
+
        Rules:
        - EasyInputMessage or InputMessage (role=user) => ChatCompletionUserMessageParam
        - EasyInputMessage or InputMessage (role=system) => ChatCompletionSystemMessageParam
@@ -316,6 +347,7 @@ class Converter:
 
         result: list[ChatCompletionMessageParam] = []
         current_assistant_msg: ChatCompletionAssistantMessageParam | None = None
+        pending_thinking_blocks: list[dict[str, str]] | None = None
 
         def flush_assistant_message() -> None:
             nonlocal current_assistant_msg
@@ -327,10 +359,11 @@ class Converter:
                 current_assistant_msg = None
 
         def ensure_assistant_message() -> ChatCompletionAssistantMessageParam:
-            nonlocal current_assistant_msg
+            nonlocal current_assistant_msg, pending_thinking_blocks
            if current_assistant_msg is None:
                 current_assistant_msg = ChatCompletionAssistantMessageParam(role="assistant")
                 current_assistant_msg["tool_calls"] = []
+
            return current_assistant_msg
 
         for item in items:
@@ -446,6 +479,13 @@ class Converter:
 
             elif func_call := cls.maybe_function_tool_call(item):
                 asst = ensure_assistant_message()
+
+                # If we have pending thinking blocks, use them as the content
+                # This is required for Anthropic API tool calls with interleaved thinking
+                if pending_thinking_blocks:
+                    asst["content"] = pending_thinking_blocks  # type: ignore
+                    pending_thinking_blocks = None  # Clear after using
+
                 tool_calls = list(asst.get("tool_calls", []))
                 arguments = func_call["arguments"] if func_call["arguments"] else "{}"
                 new_tool_call = ChatCompletionMessageFunctionToolCallParam(
@@ -474,9 +514,28 @@ class Converter:
                     f"Encountered an item_reference, which is not supported: {item_ref}"
                 )
 
-            # 7) reasoning message =>
-            elif cls.maybe_reasoning_message(item):
-
+            # 7) reasoning message => extract thinking blocks if present
+            elif reasoning_item := cls.maybe_reasoning_message(item):
+                # Reconstruct thinking blocks from content (text) and encrypted_content (signature)
+                content_items = reasoning_item.get("content", [])
+                signature = reasoning_item.get("encrypted_content")
+
+                if content_items and preserve_thinking_blocks:
+                    # Reconstruct thinking blocks from content and signature
+                    pending_thinking_blocks = []
+                    for content_item in content_items:
+                        if (
+                            isinstance(content_item, dict)
+                            and content_item.get("type") == "reasoning_text"
+                        ):
+                            thinking_block = {
+                                "type": "thinking",
+                                "thinking": content_item.get("text", ""),
+                            }
+                            # Add signature if available
+                            if signature:
+                                thinking_block["signature"] = signature
+                            pending_thinking_blocks.append(thinking_block)
 
             # 8) If we haven't recognized it => fail or ignore
             else:
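Taken together, these converter changes round-trip Anthropic-style thinking blocks through the Responses reasoning item: thinking text is stored as reasoning_text content, the signature in encrypted_content, and items_to_messages(..., preserve_thinking_blocks=True) rebuilds the thinking blocks when history is converted back into chat-completions messages. A rough sketch of the data shape involved (values are illustrative):

from openai.types.responses import ResponseReasoningItem
from openai.types.responses.response_reasoning_item import Content, Summary

# Roughly what message_to_output_items produces for a litellm/Anthropic message
# that carried reasoning_content plus a signed thinking block.
reasoning_item = ResponseReasoningItem(
    id="rs_fake",  # the SDK uses its FAKE_RESPONSES_ID placeholder here
    type="reasoning",
    summary=[Summary(text="reasoning summary", type="summary_text")],
)
reasoning_item.content = [Content(text="full thinking text", type="reasoning_text")]
reasoning_item.encrypted_content = "sig_abc123"  # Anthropic signature (illustrative)

# With preserve_thinking_blocks=True, the reverse conversion yields:
# [{"type": "thinking", "thinking": "full thinking text", "signature": "sig_abc123"}]
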
agents/models/chatcmpl_helpers.py
CHANGED

@@ -1,5 +1,7 @@
 from __future__ import annotations
 
+from contextvars import ContextVar
+
 from openai import AsyncOpenAI
 
 from ..model_settings import ModelSettings
@@ -8,6 +10,10 @@ from ..version import __version__
 _USER_AGENT = f"Agents/Python {__version__}"
 HEADERS = {"User-Agent": _USER_AGENT}
 
+USER_AGENT_OVERRIDE: ContextVar[str | None] = ContextVar(
+    "openai_chatcompletions_user_agent_override", default=None
+)
+
 
 class ChatCmplHelpers:
     @classmethod
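USER_AGENT_OVERRIDE is a ContextVar, so the User-Agent header used by the chat-completions path can be overridden per task/context without mutating the module-level HEADERS. A minimal sketch (the override value is illustrative):

from agents.models.chatcmpl_helpers import USER_AGENT_OVERRIDE

token = USER_AGENT_OVERRIDE.set("my-proxy/1.0")
try:
    ...  # model calls made in this context pick up the override via _merge_headers()
finally:
    USER_AGENT_OVERRIDE.reset(token)
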
agents/models/chatcmpl_stream_handler.py
CHANGED

@@ -62,6 +62,9 @@ class StreamingState:
     # Fields for real-time function call streaming
     function_call_streaming: dict[int, bool] = field(default_factory=dict)
     function_call_output_idx: dict[int, int] = field(default_factory=dict)
+    # Store accumulated thinking text and signature for Anthropic compatibility
+    thinking_text: str = ""
+    thinking_signature: str | None = None
 
 
 class SequenceNumber:
@@ -101,6 +104,19 @@ class ChatCmplStreamHandler:
 
         delta = chunk.choices[0].delta
 
+        # Handle thinking blocks from Anthropic (for preserving signatures)
+        if hasattr(delta, "thinking_blocks") and delta.thinking_blocks:
+            for block in delta.thinking_blocks:
+                if isinstance(block, dict):
+                    # Accumulate thinking text
+                    thinking_text = block.get("thinking", "")
+                    if thinking_text:
+                        state.thinking_text += thinking_text
+                    # Store signature if present
+                    signature = block.get("signature")
+                    if signature:
+                        state.thinking_signature = signature
+
         # Handle reasoning content for reasoning summaries
         if hasattr(delta, "reasoning_content"):
             reasoning_content = delta.reasoning_content
@@ -527,7 +543,19 @@ class ChatCmplStreamHandler:
 
         # include Reasoning item if it exists
         if state.reasoning_content_index_and_output:
-
+            reasoning_item = state.reasoning_content_index_and_output[1]
+            # Store thinking text in content and signature in encrypted_content
+            if state.thinking_text:
+                # Add thinking text as a Content object
+                if not reasoning_item.content:
+                    reasoning_item.content = []
+                reasoning_item.content.append(
+                    Content(text=state.thinking_text, type="reasoning_text")
+                )
+            # Store signature in encrypted_content
+            if state.thinking_signature:
+                reasoning_item.encrypted_content = state.thinking_signature
+            outputs.append(reasoning_item)
 
         # include text or refusal content if they exist
         if state.text_content_index_and_output or state.refusal_content_index_and_output:
agents/models/openai_chatcompletions.py
CHANGED

@@ -23,8 +23,9 @@ from ..tracing import generation_span
 from ..tracing.span_data import GenerationSpanData
 from ..tracing.spans import Span
 from ..usage import Usage
+from ..util._json import _to_dump_compatible
 from .chatcmpl_converter import Converter
-from .chatcmpl_helpers import HEADERS, ChatCmplHelpers
+from .chatcmpl_helpers import HEADERS, USER_AGENT_OVERRIDE, ChatCmplHelpers
 from .chatcmpl_stream_handler import ChatCmplStreamHandler
 from .fake_id import FAKE_RESPONSES_ID
 from .interface import Model, ModelTracing
@@ -237,6 +238,8 @@ class OpenAIChatCompletionsModel(Model):
                     "role": "system",
                 },
             )
+        converted_messages = _to_dump_compatible(converted_messages)
+
         if tracing.include_data():
             span.span_data.input = converted_messages
 
@@ -255,12 +258,24 @@ class OpenAIChatCompletionsModel(Model):
         for handoff in handoffs:
             converted_tools.append(Converter.convert_handoff_tool(handoff))
 
+        converted_tools = _to_dump_compatible(converted_tools)
+
         if _debug.DONT_LOG_MODEL_DATA:
             logger.debug("Calling LLM")
         else:
+            messages_json = json.dumps(
+                converted_messages,
+                indent=2,
+                ensure_ascii=False,
+            )
+            tools_json = json.dumps(
+                converted_tools,
+                indent=2,
+                ensure_ascii=False,
+            )
             logger.debug(
-                f"{
-                f"Tools:\n{
+                f"{messages_json}\n"
+                f"Tools:\n{tools_json}\n"
                 f"Stream: {stream}\n"
                 f"Tool choice: {tool_choice}\n"
                 f"Response format: {response_format}\n"
@@ -291,7 +306,7 @@ class OpenAIChatCompletionsModel(Model):
                 reasoning_effort=self._non_null_or_not_given(reasoning_effort),
                 verbosity=self._non_null_or_not_given(model_settings.verbosity),
                 top_logprobs=self._non_null_or_not_given(model_settings.top_logprobs),
-                extra_headers=
+                extra_headers=self._merge_headers(model_settings),
                 extra_query=model_settings.extra_query,
                 extra_body=model_settings.extra_body,
                 metadata=self._non_null_or_not_given(model_settings.metadata),
@@ -334,3 +349,10 @@ class OpenAIChatCompletionsModel(Model):
         if self._client is None:
             self._client = AsyncOpenAI()
         return self._client
+
+    def _merge_headers(self, model_settings: ModelSettings):
+        merged = {**HEADERS, **(model_settings.extra_headers or {})}
+        ua_ctx = USER_AGENT_OVERRIDE.get()
+        if ua_ctx is not None:
+            merged["User-Agent"] = ua_ctx
+        return merged
agents/models/openai_responses.py
CHANGED

@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import json
 from collections.abc import AsyncIterator
+from contextvars import ContextVar
 from dataclasses import dataclass
 from typing import TYPE_CHECKING, Any, Literal, cast, overload
 
@@ -38,6 +39,7 @@ from ..tool import (
 )
 from ..tracing import SpanError, response_span
 from ..usage import Usage
+from ..util._json import _to_dump_compatible
 from ..version import __version__
 from .interface import Model, ModelTracing
 
@@ -48,6 +50,11 @@ if TYPE_CHECKING:
 _USER_AGENT = f"Agents/Python {__version__}"
 _HEADERS = {"User-Agent": _USER_AGENT}
 
+# Override for the User-Agent header used by the Responses API.
+_USER_AGENT_OVERRIDE: ContextVar[str | None] = ContextVar(
+    "openai_responses_user_agent_override", default=None
+)
+
 
 class OpenAIResponsesModel(Model):
     """
@@ -240,6 +247,7 @@ class OpenAIResponsesModel(Model):
         prompt: ResponsePromptParam | None = None,
     ) -> Response | AsyncStream[ResponseStreamEvent]:
         list_input = ItemHelpers.input_to_new_input_list(input)
+        list_input = _to_dump_compatible(list_input)
 
         parallel_tool_calls = (
             True
@@ -251,6 +259,7 @@ class OpenAIResponsesModel(Model):
 
         tool_choice = Converter.convert_tool_choice(model_settings.tool_choice)
         converted_tools = Converter.convert_tools(tools, handoffs)
+        converted_tools_payload = _to_dump_compatible(converted_tools.tools)
         response_format = Converter.get_response_format(output_schema)
 
         include_set: set[str] = set(converted_tools.includes)
@@ -263,10 +272,20 @@ class OpenAIResponsesModel(Model):
         if _debug.DONT_LOG_MODEL_DATA:
             logger.debug("Calling LLM")
         else:
+            input_json = json.dumps(
+                list_input,
+                indent=2,
+                ensure_ascii=False,
+            )
+            tools_json = json.dumps(
+                converted_tools_payload,
+                indent=2,
+                ensure_ascii=False,
+            )
             logger.debug(
                 f"Calling LLM {self.model} with input:\n"
-                f"{
-                f"Tools:\n{
+                f"{input_json}\n"
+                f"Tools:\n{tools_json}\n"
                 f"Stream: {stream}\n"
                 f"Tool choice: {tool_choice}\n"
                 f"Response format: {response_format}\n"
@@ -290,7 +309,7 @@ class OpenAIResponsesModel(Model):
                 model=self.model,
                 input=list_input,
                 include=include,
-                tools=
+                tools=converted_tools_payload,
                 prompt=self._non_null_or_not_given(prompt),
                 temperature=self._non_null_or_not_given(model_settings.temperature),
                 top_p=self._non_null_or_not_given(model_settings.top_p),
@@ -299,7 +318,7 @@ class OpenAIResponsesModel(Model):
                 tool_choice=tool_choice,
                 parallel_tool_calls=parallel_tool_calls,
                 stream=stream,
-                extra_headers=
+                extra_headers=self._merge_headers(model_settings),
                 extra_query=model_settings.extra_query,
                 extra_body=model_settings.extra_body,
                 text=response_format,
@@ -314,6 +333,13 @@ class OpenAIResponsesModel(Model):
             self._client = AsyncOpenAI()
         return self._client
 
+    def _merge_headers(self, model_settings: ModelSettings):
+        merged = {**_HEADERS, **(model_settings.extra_headers or {})}
+        ua_ctx = _USER_AGENT_OVERRIDE.get()
+        if ua_ctx is not None:
+            merged["User-Agent"] = ua_ctx
+        return merged
+
 
 @dataclass
 class ConvertedTools:
agents/realtime/__init__.py
CHANGED
@@ -3,6 +3,7 @@ from .config import (
     RealtimeAudioFormat,
     RealtimeClientMessage,
     RealtimeGuardrailsSettings,
+    RealtimeInputAudioNoiseReductionConfig,
     RealtimeInputAudioTranscriptionConfig,
     RealtimeModelName,
     RealtimeModelTracingConfig,
@@ -101,6 +102,7 @@ __all__ = [
     "RealtimeAudioFormat",
     "RealtimeClientMessage",
     "RealtimeGuardrailsSettings",
+    "RealtimeInputAudioNoiseReductionConfig",
     "RealtimeInputAudioTranscriptionConfig",
     "RealtimeModelName",
     "RealtimeModelTracingConfig",
agents/realtime/_util.py
CHANGED
@@ -4,6 +4,6 @@ from .config import RealtimeAudioFormat
 
 
 def calculate_audio_length_ms(format: RealtimeAudioFormat | None, audio_bytes: bytes) -> float:
-    if format and format.startswith("g711"):
+    if format and isinstance(format, str) and format.startswith("g711"):
         return (len(audio_bytes) / 8000) * 1000
     return (len(audio_bytes) / 24 / 2) * 1000
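The added isinstance check matters because audio formats may now also be typed OpenAI RealtimeAudioFormats objects (see config.py below) rather than plain strings; only string formats are tested with startswith. The arithmetic itself is unchanged, for example:

from agents.realtime._util import calculate_audio_length_ms  # private helper, shown only for illustration

pcm = b"\x00\x00" * 24_000   # 1 second of 24 kHz, 16-bit mono PCM
assert calculate_audio_length_ms("pcm16", pcm) == 1000.0

ulaw = b"\x00" * 8_000       # 1 second of 8 kHz G.711 u-law
assert calculate_audio_length_ms("g711_ulaw", ulaw) == 1000.0
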
agents/realtime/agent.py
CHANGED
@@ -6,6 +6,8 @@ from collections.abc import Awaitable
 from dataclasses import dataclass, field
 from typing import Any, Callable, Generic, cast
 
+from agents.prompts import Prompt
+
 from ..agent import AgentBase
 from ..guardrail import OutputGuardrail
 from ..handoffs import Handoff
@@ -55,6 +57,11 @@ class RealtimeAgent(AgentBase, Generic[TContext]):
    return a string.
    """
 
+    prompt: Prompt | None = None
+    """A prompt object. Prompts allow you to dynamically configure the instructions, tools
+    and other config for an agent outside of your code. Only usable with OpenAI models.
+    """
+
    handoffs: list[RealtimeAgent[Any] | Handoff[TContext, RealtimeAgent[Any]]] = field(
        default_factory=list
    )
agents/realtime/audio_formats.py
ADDED

@@ -0,0 +1,29 @@
+from __future__ import annotations
+
+from openai.types.realtime.realtime_audio_formats import (
+    AudioPCM,
+    AudioPCMA,
+    AudioPCMU,
+    RealtimeAudioFormats,
+)
+
+from ..logger import logger
+
+
+def to_realtime_audio_format(
+    input_audio_format: str | RealtimeAudioFormats | None,
+) -> RealtimeAudioFormats | None:
+    format: RealtimeAudioFormats | None = None
+    if input_audio_format is not None:
+        if isinstance(input_audio_format, str):
+            if input_audio_format in ["pcm16", "audio/pcm", "pcm"]:
+                format = AudioPCM(type="audio/pcm", rate=24000)
+            elif input_audio_format in ["g711_ulaw", "audio/pcmu", "pcmu"]:
+                format = AudioPCMU(type="audio/pcmu")
+            elif input_audio_format in ["g711_alaw", "audio/pcma", "pcma"]:
+                format = AudioPCMA(type="audio/pcma")
+            else:
+                logger.debug(f"Unknown input_audio_format: {input_audio_format}")
+        else:
+            format = input_audio_format
+    return format
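The new helper normalizes both the legacy string names and the MIME-style names onto the typed OpenAI realtime audio format objects, logging and returning None for anything it does not recognize. For example:

from agents.realtime.audio_formats import to_realtime_audio_format

to_realtime_audio_format("pcm16")       # -> AudioPCM(type="audio/pcm", rate=24000)
to_realtime_audio_format("g711_ulaw")   # -> AudioPCMU(type="audio/pcmu")
to_realtime_audio_format(None)          # -> None (nothing to normalize)
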
agents/realtime/config.py
CHANGED
@@ -6,8 +6,13 @@ from typing import (
     Union,
 )
 
+from openai.types.realtime.realtime_audio_formats import (
+    RealtimeAudioFormats as OpenAIRealtimeAudioFormats,
+)
 from typing_extensions import NotRequired, TypeAlias, TypedDict
 
+from agents.prompts import Prompt
+
 from ..guardrail import OutputGuardrail
 from ..handoffs import Handoff
 from ..model_settings import ToolChoice
@@ -15,6 +20,8 @@ from ..tool import Tool
 
 RealtimeModelName: TypeAlias = Union[
     Literal[
+        "gpt-realtime",
+        "gpt-realtime-2025-08-28",
         "gpt-4o-realtime-preview",
         "gpt-4o-mini-realtime-preview",
         "gpt-4o-realtime-preview-2025-06-03",
@@ -54,6 +61,13 @@ class RealtimeInputAudioTranscriptionConfig(TypedDict):
     """An optional prompt to guide transcription."""
 
 
+class RealtimeInputAudioNoiseReductionConfig(TypedDict):
+    """Noise reduction configuration for input audio."""
+
+    type: NotRequired[Literal["near_field", "far_field"]]
+    """Noise reduction mode to apply to input audio."""
+
+
 class RealtimeTurnDetectionConfig(TypedDict):
     """Turn detection config. Allows extra vendor keys if needed."""
 
@@ -91,6 +105,9 @@ class RealtimeSessionModelSettings(TypedDict):
     instructions: NotRequired[str]
     """System instructions for the model."""
 
+    prompt: NotRequired[Prompt]
+    """The prompt to use for the model."""
+
     modalities: NotRequired[list[Literal["text", "audio"]]]
     """The modalities the model should support."""
 
@@ -100,15 +117,18 @@ class RealtimeSessionModelSettings(TypedDict):
     speed: NotRequired[float]
     """The speed of the model's responses."""
 
-    input_audio_format: NotRequired[RealtimeAudioFormat]
+    input_audio_format: NotRequired[RealtimeAudioFormat | OpenAIRealtimeAudioFormats]
     """The format for input audio streams."""
 
-    output_audio_format: NotRequired[RealtimeAudioFormat]
+    output_audio_format: NotRequired[RealtimeAudioFormat | OpenAIRealtimeAudioFormats]
     """The format for output audio streams."""
 
     input_audio_transcription: NotRequired[RealtimeInputAudioTranscriptionConfig]
     """Configuration for transcribing input audio."""
 
+    input_audio_noise_reduction: NotRequired[RealtimeInputAudioNoiseReductionConfig | None]
+    """Noise reduction configuration for input audio."""
+
     turn_detection: NotRequired[RealtimeTurnDetectionConfig]
     """Configuration for detecting conversation turns."""
 
@@ -177,6 +197,14 @@ class RealtimeUserInputText(TypedDict):
     """The text content from the user."""
 
 
+class RealtimeUserInputImage(TypedDict, total=False):
+    """An image input from the user (Realtime)."""
+
+    type: Literal["input_image"]
+    image_url: str
+    detail: NotRequired[Literal["auto", "low", "high"] | str]
+
+
 class RealtimeUserInputMessage(TypedDict):
     """A message input from the user."""
 
@@ -186,8 +214,8 @@ class RealtimeUserInputMessage(TypedDict):
     role: Literal["user"]
     """The role identifier for user messages."""
 
-    content: list[RealtimeUserInputText]
-    """List of
+    content: list[RealtimeUserInputText | RealtimeUserInputImage]
+    """List of content items (text and image) in the message."""
 
 
 RealtimeUserInput: TypeAlias = Union[str, RealtimeUserInputMessage]
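With these config additions a realtime session can enable input noise reduction, and user messages can carry image parts alongside text. A sketch of the new shapes (field values are illustrative):

from agents.realtime.config import (
    RealtimeInputAudioNoiseReductionConfig,
    RealtimeSessionModelSettings,
    RealtimeUserInputImage,
)

noise: RealtimeInputAudioNoiseReductionConfig = {"type": "near_field"}

settings: RealtimeSessionModelSettings = {
    "instructions": "You are a helpful voice assistant.",
    "input_audio_format": "pcm16",            # plain strings are still accepted
    "input_audio_noise_reduction": noise,
}

image_part: RealtimeUserInputImage = {
    "type": "input_image",
    "image_url": "data:image/png;base64,....",
    "detail": "auto",
}
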
agents/realtime/items.py
CHANGED
@@ -34,6 +34,22 @@ class InputAudio(BaseModel):
     model_config = ConfigDict(extra="allow")
 
 
+class InputImage(BaseModel):
+    """Image input content for realtime messages."""
+
+    type: Literal["input_image"] = "input_image"
+    """The type identifier for image input."""
+
+    image_url: str | None = None
+    """Data/remote URL string (data:... or https:...)."""
+
+    detail: str | None = None
+    """Optional detail hint (e.g., 'auto', 'high', 'low')."""
+
+    # Allow extra data (e.g., `detail`)
+    model_config = ConfigDict(extra="allow")
+
+
 class AssistantText(BaseModel):
     """Text content from the assistant in realtime responses."""
 
@@ -100,7 +116,7 @@ class UserMessageItem(BaseModel):
     role: Literal["user"] = "user"
     """The role identifier for user messages."""
 
-    content: list[Annotated[InputText | InputAudio, Field(discriminator="type")]]
+    content: list[Annotated[InputText | InputAudio | InputImage, Field(discriminator="type")]]
     """List of content items, can be text or audio."""
 
     # Allow extra data
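InputImage joins the discriminated content union, so realtime user message items can now carry image parts alongside text and audio. A small sketch (extra keys pass through because of extra="allow"):

from agents.realtime.items import InputImage

image = InputImage(
    image_url="https://example.com/cat.png",
    detail="low",
)
# type defaults to "input_image"; image_url may also be a data: URL.
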
agents/realtime/model_events.py
CHANGED
@@ -84,6 +84,7 @@ class RealtimeModelInputAudioTranscriptionCompletedEvent:
 
     type: Literal["input_audio_transcription_completed"] = "input_audio_transcription_completed"
 
+
 @dataclass
 class RealtimeModelInputAudioTimeoutTriggeredEvent:
     """Input audio timeout triggered."""
@@ -94,6 +95,7 @@ class RealtimeModelInputAudioTimeoutTriggeredEvent:
 
     type: Literal["input_audio_timeout_triggered"] = "input_audio_timeout_triggered"
 
+
 @dataclass
 class RealtimeModelTranscriptDeltaEvent:
     """Partial transcript update."""
agents/realtime/model_inputs.py
CHANGED
@@ -24,12 +24,26 @@ class RealtimeModelInputTextContent(TypedDict):
     text: str
 
 
+class RealtimeModelInputImageContent(TypedDict, total=False):
+    """An image to be sent to the model.
+
+    The Realtime API expects `image_url` to be a string data/remote URL.
+    """
+
+    type: Literal["input_image"]
+    image_url: str
+    """String URL (data:... or https:...)."""
+
+    detail: NotRequired[str]
+    """Optional detail hint such as 'high', 'low', or 'auto'."""
+
+
 class RealtimeModelUserInputMessage(TypedDict):
     """A message to be sent to the model."""
 
     type: Literal["message"]
     role: Literal["user"]
-    content: list[RealtimeModelInputTextContent]
+    content: list[RealtimeModelInputTextContent | RealtimeModelInputImageContent]
 
 
 RealtimeModelUserInput: TypeAlias = Union[str, RealtimeModelUserInputMessage]
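On the model-input side the same pattern applies: a user message sent to the realtime model may now mix text and image content parts, with image_url carried as a plain data/remote URL string. A sketch (the "input_text" type value for the text part is assumed from the existing RealtimeModelInputTextContent definition):

from agents.realtime.model_inputs import RealtimeModelUserInputMessage

message: RealtimeModelUserInputMessage = {
    "type": "message",
    "role": "user",
    "content": [
        {"type": "input_text", "text": "What is shown in this image?"},
        {"type": "input_image", "image_url": "https://example.com/diagram.png", "detail": "high"},
    ],
}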