openai-agents 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +5 -1
- agents/_run_impl.py +5 -1
- agents/agent.py +62 -30
- agents/agent_output.py +2 -2
- agents/function_schema.py +11 -1
- agents/guardrail.py +5 -1
- agents/handoffs.py +32 -14
- agents/lifecycle.py +26 -17
- agents/mcp/server.py +82 -11
- agents/mcp/util.py +16 -9
- agents/memory/__init__.py +3 -0
- agents/memory/session.py +369 -0
- agents/model_settings.py +15 -7
- agents/models/chatcmpl_converter.py +20 -3
- agents/models/chatcmpl_stream_handler.py +134 -43
- agents/models/openai_responses.py +12 -5
- agents/realtime/README.md +3 -0
- agents/realtime/__init__.py +177 -0
- agents/realtime/agent.py +89 -0
- agents/realtime/config.py +188 -0
- agents/realtime/events.py +216 -0
- agents/realtime/handoffs.py +165 -0
- agents/realtime/items.py +184 -0
- agents/realtime/model.py +69 -0
- agents/realtime/model_events.py +159 -0
- agents/realtime/model_inputs.py +100 -0
- agents/realtime/openai_realtime.py +670 -0
- agents/realtime/runner.py +118 -0
- agents/realtime/session.py +535 -0
- agents/run.py +106 -4
- agents/tool.py +6 -7
- agents/tool_context.py +16 -3
- agents/voice/models/openai_stt.py +1 -1
- agents/voice/pipeline.py +6 -0
- agents/voice/workflow.py +8 -0
- {openai_agents-0.1.0.dist-info → openai_agents-0.2.1.dist-info}/METADATA +121 -4
- {openai_agents-0.1.0.dist-info → openai_agents-0.2.1.dist-info}/RECORD +39 -24
- {openai_agents-0.1.0.dist-info → openai_agents-0.2.1.dist-info}/WHEEL +0 -0
- {openai_agents-0.1.0.dist-info → openai_agents-0.2.1.dist-info}/licenses/LICENSE +0 -0
agents/model_settings.py
CHANGED
@@ -17,9 +17,9 @@ from typing_extensions import TypeAlias
 class _OmitTypeAnnotation:
     @classmethod
     def __get_pydantic_core_schema__(
+        cls,
+        _source_type: Any,
+        _handler: GetCoreSchemaHandler,
     ) -> core_schema.CoreSchema:
         def validate_from_none(value: None) -> _Omit:
             return _Omit()
@@ -39,12 +39,20 @@ class _OmitTypeAnnotation:
                     from_none_schema,
                 ]
             ),
-            serialization=core_schema.plain_serializer_function_ser_schema(
-                lambda instance: None
-            ),
+            serialization=core_schema.plain_serializer_function_ser_schema(lambda instance: None),
         )
+
+
+@dataclass
+class MCPToolChoice:
+    server_label: str
+    name: str
+
+
 Omit = Annotated[_Omit, _OmitTypeAnnotation]
 Headers: TypeAlias = Mapping[str, Union[str, Omit]]
+ToolChoice: TypeAlias = Union[Literal["auto", "required", "none"], str, MCPToolChoice, None]
+

 @dataclass
 class ModelSettings:
@@ -69,7 +77,7 @@ class ModelSettings:
     presence_penalty: float | None = None
     """The presence penalty to use when calling the model."""

-    tool_choice:
+    tool_choice: ToolChoice | None = None
     """The tool choice to use when calling the model."""

     parallel_tool_calls: bool | None = None
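A minimal usage sketch of the new ToolChoice/MCPToolChoice surface (the server label and tool name are illustrative, not from this diff):

    from agents.model_settings import MCPToolChoice, ModelSettings

    # Pin the model to one tool on a hosted MCP server; plain strings such as
    # "auto", "required", "none", or a function-tool name are still accepted.
    settings = ModelSettings(
        tool_choice=MCPToolChoice(server_label="docs", name="search_docs"),
    )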
agents/models/chatcmpl_converter.py
CHANGED
@@ -19,6 +19,7 @@ from openai.types.chat import (
     ChatCompletionToolMessageParam,
     ChatCompletionUserMessageParam,
 )
+from openai.types.chat.chat_completion_content_part_param import File, FileFile
 from openai.types.chat.chat_completion_tool_param import ChatCompletionToolParam
 from openai.types.chat.completion_create_params import ResponseFormat
 from openai.types.responses import (
@@ -27,6 +28,7 @@ from openai.types.responses import (
     ResponseFunctionToolCall,
     ResponseFunctionToolCallParam,
     ResponseInputContentParam,
+    ResponseInputFileParam,
     ResponseInputImageParam,
     ResponseInputTextParam,
     ResponseOutputMessage,
@@ -42,6 +44,7 @@ from ..agent_output import AgentOutputSchemaBase
 from ..exceptions import AgentsException, UserError
 from ..handoffs import Handoff
 from ..items import TResponseInputItem, TResponseOutputItem
+from ..model_settings import MCPToolChoice
 from ..tool import FunctionTool, Tool
 from .fake_id import FAKE_RESPONSES_ID

@@ -49,10 +52,12 @@ from .fake_id import FAKE_RESPONSES_ID
 class Converter:
     @classmethod
     def convert_tool_choice(
-        cls, tool_choice: Literal["auto", "required", "none"] | str | None
+        cls, tool_choice: Literal["auto", "required", "none"] | str | MCPToolChoice | None
     ) -> ChatCompletionToolChoiceOptionParam | NotGiven:
         if tool_choice is None:
             return NOT_GIVEN
+        elif isinstance(tool_choice, MCPToolChoice):
+            raise UserError("MCPToolChoice is not supported for Chat Completions models")
         elif tool_choice == "auto":
             return "auto"
         elif tool_choice == "required":
@@ -251,7 +256,19 @@
                     )
                 )
             elif isinstance(c, dict) and c.get("type") == "input_file":
+                casted_file_param = cast(ResponseInputFileParam, c)
+                if "file_data" not in casted_file_param or not casted_file_param["file_data"]:
+                    raise UserError(
+                        f"Only file_data is supported for input_file {casted_file_param}"
+                    )
+                out.append(
+                    File(
+                        type="file",
+                        file=FileFile(
+                            file_data=casted_file_param["file_data"],
+                        ),
+                    )
+                )
             else:
                 raise UserError(f"Unknown content: {c}")
         return out
@@ -467,7 +484,7 @@
             )

     @classmethod
-    def convert_handoff_tool(cls, handoff: Handoff[Any]) -> ChatCompletionToolParam:
+    def convert_handoff_tool(cls, handoff: Handoff[Any, Any]) -> ChatCompletionToolParam:
         return {
             "type": "function",
             "function": {
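With the converter change above, Chat Completions input can now carry an input_file content part as long as the file is inlined via file_data; a sketch of the accepted shape (the data URL is a placeholder, and passing only a file_id would raise UserError here):

    # Hypothetical user message fed to a Chat Completions model
    message = {
        "role": "user",
        "content": [
            {"type": "input_text", "text": "Summarize this document."},
            {
                "type": "input_file",
                "file_data": "data:application/pdf;base64,JVBERi0xLjQK...",  # placeholder
            },
        ],
    }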
agents/models/chatcmpl_stream_handler.py
CHANGED
@@ -53,6 +53,9 @@ class StreamingState:
     refusal_content_index_and_output: tuple[int, ResponseOutputRefusal] | None = None
     reasoning_content_index_and_output: tuple[int, ResponseReasoningItem] | None = None
     function_calls: dict[int, ResponseFunctionToolCall] = field(default_factory=dict)
+    # Fields for real-time function call streaming
+    function_call_streaming: dict[int, bool] = field(default_factory=dict)
+    function_call_output_idx: dict[int, int] = field(default_factory=dict)


 class SequenceNumber:
@@ -255,9 +258,7 @@ class ChatCmplStreamHandler:
                 # Accumulate the refusal string in the output part
                 state.refusal_content_index_and_output[1].refusal += delta.refusal

-            # Handle tool calls
-            # Because we don't know the name of the function until the end of the stream, we'll
-            # save everything and yield events at the end
+            # Handle tool calls with real-time streaming support
             if delta.tool_calls:
                 for tc_delta in delta.tool_calls:
                     if tc_delta.index not in state.function_calls:
@@ -268,15 +269,76 @@
                             type="function_call",
                             call_id="",
                         )
+                        state.function_call_streaming[tc_delta.index] = False
+
                     tc_function = tc_delta.function

+                    # Accumulate arguments as they come in
                     state.function_calls[tc_delta.index].arguments += (
                         tc_function.arguments if tc_function else ""
                     ) or ""
+
+                    # Set function name directly (it's correct from the first function call chunk)
+                    if tc_function and tc_function.name:
+                        state.function_calls[tc_delta.index].name = tc_function.name
+
+                    if tc_delta.id:
+                        state.function_calls[tc_delta.index].call_id = tc_delta.id
+
+                    function_call = state.function_calls[tc_delta.index]
+
+                    # Start streaming as soon as we have function name and call_id
+                    if (not state.function_call_streaming[tc_delta.index] and
+                            function_call.name and
+                            function_call.call_id):
+
+                        # Calculate the output index for this function call
+                        function_call_starting_index = 0
+                        if state.reasoning_content_index_and_output:
+                            function_call_starting_index += 1
+                        if state.text_content_index_and_output:
+                            function_call_starting_index += 1
+                        if state.refusal_content_index_and_output:
+                            function_call_starting_index += 1
+
+                        # Add offset for already started function calls
+                        function_call_starting_index += sum(
+                            1 for streaming in state.function_call_streaming.values() if streaming
+                        )
+
+                        # Mark this function call as streaming and store its output index
+                        state.function_call_streaming[tc_delta.index] = True
+                        state.function_call_output_idx[
+                            tc_delta.index
+                        ] = function_call_starting_index
+
+                        # Send initial function call added event
+                        yield ResponseOutputItemAddedEvent(
+                            item=ResponseFunctionToolCall(
+                                id=FAKE_RESPONSES_ID,
+                                call_id=function_call.call_id,
+                                arguments="",  # Start with empty arguments
+                                name=function_call.name,
+                                type="function_call",
+                            ),
+                            output_index=function_call_starting_index,
+                            type="response.output_item.added",
+                            sequence_number=sequence_number.get_and_increment(),
+                        )
+
+                    # Stream arguments if we've started streaming this function call
+                    if (state.function_call_streaming.get(tc_delta.index, False) and
+                            tc_function and
+                            tc_function.arguments):
+
+                        output_index = state.function_call_output_idx[tc_delta.index]
+                        yield ResponseFunctionCallArgumentsDeltaEvent(
+                            delta=tc_function.arguments,
+                            item_id=FAKE_RESPONSES_ID,
+                            output_index=output_index,
+                            type="response.function_call_arguments.delta",
+                            sequence_number=sequence_number.get_and_increment(),
+                        )

         if state.reasoning_content_index_and_output:
             yield ResponseReasoningSummaryPartDoneEvent(
@@ -327,42 +389,71 @@
             sequence_number=sequence_number.get_and_increment(),
         )

-        for function_call in state.function_calls.
+        # Send completion events for function calls
+        for index, function_call in state.function_calls.items():
+            if state.function_call_streaming.get(index, False):
+                # Function call was streamed, just send the completion event
+                output_index = state.function_call_output_idx[index]
+                yield ResponseOutputItemDoneEvent(
+                    item=ResponseFunctionToolCall(
+                        id=FAKE_RESPONSES_ID,
+                        call_id=function_call.call_id,
+                        arguments=function_call.arguments,
+                        name=function_call.name,
+                        type="function_call",
+                    ),
+                    output_index=output_index,
+                    type="response.output_item.done",
+                    sequence_number=sequence_number.get_and_increment(),
+                )
+            else:
+                # Function call was not streamed (fallback to old behavior)
+                # This handles edge cases where function name never arrived
+                fallback_starting_index = 0
+                if state.reasoning_content_index_and_output:
+                    fallback_starting_index += 1
+                if state.text_content_index_and_output:
+                    fallback_starting_index += 1
+                if state.refusal_content_index_and_output:
+                    fallback_starting_index += 1
+
+                # Add offset for already started function calls
+                fallback_starting_index += sum(
+                    1 for streaming in state.function_call_streaming.values() if streaming
+                )
+
+                # Send all events at once (backward compatibility)
+                yield ResponseOutputItemAddedEvent(
+                    item=ResponseFunctionToolCall(
+                        id=FAKE_RESPONSES_ID,
+                        call_id=function_call.call_id,
+                        arguments=function_call.arguments,
+                        name=function_call.name,
+                        type="function_call",
+                    ),
+                    output_index=fallback_starting_index,
+                    type="response.output_item.added",
+                    sequence_number=sequence_number.get_and_increment(),
+                )
+                yield ResponseFunctionCallArgumentsDeltaEvent(
+                    delta=function_call.arguments,
+                    item_id=FAKE_RESPONSES_ID,
+                    output_index=fallback_starting_index,
+                    type="response.function_call_arguments.delta",
+                    sequence_number=sequence_number.get_and_increment(),
+                )
+                yield ResponseOutputItemDoneEvent(
+                    item=ResponseFunctionToolCall(
+                        id=FAKE_RESPONSES_ID,
+                        call_id=function_call.call_id,
+                        arguments=function_call.arguments,
+                        name=function_call.name,
+                        type="function_call",
+                    ),
+                    output_index=fallback_starting_index,
+                    type="response.output_item.done",
+                    sequence_number=sequence_number.get_and_increment(),
+                )

         # Finally, send the Response completed event
         outputs: list[ResponseOutputItem] = []
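The net effect for consumers is that tool-call events are now emitted while the model is still streaming rather than only at the end of the turn. A rough sketch of the ordering seen per function call, using the event type strings from the diff (the stream object itself is hypothetical):

    async for event in stream:  # hypothetical event stream from a streamed run
        if event.type == "response.output_item.added":
            ...  # tool call announced as soon as its name and call_id are known
        elif event.type == "response.function_call_arguments.delta":
            ...  # argument JSON arrives incrementally
        elif event.type == "response.output_item.done":
            ...  # final, fully accumulated arguments for the call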
agents/models/openai_responses.py
CHANGED
@@ -25,6 +25,7 @@ from ..exceptions import UserError
 from ..handoffs import Handoff
 from ..items import ItemHelpers, ModelResponse, TResponseInputItem
 from ..logger import logger
+from ..model_settings import MCPToolChoice
 from ..tool import (
     CodeInterpreterTool,
     ComputerTool,
@@ -303,10 +304,16 @@ class ConvertedTools:
 class Converter:
     @classmethod
     def convert_tool_choice(
-        cls, tool_choice: Literal["auto", "required", "none"] | str | None
+        cls, tool_choice: Literal["auto", "required", "none"] | str | MCPToolChoice | None
     ) -> response_create_params.ToolChoice | NotGiven:
         if tool_choice is None:
             return NOT_GIVEN
+        elif isinstance(tool_choice, MCPToolChoice):
+            return {
+                "server_label": tool_choice.server_label,
+                "type": "mcp",
+                "name": tool_choice.name,
+            }
         elif tool_choice == "required":
             return "required"
         elif tool_choice == "auto":
@@ -334,9 +341,9 @@
                 "type": "code_interpreter",
             }
         elif tool_choice == "mcp":
-            }
+            # Note that this is still here for backwards compatibility,
+            # but migrating to MCPToolChoice is recommended.
+            return {"type": "mcp"}  # type: ignore [typeddict-item]
         else:
             return {
                 "type": "function",
@@ -363,7 +370,7 @@
     def convert_tools(
         cls,
         tools: list[Tool],
-        handoffs: list[Handoff[Any]],
+        handoffs: list[Handoff[Any, Any]],
     ) -> ConvertedTools:
         converted_tools: list[ToolParam] = []
         includes: list[ResponseIncludable] = []
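A quick sketch of what the Responses converter now produces for an MCPToolChoice (the label and name are illustrative); the Chat Completions converter raises UserError for the same input:

    from agents.model_settings import MCPToolChoice
    from agents.models.openai_responses import Converter

    choice = Converter.convert_tool_choice(MCPToolChoice(server_label="docs", name="search_docs"))
    # Maps to the Responses API tool_choice payload shown in the hunk above
    assert choice == {"type": "mcp", "server_label": "docs", "name": "search_docs"}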
agents/realtime/__init__.py
ADDED
@@ -0,0 +1,177 @@
+from .agent import RealtimeAgent, RealtimeAgentHooks, RealtimeRunHooks
+from .config import (
+    RealtimeAudioFormat,
+    RealtimeClientMessage,
+    RealtimeGuardrailsSettings,
+    RealtimeInputAudioTranscriptionConfig,
+    RealtimeModelName,
+    RealtimeModelTracingConfig,
+    RealtimeRunConfig,
+    RealtimeSessionModelSettings,
+    RealtimeTurnDetectionConfig,
+    RealtimeUserInput,
+    RealtimeUserInputMessage,
+    RealtimeUserInputText,
+)
+from .events import (
+    RealtimeAgentEndEvent,
+    RealtimeAgentStartEvent,
+    RealtimeAudio,
+    RealtimeAudioEnd,
+    RealtimeAudioInterrupted,
+    RealtimeError,
+    RealtimeEventInfo,
+    RealtimeGuardrailTripped,
+    RealtimeHandoffEvent,
+    RealtimeHistoryAdded,
+    RealtimeHistoryUpdated,
+    RealtimeRawModelEvent,
+    RealtimeSessionEvent,
+    RealtimeToolEnd,
+    RealtimeToolStart,
+)
+from .handoffs import realtime_handoff
+from .items import (
+    AssistantMessageItem,
+    AssistantText,
+    InputAudio,
+    InputText,
+    RealtimeItem,
+    RealtimeMessageItem,
+    RealtimeResponse,
+    RealtimeToolCallItem,
+    SystemMessageItem,
+    UserMessageItem,
+)
+from .model import (
+    RealtimeModel,
+    RealtimeModelConfig,
+    RealtimeModelListener,
+)
+from .model_events import (
+    RealtimeConnectionStatus,
+    RealtimeModelAudioDoneEvent,
+    RealtimeModelAudioEvent,
+    RealtimeModelAudioInterruptedEvent,
+    RealtimeModelConnectionStatusEvent,
+    RealtimeModelErrorEvent,
+    RealtimeModelEvent,
+    RealtimeModelExceptionEvent,
+    RealtimeModelInputAudioTranscriptionCompletedEvent,
+    RealtimeModelItemDeletedEvent,
+    RealtimeModelItemUpdatedEvent,
+    RealtimeModelOtherEvent,
+    RealtimeModelToolCallEvent,
+    RealtimeModelTranscriptDeltaEvent,
+    RealtimeModelTurnEndedEvent,
+    RealtimeModelTurnStartedEvent,
+)
+from .model_inputs import (
+    RealtimeModelInputTextContent,
+    RealtimeModelRawClientMessage,
+    RealtimeModelSendAudio,
+    RealtimeModelSendEvent,
+    RealtimeModelSendInterrupt,
+    RealtimeModelSendRawMessage,
+    RealtimeModelSendSessionUpdate,
+    RealtimeModelSendToolOutput,
+    RealtimeModelSendUserInput,
+    RealtimeModelUserInput,
+    RealtimeModelUserInputMessage,
+)
+from .openai_realtime import (
+    DEFAULT_MODEL_SETTINGS,
+    OpenAIRealtimeWebSocketModel,
+    get_api_key,
+)
+from .runner import RealtimeRunner
+from .session import RealtimeSession
+
+__all__ = [
+    # Agent
+    "RealtimeAgent",
+    "RealtimeAgentHooks",
+    "RealtimeRunHooks",
+    "RealtimeRunner",
+    # Handoffs
+    "realtime_handoff",
+    # Config
+    "RealtimeAudioFormat",
+    "RealtimeClientMessage",
+    "RealtimeGuardrailsSettings",
+    "RealtimeInputAudioTranscriptionConfig",
+    "RealtimeModelName",
+    "RealtimeModelTracingConfig",
+    "RealtimeRunConfig",
+    "RealtimeSessionModelSettings",
+    "RealtimeTurnDetectionConfig",
+    "RealtimeUserInput",
+    "RealtimeUserInputMessage",
+    "RealtimeUserInputText",
+    # Events
+    "RealtimeAgentEndEvent",
+    "RealtimeAgentStartEvent",
+    "RealtimeAudio",
+    "RealtimeAudioEnd",
+    "RealtimeAudioInterrupted",
+    "RealtimeError",
+    "RealtimeEventInfo",
+    "RealtimeGuardrailTripped",
+    "RealtimeHandoffEvent",
+    "RealtimeHistoryAdded",
+    "RealtimeHistoryUpdated",
+    "RealtimeRawModelEvent",
+    "RealtimeSessionEvent",
+    "RealtimeToolEnd",
+    "RealtimeToolStart",
+    # Items
+    "AssistantMessageItem",
+    "AssistantText",
+    "InputAudio",
+    "InputText",
+    "RealtimeItem",
+    "RealtimeMessageItem",
+    "RealtimeResponse",
+    "RealtimeToolCallItem",
+    "SystemMessageItem",
+    "UserMessageItem",
+    # Model
+    "RealtimeModel",
+    "RealtimeModelConfig",
+    "RealtimeModelListener",
+    # Model Events
+    "RealtimeConnectionStatus",
+    "RealtimeModelAudioDoneEvent",
+    "RealtimeModelAudioEvent",
+    "RealtimeModelAudioInterruptedEvent",
+    "RealtimeModelConnectionStatusEvent",
+    "RealtimeModelErrorEvent",
+    "RealtimeModelEvent",
+    "RealtimeModelExceptionEvent",
+    "RealtimeModelInputAudioTranscriptionCompletedEvent",
+    "RealtimeModelItemDeletedEvent",
+    "RealtimeModelItemUpdatedEvent",
+    "RealtimeModelOtherEvent",
+    "RealtimeModelToolCallEvent",
+    "RealtimeModelTranscriptDeltaEvent",
+    "RealtimeModelTurnEndedEvent",
+    "RealtimeModelTurnStartedEvent",
+    # Model Inputs
+    "RealtimeModelInputTextContent",
+    "RealtimeModelRawClientMessage",
+    "RealtimeModelSendAudio",
+    "RealtimeModelSendEvent",
+    "RealtimeModelSendInterrupt",
+    "RealtimeModelSendRawMessage",
+    "RealtimeModelSendSessionUpdate",
+    "RealtimeModelSendToolOutput",
+    "RealtimeModelSendUserInput",
+    "RealtimeModelUserInput",
+    "RealtimeModelUserInputMessage",
+    # OpenAI Realtime
+    "DEFAULT_MODEL_SETTINGS",
+    "OpenAIRealtimeWebSocketModel",
+    "get_api_key",
+    # Session
+    "RealtimeSession",
+]
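Everything re-exported above is importable from agents.realtime; a minimal sketch of wiring up the new primitives (RealtimeRunner's exact constructor and run API are not shown in this diff, so treat them as assumptions):

    from agents.realtime import RealtimeAgent, RealtimeRunner

    agent = RealtimeAgent(
        name="Assistant",  # `name` comes from the shared AgentBase fields
        instructions="You are a helpful voice assistant.",
    )
    runner = RealtimeRunner(agent)  # assumed: the runner wraps a starting agent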
agents/realtime/agent.py
ADDED
@@ -0,0 +1,89 @@
+from __future__ import annotations
+
+import dataclasses
+import inspect
+from collections.abc import Awaitable
+from dataclasses import dataclass, field
+from typing import Any, Callable, Generic, cast
+
+from ..agent import AgentBase
+from ..handoffs import Handoff
+from ..lifecycle import AgentHooksBase, RunHooksBase
+from ..logger import logger
+from ..run_context import RunContextWrapper, TContext
+from ..util._types import MaybeAwaitable
+
+RealtimeAgentHooks = AgentHooksBase[TContext, "RealtimeAgent[TContext]"]
+"""Agent hooks for `RealtimeAgent`s."""
+
+RealtimeRunHooks = RunHooksBase[TContext, "RealtimeAgent[TContext]"]
+"""Run hooks for `RealtimeAgent`s."""
+
+
+@dataclass
+class RealtimeAgent(AgentBase, Generic[TContext]):
+    """A specialized agent instance that is meant to be used within a `RealtimeSession` to build
+    voice agents. Due to the nature of this agent, some configuration options are not supported
+    that are supported by regular `Agent` instances. For example:
+    - `model` choice is not supported, as all RealtimeAgents will be handled by the same model
+      within a `RealtimeSession`.
+    - `modelSettings` is not supported, as all RealtimeAgents will be handled by the same model
+      within a `RealtimeSession`.
+    - `outputType` is not supported, as RealtimeAgents do not support structured outputs.
+    - `toolUseBehavior` is not supported, as all RealtimeAgents will be handled by the same model
+      within a `RealtimeSession`.
+    - `voice` can be configured on an `Agent` level; however, it cannot be changed after the first
+      agent within a `RealtimeSession` has spoken.
+
+    See `AgentBase` for base parameters that are shared with `Agent`s.
+    """
+
+    instructions: (
+        str
+        | Callable[
+            [RunContextWrapper[TContext], RealtimeAgent[TContext]],
+            MaybeAwaitable[str],
+        ]
+        | None
+    ) = None
+    """The instructions for the agent. Will be used as the "system prompt" when this agent is
+    invoked. Describes what the agent should do, and how it responds.
+
+    Can either be a string, or a function that dynamically generates instructions for the agent. If
+    you provide a function, it will be called with the context and the agent instance. It must
+    return a string.
+    """
+
+    handoffs: list[RealtimeAgent[Any] | Handoff[TContext, RealtimeAgent[Any]]] = field(
+        default_factory=list
+    )
+    """Handoffs are sub-agents that the agent can delegate to. You can provide a list of handoffs,
+    and the agent can choose to delegate to them if relevant. Allows for separation of concerns and
+    modularity.
+    """
+
+    hooks: RealtimeAgentHooks | None = None
+    """A class that receives callbacks on various lifecycle events for this agent."""
+
+    def clone(self, **kwargs: Any) -> RealtimeAgent[TContext]:
+        """Make a copy of the agent, with the given arguments changed. For example, you could do:
+        ```
+        new_agent = agent.clone(instructions="New instructions")
+        ```
+        """
+        return dataclasses.replace(self, **kwargs)
+
+    async def get_system_prompt(self, run_context: RunContextWrapper[TContext]) -> str | None:
+        """Get the system prompt for the agent."""
+        if isinstance(self.instructions, str):
+            return self.instructions
+        elif callable(self.instructions):
+            if inspect.iscoroutinefunction(self.instructions):
+                return await cast(Awaitable[str], self.instructions(run_context, self))
+            else:
+                return cast(str, self.instructions(run_context, self))
+        elif self.instructions is not None:
+            logger.error(f"Instructions must be a string or a function, got {self.instructions}")
+
+        return None
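Because instructions may be a callable (sync or async), dynamic system prompts work for realtime agents the same way as for regular agents; a small sketch:

    from agents.realtime import RealtimeAgent
    from agents.run_context import RunContextWrapper

    def dynamic_instructions(ctx: RunContextWrapper, agent: RealtimeAgent) -> str:
        # Called by get_system_prompt() with the run context and the agent itself.
        return f"You are {agent.name}. Keep spoken replies short."

    agent = RealtimeAgent(name="Concierge", instructions=dynamic_instructions)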