openai-agents 0.0.19__py3-none-any.whl → 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agents/__init__.py +5 -2
- agents/_run_impl.py +35 -1
- agents/agent.py +65 -29
- agents/extensions/models/litellm_model.py +7 -3
- agents/function_schema.py +11 -1
- agents/guardrail.py +5 -1
- agents/handoffs.py +14 -0
- agents/lifecycle.py +26 -17
- agents/mcp/__init__.py +13 -1
- agents/mcp/server.py +173 -16
- agents/mcp/util.py +89 -6
- agents/memory/__init__.py +3 -0
- agents/memory/session.py +369 -0
- agents/model_settings.py +60 -6
- agents/models/chatcmpl_converter.py +31 -2
- agents/models/chatcmpl_stream_handler.py +128 -16
- agents/models/openai_chatcompletions.py +12 -10
- agents/models/openai_responses.py +25 -8
- agents/realtime/README.md +3 -0
- agents/realtime/__init__.py +174 -0
- agents/realtime/agent.py +80 -0
- agents/realtime/config.py +128 -0
- agents/realtime/events.py +216 -0
- agents/realtime/items.py +91 -0
- agents/realtime/model.py +69 -0
- agents/realtime/model_events.py +159 -0
- agents/realtime/model_inputs.py +100 -0
- agents/realtime/openai_realtime.py +584 -0
- agents/realtime/runner.py +118 -0
- agents/realtime/session.py +502 -0
- agents/repl.py +1 -4
- agents/run.py +131 -10
- agents/tool.py +30 -6
- agents/tool_context.py +16 -3
- agents/tracing/__init__.py +1 -2
- agents/tracing/processor_interface.py +1 -1
- agents/voice/models/openai_stt.py +1 -1
- agents/voice/pipeline.py +6 -0
- agents/voice/workflow.py +8 -0
- {openai_agents-0.0.19.dist-info → openai_agents-0.2.0.dist-info}/METADATA +133 -8
- {openai_agents-0.0.19.dist-info → openai_agents-0.2.0.dist-info}/RECORD +43 -29
- {openai_agents-0.0.19.dist-info → openai_agents-0.2.0.dist-info}/WHEEL +0 -0
- {openai_agents-0.0.19.dist-info → openai_agents-0.2.0.dist-info}/licenses/LICENSE +0 -0
agents/models/chatcmpl_stream_handler.py

@@ -20,21 +20,38 @@ from openai.types.responses import (
     ResponseOutputMessage,
     ResponseOutputRefusal,
     ResponseOutputText,
+    ResponseReasoningItem,
+    ResponseReasoningSummaryPartAddedEvent,
+    ResponseReasoningSummaryPartDoneEvent,
+    ResponseReasoningSummaryTextDeltaEvent,
     ResponseRefusalDeltaEvent,
     ResponseTextDeltaEvent,
     ResponseUsage,
 )
+from openai.types.responses.response_reasoning_item import Summary
+from openai.types.responses.response_reasoning_summary_part_added_event import (
+    Part as AddedEventPart,
+)
+from openai.types.responses.response_reasoning_summary_part_done_event import Part as DoneEventPart
 from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails
 
 from ..items import TResponseStreamEvent
 from .fake_id import FAKE_RESPONSES_ID
 
 
+# Define a Part class for internal use
+class Part:
+    def __init__(self, text: str, type: str):
+        self.text = text
+        self.type = type
+
+
 @dataclass
 class StreamingState:
     started: bool = False
     text_content_index_and_output: tuple[int, ResponseOutputText] | None = None
     refusal_content_index_and_output: tuple[int, ResponseOutputRefusal] | None = None
+    reasoning_content_index_and_output: tuple[int, ResponseReasoningItem] | None = None
     function_calls: dict[int, ResponseFunctionToolCall] = field(default_factory=dict)

@@ -75,12 +92,65 @@ class ChatCmplStreamHandler:
 
             delta = chunk.choices[0].delta
 
-            # Handle
-            if delta
+            # Handle reasoning content
+            if hasattr(delta, "reasoning_content"):
+                reasoning_content = delta.reasoning_content
+                if reasoning_content and not state.reasoning_content_index_and_output:
+                    state.reasoning_content_index_and_output = (
+                        0,
+                        ResponseReasoningItem(
+                            id=FAKE_RESPONSES_ID,
+                            summary=[Summary(text="", type="summary_text")],
+                            type="reasoning",
+                        ),
+                    )
+                    yield ResponseOutputItemAddedEvent(
+                        item=ResponseReasoningItem(
+                            id=FAKE_RESPONSES_ID,
+                            summary=[Summary(text="", type="summary_text")],
+                            type="reasoning",
+                        ),
+                        output_index=0,
+                        type="response.output_item.added",
+                        sequence_number=sequence_number.get_and_increment(),
+                    )
+
+                    yield ResponseReasoningSummaryPartAddedEvent(
+                        item_id=FAKE_RESPONSES_ID,
+                        output_index=0,
+                        summary_index=0,
+                        part=AddedEventPart(text="", type="summary_text"),
+                        type="response.reasoning_summary_part.added",
+                        sequence_number=sequence_number.get_and_increment(),
+                    )
+
+                if reasoning_content and state.reasoning_content_index_and_output:
+                    yield ResponseReasoningSummaryTextDeltaEvent(
+                        delta=reasoning_content,
+                        item_id=FAKE_RESPONSES_ID,
+                        output_index=0,
+                        summary_index=0,
+                        type="response.reasoning_summary_text.delta",
+                        sequence_number=sequence_number.get_and_increment(),
+                    )
+
+                    # Create a new summary with updated text
+                    current_summary = state.reasoning_content_index_and_output[1].summary[0]
+                    updated_text = current_summary.text + reasoning_content
+                    new_summary = Summary(text=updated_text, type="summary_text")
+                    state.reasoning_content_index_and_output[1].summary[0] = new_summary
+
+            # Handle regular content
+            if delta.content is not None:
                 if not state.text_content_index_and_output:
-
+                    content_index = 0
+                    if state.reasoning_content_index_and_output:
+                        content_index += 1
+                    if state.refusal_content_index_and_output:
+                        content_index += 1
+
                     state.text_content_index_and_output = (
-                        0,
+                        content_index,
                         ResponseOutputText(
                             text="",
                             type="output_text",

@@ -98,14 +168,16 @@ class ChatCmplStreamHandler:
                     # Notify consumers of the start of a new output message + first content part
                     yield ResponseOutputItemAddedEvent(
                         item=assistant_item,
-                        output_index=0,
+                        output_index=state.reasoning_content_index_and_output
+                        is not None,  # fixed 0 -> 0 or 1
                         type="response.output_item.added",
                         sequence_number=sequence_number.get_and_increment(),
                     )
                     yield ResponseContentPartAddedEvent(
                         content_index=state.text_content_index_and_output[0],
                         item_id=FAKE_RESPONSES_ID,
-                        output_index=0,
+                        output_index=state.reasoning_content_index_and_output
+                        is not None,  # fixed 0 -> 0 or 1
                         part=ResponseOutputText(
                             text="",
                             type="output_text",

@@ -119,7 +191,8 @@ class ChatCmplStreamHandler:
                     content_index=state.text_content_index_and_output[0],
                     delta=delta.content,
                     item_id=FAKE_RESPONSES_ID,
-                    output_index=0,
+                    output_index=state.reasoning_content_index_and_output
+                    is not None,  # fixed 0 -> 0 or 1
                     type="response.output_text.delta",
                     sequence_number=sequence_number.get_and_increment(),
                 )

@@ -130,9 +203,14 @@ class ChatCmplStreamHandler:
             # This is always set by the OpenAI API, but not by others e.g. LiteLLM
             if hasattr(delta, "refusal") and delta.refusal:
                 if not state.refusal_content_index_and_output:
-
+                    refusal_index = 0
+                    if state.reasoning_content_index_and_output:
+                        refusal_index += 1
+                    if state.text_content_index_and_output:
+                        refusal_index += 1
+
                     state.refusal_content_index_and_output = (
-                        0,
+                        refusal_index,
                         ResponseOutputRefusal(refusal="", type="refusal"),
                     )
                     # Start a new assistant message if one doesn't exist yet (in-progress)

@@ -146,14 +224,16 @@ class ChatCmplStreamHandler:
                     # Notify downstream that assistant message + first content part are starting
                     yield ResponseOutputItemAddedEvent(
                         item=assistant_item,
-                        output_index=0,
+                        output_index=state.reasoning_content_index_and_output
+                        is not None,  # fixed 0 -> 0 or 1
                         type="response.output_item.added",
                         sequence_number=sequence_number.get_and_increment(),
                     )
                     yield ResponseContentPartAddedEvent(
                         content_index=state.refusal_content_index_and_output[0],
                         item_id=FAKE_RESPONSES_ID,
-                        output_index=0,
+                        output_index=state.reasoning_content_index_and_output
+                        is not None,  # fixed 0 -> 0 or 1
                         part=ResponseOutputText(
                             text="",
                             type="output_text",

@@ -167,7 +247,8 @@ class ChatCmplStreamHandler:
                     content_index=state.refusal_content_index_and_output[0],
                     delta=delta.refusal,
                     item_id=FAKE_RESPONSES_ID,
-                    output_index=0,
+                    output_index=state.reasoning_content_index_and_output
+                    is not None,  # fixed 0 -> 0 or 1
                     type="response.refusal.delta",
                     sequence_number=sequence_number.get_and_increment(),
                 )

@@ -195,16 +276,39 @@ class ChatCmplStreamHandler:
                 state.function_calls[tc_delta.index].name += (
                     tc_function.name if tc_function else ""
                 ) or ""
-                state.function_calls[tc_delta.index].call_id
+                state.function_calls[tc_delta.index].call_id = tc_delta.id or ""
+
+        if state.reasoning_content_index_and_output:
+            yield ResponseReasoningSummaryPartDoneEvent(
+                item_id=FAKE_RESPONSES_ID,
+                output_index=0,
+                summary_index=0,
+                part=DoneEventPart(
+                    text=state.reasoning_content_index_and_output[1].summary[0].text,
+                    type="summary_text",
+                ),
+                type="response.reasoning_summary_part.done",
+                sequence_number=sequence_number.get_and_increment(),
+            )
+            yield ResponseOutputItemDoneEvent(
+                item=state.reasoning_content_index_and_output[1],
+                output_index=0,
+                type="response.output_item.done",
+                sequence_number=sequence_number.get_and_increment(),
+            )
 
         function_call_starting_index = 0
+        if state.reasoning_content_index_and_output:
+            function_call_starting_index += 1
+
         if state.text_content_index_and_output:
             function_call_starting_index += 1
             # Send end event for this content part
             yield ResponseContentPartDoneEvent(
                 content_index=state.text_content_index_and_output[0],
                 item_id=FAKE_RESPONSES_ID,
-                output_index=0,
+                output_index=state.reasoning_content_index_and_output
+                is not None,  # fixed 0 -> 0 or 1
                 part=state.text_content_index_and_output[1],
                 type="response.content_part.done",
                 sequence_number=sequence_number.get_and_increment(),

@@ -216,7 +320,8 @@ class ChatCmplStreamHandler:
             yield ResponseContentPartDoneEvent(
                 content_index=state.refusal_content_index_and_output[0],
                 item_id=FAKE_RESPONSES_ID,
-                output_index=0,
+                output_index=state.reasoning_content_index_and_output
+                is not None,  # fixed 0 -> 0 or 1
                 part=state.refusal_content_index_and_output[1],
                 type="response.content_part.done",
                 sequence_number=sequence_number.get_and_increment(),

@@ -261,6 +366,12 @@ class ChatCmplStreamHandler:
 
         # Finally, send the Response completed event
         outputs: list[ResponseOutputItem] = []
+
+        # include Reasoning item if it exists
+        if state.reasoning_content_index_and_output:
+            outputs.append(state.reasoning_content_index_and_output[1])
+
+        # include text or refusal content if they exist
         if state.text_content_index_and_output or state.refusal_content_index_and_output:
             assistant_msg = ResponseOutputMessage(
                 id=FAKE_RESPONSES_ID,

@@ -278,7 +389,8 @@ class ChatCmplStreamHandler:
             # send a ResponseOutputItemDone for the assistant message
             yield ResponseOutputItemDoneEvent(
                 item=assistant_msg,
-                output_index=0,
+                output_index=state.reasoning_content_index_and_output
+                is not None,  # fixed 0 -> 0 or 1
                 type="response.output_item.done",
                 sequence_number=sequence_number.get_and_increment(),
             )
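The handler changes above map provider-reported `reasoning_content` deltas onto Responses-style reasoning events, and shift every `output_index` by one when a reasoning item occupies slot 0. A minimal sketch (not part of the diff) of picking those events out of a streamed run with this version of the SDK; the agent name and prompt are made up:

import asyncio

from agents import Agent, Runner


async def main() -> None:
    agent = Agent(name="Assistant", instructions="Think step by step.")
    result = Runner.run_streamed(agent, "What is 17 * 24?")
    async for event in result.stream_events():
        if event.type != "raw_response_event":
            continue
        # Reasoning summary deltas now arrive as their own event type,
        # ahead of the regular output text deltas.
        if event.data.type == "response.reasoning_summary_text.delta":
            print(f"[reasoning] {event.data.delta}")
        elif event.data.type == "response.output_text.delta":
            print(event.data.delta, end="", flush=True)


asyncio.run(main())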
agents/models/openai_chatcompletions.py

@@ -7,7 +7,8 @@ from typing import TYPE_CHECKING, Any, Literal, cast, overload
 
 from openai import NOT_GIVEN, AsyncOpenAI, AsyncStream
 from openai.types import ChatModel
-from openai.types.chat import ChatCompletion, ChatCompletionChunk
+from openai.types.chat import ChatCompletion, ChatCompletionChunk, ChatCompletionMessage
+from openai.types.chat.chat_completion import Choice
 from openai.types.responses import Response
 from openai.types.responses.response_prompt_param import ResponsePromptParam
 from openai.types.responses.response_usage import InputTokensDetails, OutputTokensDetails

@@ -74,8 +75,11 @@ class OpenAIChatCompletionsModel(Model):
             prompt=prompt,
         )
 
-
-
+        message: ChatCompletionMessage | None = None
+        first_choice: Choice | None = None
+        if response.choices and len(response.choices) > 0:
+            first_choice = response.choices[0]
+            message = first_choice.message
 
         if _debug.DONT_LOG_MODEL_DATA:
             logger.debug("Received model response")

@@ -83,13 +87,11 @@ class OpenAIChatCompletionsModel(Model):
         if message is not None:
             logger.debug(
                 "LLM resp:\n%s\n",
-                json.dumps(message.model_dump(), indent=2),
+                json.dumps(message.model_dump(), indent=2, ensure_ascii=False),
             )
         else:
-
-
-                first_choice.finish_reason,
-            )
+            finish_reason = first_choice.finish_reason if first_choice else "-"
+            logger.debug(f"LLM resp had no message. finish_reason: {finish_reason}")
 
         usage = (
             Usage(

@@ -254,8 +256,8 @@ class OpenAIChatCompletionsModel(Model):
             logger.debug("Calling LLM")
         else:
             logger.debug(
-                f"{json.dumps(converted_messages, indent=2)}\n"
-                f"Tools:\n{json.dumps(converted_tools, indent=2)}\n"
+                f"{json.dumps(converted_messages, indent=2, ensure_ascii=False)}\n"
+                f"Tools:\n{json.dumps(converted_tools, indent=2, ensure_ascii=False)}\n"
                 f"Stream: {stream}\n"
                 f"Tool choice: {tool_choice}\n"
                 f"Response format: {response_format}\n"
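The repeated `ensure_ascii=False` additions above only change debug logging: non-ASCII message content is logged verbatim rather than as \uXXXX escapes. A quick stdlib illustration:

import json

msg = {"content": "こんにちは"}
print(json.dumps(msg, indent=2))                      # "\u3053\u3093\u306b\u3061\u306f"
print(json.dumps(msg, indent=2, ensure_ascii=False))  # "こんにちは"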
agents/models/openai_responses.py

@@ -25,6 +25,7 @@ from ..exceptions import UserError
 from ..handoffs import Handoff
 from ..items import ItemHelpers, ModelResponse, TResponseInputItem
 from ..logger import logger
+from ..model_settings import MCPToolChoice
 from ..tool import (
     CodeInterpreterTool,
     ComputerTool,

@@ -96,7 +97,13 @@ class OpenAIResponsesModel(Model):
         else:
             logger.debug(
                 "LLM resp:\n"
-                f"{
+                f"""{
+                    json.dumps(
+                        [x.model_dump() for x in response.output],
+                        indent=2,
+                        ensure_ascii=False,
+                    )
+                }\n"""
             )
 
         usage = (

@@ -240,13 +247,17 @@ class OpenAIResponsesModel(Model):
         converted_tools = Converter.convert_tools(tools, handoffs)
         response_format = Converter.get_response_format(output_schema)
 
+        include: list[ResponseIncludable] = converted_tools.includes
+        if model_settings.response_include is not None:
+            include = list({*include, *model_settings.response_include})
+
         if _debug.DONT_LOG_MODEL_DATA:
             logger.debug("Calling LLM")
         else:
             logger.debug(
                 f"Calling LLM {self.model} with input:\n"
-                f"{json.dumps(list_input, indent=2)}\n"
-                f"Tools:\n{json.dumps(converted_tools.tools, indent=2)}\n"
+                f"{json.dumps(list_input, indent=2, ensure_ascii=False)}\n"
+                f"Tools:\n{json.dumps(converted_tools.tools, indent=2, ensure_ascii=False)}\n"
                 f"Stream: {stream}\n"
                 f"Tool choice: {tool_choice}\n"
                 f"Response format: {response_format}\n"

@@ -258,7 +269,7 @@ class OpenAIResponsesModel(Model):
             instructions=self._non_null_or_not_given(system_instructions),
             model=self.model,
             input=list_input,
-            include=
+            include=include,
             tools=converted_tools.tools,
             prompt=self._non_null_or_not_given(prompt),
             temperature=self._non_null_or_not_given(model_settings.temperature),

@@ -293,10 +304,16 @@ class ConvertedTools:
 class Converter:
     @classmethod
     def convert_tool_choice(
-        cls, tool_choice: Literal["auto", "required", "none"] | str | None
+        cls, tool_choice: Literal["auto", "required", "none"] | str | MCPToolChoice | None
     ) -> response_create_params.ToolChoice | NotGiven:
         if tool_choice is None:
             return NOT_GIVEN
+        elif isinstance(tool_choice, MCPToolChoice):
+            return {
+                "server_label": tool_choice.server_label,
+                "type": "mcp",
+                "name": tool_choice.name,
+            }
         elif tool_choice == "required":
             return "required"
         elif tool_choice == "auto":

@@ -324,9 +341,9 @@ class Converter:
                 "type": "code_interpreter",
             }
         elif tool_choice == "mcp":
-
-
-            }
+            # Note that this is still here for backwards compatibility,
+            # but migrating to MCPToolChoice is recommended.
+            return {"type": "mcp"}  # type: ignore [typeddict-item]
         else:
             return {
                 "type": "function",
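`convert_tool_choice` now lowers an `MCPToolChoice` into a `{"type": "mcp", ...}` payload, so a specific tool on a specific hosted MCP server can be forced. A minimal sketch (not part of the diff); the server label and tool name are hypothetical:

from agents import Agent, ModelSettings
from agents.model_settings import MCPToolChoice

agent = Agent(
    name="Assistant",
    model_settings=ModelSettings(
        # Pin one tool on one MCP server; the bare "mcp" string remains
        # only for backwards compatibility.
        tool_choice=MCPToolChoice(server_label="docs", name="search"),
    ),
)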
agents/realtime/__init__.py
ADDED

@@ -0,0 +1,174 @@
+from .agent import RealtimeAgent, RealtimeAgentHooks, RealtimeRunHooks
+from .config import (
+    RealtimeAudioFormat,
+    RealtimeClientMessage,
+    RealtimeGuardrailsSettings,
+    RealtimeInputAudioTranscriptionConfig,
+    RealtimeModelName,
+    RealtimeModelTracingConfig,
+    RealtimeRunConfig,
+    RealtimeSessionModelSettings,
+    RealtimeTurnDetectionConfig,
+    RealtimeUserInput,
+    RealtimeUserInputMessage,
+    RealtimeUserInputText,
+)
+from .events import (
+    RealtimeAgentEndEvent,
+    RealtimeAgentStartEvent,
+    RealtimeAudio,
+    RealtimeAudioEnd,
+    RealtimeAudioInterrupted,
+    RealtimeError,
+    RealtimeEventInfo,
+    RealtimeGuardrailTripped,
+    RealtimeHandoffEvent,
+    RealtimeHistoryAdded,
+    RealtimeHistoryUpdated,
+    RealtimeRawModelEvent,
+    RealtimeSessionEvent,
+    RealtimeToolEnd,
+    RealtimeToolStart,
+)
+from .items import (
+    AssistantMessageItem,
+    AssistantText,
+    InputAudio,
+    InputText,
+    RealtimeItem,
+    RealtimeMessageItem,
+    RealtimeResponse,
+    RealtimeToolCallItem,
+    SystemMessageItem,
+    UserMessageItem,
+)
+from .model import (
+    RealtimeModel,
+    RealtimeModelConfig,
+    RealtimeModelListener,
+)
+from .model_events import (
+    RealtimeConnectionStatus,
+    RealtimeModelAudioDoneEvent,
+    RealtimeModelAudioEvent,
+    RealtimeModelAudioInterruptedEvent,
+    RealtimeModelConnectionStatusEvent,
+    RealtimeModelErrorEvent,
+    RealtimeModelEvent,
+    RealtimeModelExceptionEvent,
+    RealtimeModelInputAudioTranscriptionCompletedEvent,
+    RealtimeModelItemDeletedEvent,
+    RealtimeModelItemUpdatedEvent,
+    RealtimeModelOtherEvent,
+    RealtimeModelToolCallEvent,
+    RealtimeModelTranscriptDeltaEvent,
+    RealtimeModelTurnEndedEvent,
+    RealtimeModelTurnStartedEvent,
+)
+from .model_inputs import (
+    RealtimeModelInputTextContent,
+    RealtimeModelRawClientMessage,
+    RealtimeModelSendAudio,
+    RealtimeModelSendEvent,
+    RealtimeModelSendInterrupt,
+    RealtimeModelSendRawMessage,
+    RealtimeModelSendSessionUpdate,
+    RealtimeModelSendToolOutput,
+    RealtimeModelSendUserInput,
+    RealtimeModelUserInput,
+    RealtimeModelUserInputMessage,
+)
+from .openai_realtime import (
+    DEFAULT_MODEL_SETTINGS,
+    OpenAIRealtimeWebSocketModel,
+    get_api_key,
+)
+from .runner import RealtimeRunner
+from .session import RealtimeSession
+
+__all__ = [
+    # Agent
+    "RealtimeAgent",
+    "RealtimeAgentHooks",
+    "RealtimeRunHooks",
+    "RealtimeRunner",
+    # Config
+    "RealtimeAudioFormat",
+    "RealtimeClientMessage",
+    "RealtimeGuardrailsSettings",
+    "RealtimeInputAudioTranscriptionConfig",
+    "RealtimeModelName",
+    "RealtimeModelTracingConfig",
+    "RealtimeRunConfig",
+    "RealtimeSessionModelSettings",
+    "RealtimeTurnDetectionConfig",
+    "RealtimeUserInput",
+    "RealtimeUserInputMessage",
+    "RealtimeUserInputText",
+    # Events
+    "RealtimeAgentEndEvent",
+    "RealtimeAgentStartEvent",
+    "RealtimeAudio",
+    "RealtimeAudioEnd",
+    "RealtimeAudioInterrupted",
+    "RealtimeError",
+    "RealtimeEventInfo",
+    "RealtimeGuardrailTripped",
+    "RealtimeHandoffEvent",
+    "RealtimeHistoryAdded",
+    "RealtimeHistoryUpdated",
+    "RealtimeRawModelEvent",
+    "RealtimeSessionEvent",
+    "RealtimeToolEnd",
+    "RealtimeToolStart",
+    # Items
+    "AssistantMessageItem",
+    "AssistantText",
+    "InputAudio",
+    "InputText",
+    "RealtimeItem",
+    "RealtimeMessageItem",
+    "RealtimeResponse",
+    "RealtimeToolCallItem",
+    "SystemMessageItem",
+    "UserMessageItem",
+    # Model
+    "RealtimeModel",
+    "RealtimeModelConfig",
+    "RealtimeModelListener",
+    # Model Events
+    "RealtimeConnectionStatus",
+    "RealtimeModelAudioDoneEvent",
+    "RealtimeModelAudioEvent",
+    "RealtimeModelAudioInterruptedEvent",
+    "RealtimeModelConnectionStatusEvent",
+    "RealtimeModelErrorEvent",
+    "RealtimeModelEvent",
+    "RealtimeModelExceptionEvent",
+    "RealtimeModelInputAudioTranscriptionCompletedEvent",
+    "RealtimeModelItemDeletedEvent",
+    "RealtimeModelItemUpdatedEvent",
+    "RealtimeModelOtherEvent",
+    "RealtimeModelToolCallEvent",
+    "RealtimeModelTranscriptDeltaEvent",
+    "RealtimeModelTurnEndedEvent",
+    "RealtimeModelTurnStartedEvent",
+    # Model Inputs
+    "RealtimeModelInputTextContent",
+    "RealtimeModelRawClientMessage",
+    "RealtimeModelSendAudio",
+    "RealtimeModelSendEvent",
+    "RealtimeModelSendInterrupt",
+    "RealtimeModelSendRawMessage",
+    "RealtimeModelSendSessionUpdate",
+    "RealtimeModelSendToolOutput",
+    "RealtimeModelSendUserInput",
+    "RealtimeModelUserInput",
+    "RealtimeModelUserInputMessage",
+    # OpenAI Realtime
+    "DEFAULT_MODEL_SETTINGS",
+    "OpenAIRealtimeWebSocketModel",
+    "get_api_key",
+    # Session
+    "RealtimeSession",
+]
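Putting the new exports together, a minimal sketch (not part of the diff) of driving a realtime session; the flow is inferred from the names above, so constructor and event details may differ:

import asyncio

from agents.realtime import RealtimeAgent, RealtimeRunner


async def main() -> None:
    agent = RealtimeAgent(
        name="Assistant",
        instructions="You are a helpful voice assistant.",
    )
    runner = RealtimeRunner(agent)
    session = await runner.run()
    async with session:
        async for event in session:  # yields RealtimeSessionEvent values
            if event.type == "error":
                break


asyncio.run(main())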
agents/realtime/agent.py
ADDED

@@ -0,0 +1,80 @@
+from __future__ import annotations
+
+import dataclasses
+import inspect
+from collections.abc import Awaitable
+from dataclasses import dataclass
+from typing import Any, Callable, Generic, cast
+
+from ..agent import AgentBase
+from ..lifecycle import AgentHooksBase, RunHooksBase
+from ..logger import logger
+from ..run_context import RunContextWrapper, TContext
+from ..util._types import MaybeAwaitable
+
+RealtimeAgentHooks = AgentHooksBase[TContext, "RealtimeAgent[TContext]"]
+"""Agent hooks for `RealtimeAgent`s."""
+
+RealtimeRunHooks = RunHooksBase[TContext, "RealtimeAgent[TContext]"]
+"""Run hooks for `RealtimeAgent`s."""
+
+
+@dataclass
+class RealtimeAgent(AgentBase, Generic[TContext]):
+    """A specialized agent instance that is meant to be used within a `RealtimeSession` to build
+    voice agents. Due to the nature of this agent, some configuration options are not supported
+    that are supported by regular `Agent` instances. For example:
+    - `model` choice is not supported, as all RealtimeAgents will be handled by the same model
+      within a `RealtimeSession`.
+    - `modelSettings` is not supported, as all RealtimeAgents will be handled by the same model
+      within a `RealtimeSession`.
+    - `outputType` is not supported, as RealtimeAgents do not support structured outputs.
+    - `toolUseBehavior` is not supported, as all RealtimeAgents will be handled by the same model
+      within a `RealtimeSession`.
+    - `voice` can be configured on an `Agent` level; however, it cannot be changed after the first
+      agent within a `RealtimeSession` has spoken.
+
+    See `AgentBase` for base parameters that are shared with `Agent`s.
+    """
+
+    instructions: (
+        str
+        | Callable[
+            [RunContextWrapper[TContext], RealtimeAgent[TContext]],
+            MaybeAwaitable[str],
+        ]
+        | None
+    ) = None
+    """The instructions for the agent. Will be used as the "system prompt" when this agent is
+    invoked. Describes what the agent should do, and how it responds.
+
+    Can either be a string, or a function that dynamically generates instructions for the agent. If
+    you provide a function, it will be called with the context and the agent instance. It must
+    return a string.
+    """
+
+    hooks: RealtimeAgentHooks | None = None
+    """A class that receives callbacks on various lifecycle events for this agent.
+    """
+
+    def clone(self, **kwargs: Any) -> RealtimeAgent[TContext]:
+        """Make a copy of the agent, with the given arguments changed. For example, you could do:
+        ```
+        new_agent = agent.clone(instructions="New instructions")
+        ```
+        """
+        return dataclasses.replace(self, **kwargs)
+
+    async def get_system_prompt(self, run_context: RunContextWrapper[TContext]) -> str | None:
+        """Get the system prompt for the agent."""
+        if isinstance(self.instructions, str):
+            return self.instructions
+        elif callable(self.instructions):
+            if inspect.iscoroutinefunction(self.instructions):
+                return await cast(Awaitable[str], self.instructions(run_context, self))
+            else:
+                return cast(str, self.instructions(run_context, self))
+        elif self.instructions is not None:
+            logger.error(f"Instructions must be a string or a function, got {self.instructions}")
+
+        return None
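`get_system_prompt` accepts `instructions` as a plain string, a sync callable, or an async callable. A minimal sketch (not part of the diff) of the callable form; the context dataclass is made up:

from dataclasses import dataclass

from agents import RunContextWrapper
from agents.realtime import RealtimeAgent


@dataclass
class UserInfo:
    name: str


def dynamic_instructions(
    ctx: RunContextWrapper[UserInfo], agent: RealtimeAgent[UserInfo]
) -> str:
    # Called with the run context and the agent instance; must return a string.
    return f"Address the user as {ctx.context.name}."


agent = RealtimeAgent(name="Assistant", instructions=dynamic_instructions)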