@openai/agents-realtime 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +13 -0
- package/dist/bundle/openai-realtime-agents.mjs +8777 -0
- package/dist/bundle/openai-realtime-agents.umd.js +11 -0
- package/dist/clientMessages.d.ts +40 -0
- package/dist/clientMessages.js +2 -0
- package/dist/clientMessages.js.map +1 -0
- package/dist/clientMessages.mjs +2 -0
- package/dist/clientMessages.mjs.map +1 -0
- package/dist/guardrail.d.ts +32 -0
- package/dist/guardrail.js +34 -0
- package/dist/guardrail.js.map +1 -0
- package/dist/guardrail.mjs +34 -0
- package/dist/guardrail.mjs.map +1 -0
- package/dist/index.d.ts +18 -0
- package/dist/index.js +13 -0
- package/dist/index.js.map +1 -0
- package/dist/index.mjs +13 -0
- package/dist/index.mjs.map +1 -0
- package/dist/items.d.ts +183 -0
- package/dist/items.js +47 -0
- package/dist/items.js.map +1 -0
- package/dist/items.mjs +47 -0
- package/dist/items.mjs.map +1 -0
- package/dist/logger.d.ts +2 -0
- package/dist/logger.js +4 -0
- package/dist/logger.js.map +1 -0
- package/dist/logger.mjs +4 -0
- package/dist/logger.mjs.map +1 -0
- package/dist/metadata.d.ts +9 -0
- package/dist/metadata.js +11 -0
- package/dist/metadata.js.map +1 -0
- package/dist/metadata.mjs +11 -0
- package/dist/metadata.mjs.map +1 -0
- package/dist/openaiRealtimeBase.d.ts +143 -0
- package/dist/openaiRealtimeBase.js +449 -0
- package/dist/openaiRealtimeBase.js.map +1 -0
- package/dist/openaiRealtimeBase.mjs +449 -0
- package/dist/openaiRealtimeBase.mjs.map +1 -0
- package/dist/openaiRealtimeEvents.d.ts +3242 -0
- package/dist/openaiRealtimeEvents.js +439 -0
- package/dist/openaiRealtimeEvents.js.map +1 -0
- package/dist/openaiRealtimeEvents.mjs +439 -0
- package/dist/openaiRealtimeEvents.mjs.map +1 -0
- package/dist/openaiRealtimeWebRtc.d.ts +102 -0
- package/dist/openaiRealtimeWebRtc.js +245 -0
- package/dist/openaiRealtimeWebRtc.js.map +1 -0
- package/dist/openaiRealtimeWebRtc.mjs +245 -0
- package/dist/openaiRealtimeWebRtc.mjs.map +1 -0
- package/dist/openaiRealtimeWebsocket.d.ts +126 -0
- package/dist/openaiRealtimeWebsocket.js +293 -0
- package/dist/openaiRealtimeWebsocket.js.map +1 -0
- package/dist/openaiRealtimeWebsocket.mjs +293 -0
- package/dist/openaiRealtimeWebsocket.mjs.map +1 -0
- package/dist/realtimeAgent.d.ts +49 -0
- package/dist/realtimeAgent.js +37 -0
- package/dist/realtimeAgent.js.map +1 -0
- package/dist/realtimeAgent.mjs +37 -0
- package/dist/realtimeAgent.mjs.map +1 -0
- package/dist/realtimeSession.d.ts +210 -0
- package/dist/realtimeSession.js +469 -0
- package/dist/realtimeSession.js.map +1 -0
- package/dist/realtimeSession.mjs +469 -0
- package/dist/realtimeSession.mjs.map +1 -0
- package/dist/realtimeSessionEvents.d.ts +118 -0
- package/dist/realtimeSessionEvents.js +2 -0
- package/dist/realtimeSessionEvents.js.map +1 -0
- package/dist/realtimeSessionEvents.mjs +2 -0
- package/dist/realtimeSessionEvents.mjs.map +1 -0
- package/dist/shims/shims-browser.d.ts +9 -0
- package/dist/shims/shims-browser.js +6 -0
- package/dist/shims/shims-browser.js.map +1 -0
- package/dist/shims/shims-browser.mjs +6 -0
- package/dist/shims/shims-browser.mjs.map +1 -0
- package/dist/shims/shims-node.d.ts +2 -0
- package/dist/shims/shims-node.js +5 -0
- package/dist/shims/shims-node.js.map +1 -0
- package/dist/shims/shims-node.mjs +5 -0
- package/dist/shims/shims-node.mjs.map +1 -0
- package/dist/shims/shims.d.ts +1 -0
- package/dist/shims/shims.js +2 -0
- package/dist/shims/shims.js.map +1 -0
- package/dist/shims/shims.mjs +2 -0
- package/dist/shims/shims.mjs.map +1 -0
- package/dist/transportLayer.d.ts +96 -0
- package/dist/transportLayer.js +2 -0
- package/dist/transportLayer.js.map +1 -0
- package/dist/transportLayer.mjs +2 -0
- package/dist/transportLayer.mjs.map +1 -0
- package/dist/transportLayerEvents.d.ts +99 -0
- package/dist/transportLayerEvents.js +2 -0
- package/dist/transportLayerEvents.js.map +1 -0
- package/dist/transportLayerEvents.mjs +2 -0
- package/dist/transportLayerEvents.mjs.map +1 -0
- package/dist/utils.d.ts +61 -0
- package/dist/utils.js +183 -0
- package/dist/utils.js.map +1 -0
- package/dist/utils.mjs +183 -0
- package/dist/utils.mjs.map +1 -0
- package/package.json +77 -0
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
import { isBrowserEnvironment, WebSocket, } from '@openai/agents-realtime/_shims';
|
|
2
|
+
import { OpenAIRealtimeBase, } from "./openaiRealtimeBase.js";
|
|
3
|
+
import { base64ToArrayBuffer, HEADERS, WEBSOCKET_META } from "./utils.js";
|
|
4
|
+
import { UserError } from '@openai/agents-core';
|
|
5
|
+
import { parseRealtimeEvent } from "./openaiRealtimeEvents.js";
|
|
6
|
+
/**
|
|
7
|
+
* Transport layer that's handling the connection between the client and OpenAI's Realtime API
|
|
8
|
+
* via WebSockets. While this transport layer is designed to be used within a RealtimeSession, it
|
|
9
|
+
* can also be used standalone if you want to have a direct connection to the Realtime API.
|
|
10
|
+
*/
|
|
11
|
+
export class OpenAIRealtimeWebSocket extends OpenAIRealtimeBase {
    #apiKey;
    #url;
    // Connection state machine; `websocket` is only defined while connecting/connected.
    #state = {
        status: 'disconnected',
        websocket: undefined,
    };
    #useInsecureApiKey;
    // Item id / content index of the assistant audio currently streaming. Used to
    // build the `conversation.item.truncate` event when the user interrupts.
    #currentItemId;
    #currentAudioContentIndex;
    /**
     * Timestamp maintained by the transport layer to aid with the calculation of the elapsed time
     * since the response started to compute the right interruption time.
     *
     * Mostly internal but might be used by extended transport layers for their interruption
     * calculation.
     */
    _firstAudioTimestamp;
    // Total duration (ms) of assistant audio received for the current response,
    // assuming 24kHz pcm16le (see the `response.audio.delta` handler).
    _audioLengthMs = 0;
    // True between `response.created` and `response.done`; gates `response.cancel`.
    #ongoingResponse = false;
    /**
     * @param options - Transport options; `useInsecureApiKey` permits a regular
     * (non-ephemeral) API key in browser environments. Defaults to false.
     */
    constructor(options = {}) {
        super(options);
        // Default endpoint; `connect()` may override it via `options.url`.
        this.#url = `wss://api.openai.com/v1/realtime?model=${this.currentModel}`;
        this.#useInsecureApiKey = options.useInsecureApiKey ?? false;
    }
    /**
     * The current status of the WebSocket connection
     * ('disconnected' | 'connecting' | 'connected').
     */
    get status() {
        return this.#state.status;
    }
    /**
     * The current connection state of the WebSocket connection
     * (status plus the underlying socket, if any).
     */
    get connectionState() {
        return this.#state;
    }
    /**
     * Always returns `null` as the WebSocket transport layer does not handle muting. Instead,
     * this should be handled by the client by not triggering the `sendAudio` method.
     */
    get muted() {
        return null;
    }
    /**
     * The current item ID of the ongoing response.
     */
    get currentItemId() {
        return this.#currentItemId;
    }
    /**
     * Triggers the `audio` event that a client might listen to to receive the audio buffer.
     * Protected for you to be able to override and disable emitting this event in case your extended
     * transport layer handles audio internally.
     *
     * @param audioEvent - The audio event to emit.
     */
    _onAudio(audioEvent) {
        this.emit('audio', audioEvent);
    }
    // Opens the WebSocket and wires up all event handlers. `resolve`/`reject`
    // settle the promise created in `connect()`; `sessionConfig` supplies the
    // tracing preference applied once `session.created` arrives.
    #setupWebSocket(resolve, reject, sessionConfig) {
        // Already connecting/connected: reuse the existing socket.
        if (this.#state.websocket) {
            resolve();
            return;
        }
        if (!this.#apiKey) {
            throw new UserError('API key is not set. Please call `connect()` with an API key first.');
        }
        // In the browser only ephemeral client keys ('ek_'-prefixed) are allowed
        // unless the caller explicitly opted in via `useInsecureApiKey`.
        if (isBrowserEnvironment() &&
            !this.#apiKey.startsWith('ek_') &&
            !this.#useInsecureApiKey) {
            throw new UserError('Using the WebSocket connection in a browser environment requires an ephemeral client key. If you have to use a regular API key, set the `useInsecureApiKey` option to true.');
        }
        // Browser sockets carry auth/meta via subprotocol entries (headers are not
        // available there); the Node path sends regular HTTP headers instead.
        const websocketArguments = isBrowserEnvironment()
            ? [
                'realtime',
                // Auth
                'openai-insecure-api-key.' + this.#apiKey,
                // Beta protocol, required
                'openai-beta.realtime-v1',
                // Version header
                WEBSOCKET_META,
            ]
            : {
                headers: {
                    Authorization: `Bearer ${this.#apiKey}`,
                    'OpenAI-Beta': 'realtime=v1',
                    ...HEADERS,
                },
            };
        const ws = new WebSocket(this.#url, websocketArguments);
        this.#state = {
            status: 'connecting',
            websocket: ws,
        };
        this.emit('connection_change', this.#state.status);
        ws.addEventListener('open', () => {
            this.#state = {
                status: 'connected',
                websocket: ws,
            };
            this.emit('connection_change', this.#state.status);
            this._onOpen();
            resolve();
        });
        ws.addEventListener('error', (error) => {
            this._onError(error);
            this.#state = {
                status: 'disconnected',
                websocket: undefined,
            };
            this.emit('connection_change', this.#state.status);
            reject(error);
        });
        ws.addEventListener('message', (message) => {
            this._onMessage(message);
            const { data: parsed, isGeneric } = parseRealtimeEvent(message);
            if (!parsed || isGeneric) {
                return;
            }
            if (parsed.type === 'response.audio.delta') {
                this.#currentAudioContentIndex = parsed.content_index;
                this.#currentItemId = parsed.item_id;
                if (this._firstAudioTimestamp === undefined) {
                    // If the response start timestamp is not set, we set it to the current time.
                    // This is used to calculate the elapsed time for interruption.
                    this._firstAudioTimestamp = Date.now();
                    this._audioLengthMs = 0;
                }
                const buff = base64ToArrayBuffer(parsed.delta);
                // calculate the audio length in milliseconds assuming 24kHz pcm16le
                this._audioLengthMs += buff.byteLength / 24 / 2; // 24kHz * 2 bytes per sample
                const audioEvent = {
                    type: 'audio',
                    data: buff,
                    responseId: parsed.response_id,
                };
                this._onAudio(audioEvent);
            }
            else if (parsed.type === 'input_audio_buffer.speech_started') {
                // Server-side VAD detected the user speaking: interrupt playback.
                this.interrupt();
            }
            else if (parsed.type === 'response.created') {
                this.#ongoingResponse = true;
            }
            else if (parsed.type === 'response.done') {
                this.#ongoingResponse = false;
            }
            else if (parsed.type === 'session.created') {
                this._tracingConfig = parsed.session.tracing;
                // Trying to turn on tracing after the session is created
                this._updateTracingConfig(sessionConfig.tracing ?? 'auto');
            }
        });
        ws.addEventListener('close', () => {
            this.#state = {
                status: 'disconnected',
                websocket: undefined,
            };
            this.emit('connection_change', this.#state.status);
            this._onClose();
        });
    }
    /**
     * Connect to the Realtime API: resolves model/key/URL, opens the WebSocket
     * and pushes the initial session configuration once the socket is open.
     *
     * @param options - Connection options (apiKey, model, url, initialSessionConfig).
     */
    async connect(options) {
        const model = options.model ?? this.currentModel;
        this.currentModel = model;
        this.#apiKey = await this._getApiKey(options);
        this.#url =
            options.url ??
                `wss://api.openai.com/v1/realtime?model=${this.currentModel}`;
        const sessionConfig = {
            ...(options.initialSessionConfig || {}),
            model: this.currentModel,
        };
        // Resolves on the socket's 'open' event; rejects on 'error' or on a
        // synchronous setup failure (missing key, insecure key in a browser).
        await new Promise((resolve, reject) => {
            try {
                this.#setupWebSocket(resolve, reject, sessionConfig);
            }
            catch (error) {
                reject(error);
            }
        });
        await this.updateSessionConfig(sessionConfig);
    }
    /**
     * Send an event to the Realtime API. This will stringify the event and send it directly to the
     * API. This can be used if you want to take control over the connection and send events manually.
     *
     * @param event - The event to send.
     * @throws Error if the WebSocket is not connected.
     */
    sendEvent(event) {
        if (!this.#state.websocket) {
            throw new Error('WebSocket is not connected. Make sure you call `connect()` before sending events.');
        }
        this.#state.websocket.send(JSON.stringify(event));
    }
    /**
     * Close the WebSocket connection.
     *
     * This will also reset any internal connection tracking used for interruption handling.
     */
    close() {
        this.#state.websocket?.close();
        this.#currentItemId = undefined;
        this._firstAudioTimestamp = undefined;
        this._audioLengthMs = 0;
        this.#currentAudioContentIndex = undefined;
    }
    /**
     * Will throw an error as the WebSocket transport layer does not support muting.
     */
    mute(_muted) {
        throw new Error('Mute is not supported for the WebSocket transport. You have to mute the audio input yourself.');
    }
    /**
     * Send an audio buffer to the Realtime API. This is used for your client to send audio to the
     * model to respond. Silently dropped unless the connection status is 'connected'.
     *
     * @param audio - The audio buffer to send.
     * @param options - The options for the audio buffer.
     */
    sendAudio(audio, options = {}) {
        if (this.#state.status === 'connected') {
            super.sendAudio(audio, options);
        }
    }
    /**
     * Send a cancel response event to the Realtime API. This is used to cancel an ongoing
     * response that the model is currently generating.
     */
    _cancelResponse() {
        // cancel the ongoing response
        if (this.#ongoingResponse) {
            this.sendEvent({
                type: 'response.cancel',
            });
        }
    }
    /**
     * Do NOT call this method directly. Call `interrupt()` instead for proper interruption handling.
     *
     * This method is used to send the right events to the API to inform the model that the user has
     * interrupted the response. It might be overridden/extended by an extended transport layer. See
     * the `TwilioRealtimeTransportLayer` for an example.
     *
     * @param elapsedTime - The elapsed time since the response started.
     */
    _interrupt(elapsedTime) {
        // immediately emit this event so the client can stop playing audio
        this.emit('audio_interrupted');
        this.sendEvent({
            type: 'conversation.item.truncate',
            item_id: this.#currentItemId,
            content_index: this.#currentAudioContentIndex,
            audio_end_ms: elapsedTime,
        });
    }
    /**
     * Interrupt the ongoing response. This method is triggered automatically by the client when
     * voice activity detection (VAD) is enabled (default) as well as when an output guardrail got
     * triggered.
     *
     * You can also call this method directly if you want to interrupt the conversation for example
     * based on an event in the client.
     */
    interrupt() {
        // Nothing to interrupt if no assistant audio has started streaming.
        if (!this.#currentItemId || typeof this._firstAudioTimestamp !== 'number') {
            return;
        }
        this._cancelResponse();
        const elapsedTime = Date.now() - this._firstAudioTimestamp;
        // Fix: removed leftover `console.log` debug statements that unconditionally
        // logged elapsed time and audio length from library code on every interrupt.
        // Only truncate while playback could still be ongoing; once elapsed time
        // exceeds the received audio length the response has fully played out.
        if (elapsedTime >= 0 && elapsedTime < this._audioLengthMs) {
            this._interrupt(elapsedTime);
        }
        this.#currentItemId = undefined;
        this._firstAudioTimestamp = undefined;
        this._audioLengthMs = 0;
        this.#currentAudioContentIndex = undefined;
    }
}
|
|
293
|
+
//# sourceMappingURL=openaiRealtimeWebsocket.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openaiRealtimeWebsocket.js","sourceRoot":"","sources":["../src/openaiRealtimeWebsocket.ts"],"names":[],"mappings":"OAAO,EACL,oBAAoB,EACpB,SAAS,GACV,MAAM,gCAAgC;OAOhC,EACL,kBAAkB,GAEnB;OACM,EAAE,mBAAmB,EAAE,OAAO,EAAE,cAAc,EAAE;OAChD,EAAE,SAAS,EAAE,MAAM,qBAAqB;OAExC,EAAE,kBAAkB,EAAE;AAiC7B;;;;GAIG;AACH,MAAM,OAAO,uBACX,SAAQ,kBAAkB;IAG1B,OAAO,CAAqB;IAC5B,IAAI,CAAS;IACb,MAAM,GAAmB;QACvB,MAAM,EAAE,cAAc;QACtB,SAAS,EAAE,SAAS;KACrB,CAAC;IACF,kBAAkB,CAAU;IAC5B,cAAc,CAAqB;IACnC,yBAAyB,CAAqB;IAC9C;;;;;;OAMG;IACO,oBAAoB,CAAqB;IACzC,cAAc,GAAW,CAAC,CAAC;IACrC,gBAAgB,GAAY,KAAK,CAAC;IAElC,YAAY,UAA0C,EAAE;QACtD,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,0CAA0C,IAAI,CAAC,YAAY,EAAE,CAAC;QAC1E,IAAI,CAAC,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,IAAI,KAAK,CAAC;IAC/D,CAAC;IAED;;OAEG;IACH,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,IAAI,eAAe;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED;;;OAGG;IACH,IAAI,KAAK;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,IAAc,aAAa;QACzB,OAAO,IAAI,CAAC,cAAc,CAAC;IAC7B,CAAC;IAED;;;;;;OAMG;IACO,QAAQ,CAAC,UAA+B;QAChD,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;IACjC,CAAC;IAED,eAAe,CACb,OAA8B,EAC9B,MAA8B,EAC9B,aAA6C;QAE7C,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YAC1B,OAAO,EAAE,CAAC;YACV,OAAO;QACT,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,MAAM,IAAI,SAAS,CACjB,oEAAoE,CACrE,CAAC;QACJ,CAAC;QAED,IACE,oBAAoB,EAAE;YACtB,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;YAC/B,CAAC,IAAI,CAAC,kBAAkB,EACxB,CAAC;YACD,MAAM,IAAI,SAAS,CACjB,6KAA6K,CAC9K,CAAC;QACJ,CAAC;QAED,MAAM,kBAAkB,GAAG,oBAAoB,EAAE;YAC/C,CAAC,CAAC;gBACE,UAAU;gBACV,OAAO;gBACP,0BAA0B,GAAG,IAAI,CAAC,OAAO;gBACzC,0BAA0B;gBAC1B,yBAAyB;gBACzB,iBAAiB;gBACjB,cAAc;aACf;YACH,CAAC,CAAC;gBACE,OAAO,EAAE;oBACP,aAAa,EAAE,UAAU,IAAI,CAAC,OAAO,EAAE;oBACvC,aAAa,EAAE,aAAa;oBAC5B,GAAG,OAAO;iBACX;aACF,CAAC;QAEN,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,kBAAyB,CAAC,CAAC;QAC/D,IAAI,CAAC,MAAM,GAAG;YACZ,MAAM,EAAE,YAAY;YACpB,SAAS,EAAE,EAAE;SACd,CAAC;QACF,IAAI,CAAC,IAAI,CAAC
,mBAAmB,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEnD,EAAE,CAAC,gBAAgB,CAAC,MAAM,EAAE,GAAG,EAAE;YAC/B,IAAI,CAAC,MAAM,GAAG;gBACZ,MAAM,EAAE,WAAW;gBACnB,SAAS,EAAE,EAAE;aACd,CAAC;YACF,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACnD,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,OAAO,EAAE,CAAC;QACZ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;YACrC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACrB,IAAI,CAAC,MAAM,GAAG;gBACZ,MAAM,EAAE,cAAc;gBACtB,SAAS,EAAE,SAAS;aACrB,CAAC;YACF,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACnD,MAAM,CAAC,KAAK,CAAC,CAAC;QAChB,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gBAAgB,CAAC,SAAS,EAAE,CAAC,OAAO,EAAE,EAAE;YACzC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YACzB,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;YAChE,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;gBACzB,OAAO;YACT,CAAC;YAED,IAAI,MAAM,CAAC,IAAI,KAAK,sBAAsB,EAAE,CAAC;gBAC3C,IAAI,CAAC,yBAAyB,GAAG,MAAM,CAAC,aAAa,CAAC;gBACtD,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC;gBACrC,IAAI,IAAI,CAAC,oBAAoB,KAAK,SAAS,EAAE,CAAC;oBAC5C,6EAA6E;oBAC7E,+DAA+D;oBAC/D,IAAI,CAAC,oBAAoB,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;oBACvC,IAAI,CAAC,cAAc,GAAG,CAAC,CAAC;gBAC1B,CAAC;gBAED,MAAM,IAAI,GAAG,mBAAmB,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAC/C,oEAAoE;gBACpE,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;gBAE9E,MAAM,UAAU,GAAwB;oBACtC,IAAI,EAAE,OAAO;oBACb,IAAI,EAAE,IAAI;oBACV,UAAU,EAAE,MAAM,CAAC,WAAW;iBAC/B,CAAC;gBACF,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;YAC5B,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,mCAAmC,EAAE,CAAC;gBAC/D,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;gBAC9C,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC;YAC/B,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;gBAC3C,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC;YAChC,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,iBAAiB,EAAE,CAAC;gBAC7C,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC;gBAC7C,yDAAyD;gBACzD,IAAI,CAAC,oBAAoB,CAAC,aAAa,CAAC,OAAO,IAAI,MAAM,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,EA
AE,CAAC,gBAAgB,CAAC,OAAO,EAAE,GAAG,EAAE;YAChC,IAAI,CAAC,MAAM,GAAG;gBACZ,MAAM,EAAE,cAAc;gBACtB,SAAS,EAAE,SAAS;aACrB,CAAC;YACF,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACnD,IAAI,CAAC,QAAQ,EAAE,CAAC;QAClB,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,OAA6C;QACzD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;QACjD,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;QAC1B,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,IAAI;YACP,OAAO,CAAC,GAAG;gBACX,0CAA0C,IAAI,CAAC,YAAY,EAAE,CAAC;QAEhE,MAAM,aAAa,GAAmC;YACpD,GAAG,CAAC,OAAO,CAAC,oBAAoB,IAAI,EAAE,CAAC;YACvC,KAAK,EAAE,IAAI,CAAC,YAAY;SACzB,CAAC;QAEF,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC1C,IAAI,CAAC;gBACH,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,aAAa,CAAC,CAAC;YACvD,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,CAAC,KAAK,CAAC,CAAC;YAChB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,CAAC;IAChD,CAAC;IAED;;;;;OAKG;IACH,SAAS,CAAC,KAA4B;QACpC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CACb,mFAAmF,CACpF,CAAC;QACJ,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;;;OAIG;IACH,KAAK;QACH,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,KAAK,EAAE,CAAC;QAC/B,IAAI,CAAC,cAAc,GAAG,SAAS,CAAC;QAChC,IAAI,CAAC,oBAAoB,GAAG,SAAS,CAAC;QACtC,IAAI,CAAC,cAAc,GAAG,CAAC,CAAC;QACxB,IAAI,CAAC,yBAAyB,GAAG,SAAS,CAAC;IAC7C,CAAC;IAED;;OAEG;IACH,IAAI,CAAC,MAAe;QAClB,MAAM,IAAI,KAAK,CACb,+FAA+F,CAChG,CAAC;IACJ,CAAC;IAED;;;;;;OAMG;IACH,SAAS,CAAC,KAAkB,EAAE,UAAgC,EAAE;QAC9D,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YACvC,KAAK,CAAC,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,eAAe;QACb,8BAA8B;QAC9B,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC;gBACb,IAAI,EAAE,iBAAiB;aACxB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;;;;;;;OAQG;IACH,UAAU,CAAC,WAAmB;QAC5B,mEAAmE;QACnE,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAC/B,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,4BAA4B;YAClC,OAAO,EAAE,IAAI,CAAC,cAAc;YAC5B,aAAa
,EAAE,IAAI,CAAC,yBAAyB;YAC7C,YAAY,EAAE,WAAW;SAC1B,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,SAAS;QACP,IAAI,CAAC,IAAI,CAAC,cAAc,IAAI,OAAO,IAAI,CAAC,oBAAoB,KAAK,QAAQ,EAAE,CAAC;YAC1E,OAAO;QACT,CAAC;QAED,IAAI,CAAC,eAAe,EAAE,CAAC;QAEvB,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,oBAAoB,CAAC;QAC3D,OAAO,CAAC,GAAG,CAAC,+BAA+B,WAAW,IAAI,CAAC,CAAC;QAC5D,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,CAAC,cAAc,IAAI,CAAC,CAAC;QACtD,IAAI,WAAW,IAAI,CAAC,IAAI,WAAW,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;YAC1D,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;QAC/B,CAAC;QAED,IAAI,CAAC,cAAc,GAAG,SAAS,CAAC;QAChC,IAAI,CAAC,oBAAoB,GAAG,SAAS,CAAC;QACtC,IAAI,CAAC,cAAc,GAAG,CAAC,CAAC;QACxB,IAAI,CAAC,yBAAyB,GAAG,SAAS,CAAC;IAC7C,CAAC;CACF"}
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
import { isBrowserEnvironment, WebSocket, } from '@openai/agents-realtime/_shims';
|
|
2
|
+
import { OpenAIRealtimeBase, } from "./openaiRealtimeBase.mjs";
|
|
3
|
+
import { base64ToArrayBuffer, HEADERS, WEBSOCKET_META } from "./utils.mjs";
|
|
4
|
+
import { UserError } from '@openai/agents-core';
|
|
5
|
+
import { parseRealtimeEvent } from "./openaiRealtimeEvents.mjs";
|
|
6
|
+
/**
|
|
7
|
+
* Transport layer that's handling the connection between the client and OpenAI's Realtime API
|
|
8
|
+
* via WebSockets. While this transport layer is designed to be used within a RealtimeSession, it
|
|
9
|
+
* can also be used standalone if you want to have a direct connection to the Realtime API.
|
|
10
|
+
*/
|
|
11
|
+
export class OpenAIRealtimeWebSocket extends OpenAIRealtimeBase {
|
|
12
|
+
#apiKey;
|
|
13
|
+
#url;
|
|
14
|
+
#state = {
|
|
15
|
+
status: 'disconnected',
|
|
16
|
+
websocket: undefined,
|
|
17
|
+
};
|
|
18
|
+
#useInsecureApiKey;
|
|
19
|
+
#currentItemId;
|
|
20
|
+
#currentAudioContentIndex;
|
|
21
|
+
/**
|
|
22
|
+
* Timestamp maintained by the transport layer to aid with the calculation of the elapsed time
|
|
23
|
+
* since the response started to compute the right interruption time.
|
|
24
|
+
*
|
|
25
|
+
* Mostly internal but might be used by extended transport layers for their interruption
|
|
26
|
+
* calculation.
|
|
27
|
+
*/
|
|
28
|
+
_firstAudioTimestamp;
|
|
29
|
+
_audioLengthMs = 0;
|
|
30
|
+
#ongoingResponse = false;
|
|
31
|
+
constructor(options = {}) {
|
|
32
|
+
super(options);
|
|
33
|
+
this.#url = `wss://api.openai.com/v1/realtime?model=${this.currentModel}`;
|
|
34
|
+
this.#useInsecureApiKey = options.useInsecureApiKey ?? false;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* The current status of the WebSocket connection.
|
|
38
|
+
*/
|
|
39
|
+
get status() {
    // Lifecycle status of the socket: 'disconnected', 'connecting', or 'connected'.
    return this.#state.status;
}
|
|
42
|
+
/**
|
|
43
|
+
* The current connection state of the WebSocket connection.
|
|
44
|
+
*/
|
|
45
|
+
get connectionState() {
    // Full state object: { status, websocket } — websocket is undefined while disconnected.
    return this.#state;
}
|
|
48
|
+
/**
|
|
49
|
+
* Always returns `null` as the WebSocket transport layer does not handle muting. Instead,
|
|
50
|
+
* this should be handled by the client by not triggering the `sendAudio` method.
|
|
51
|
+
*/
|
|
52
|
+
get muted() {
    // This transport has no mute handling; `null` signals "not applicable".
    return null;
}
|
|
55
|
+
/**
|
|
56
|
+
* The current item ID of the ongoing response.
|
|
57
|
+
*/
|
|
58
|
+
get currentItemId() {
    // Item id of the assistant audio currently streaming (set by `response.audio.delta`).
    return this.#currentItemId;
}
|
|
61
|
+
/**
|
|
62
|
+
* Triggers the `audio` event that a client might listen to to receive the audio buffer.
|
|
63
|
+
* Protected for you to be able to override and disable emitting this event in case your extended
|
|
64
|
+
* transport layer handles audio internally.
|
|
65
|
+
*
|
|
66
|
+
* @param audioEvent - The audio event to emit.
|
|
67
|
+
*/
|
|
68
|
+
_onAudio(audioEvent) {
    // Forward the audio buffer to listeners; subclasses may override to suppress it.
    this.emit('audio', audioEvent);
}
|
|
71
|
+
// Opens the WebSocket and wires up all event handlers. `resolve`/`reject` settle
// the promise created in `connect()`; `sessionConfig` supplies the tracing
// preference applied once `session.created` arrives.
#setupWebSocket(resolve, reject, sessionConfig) {
    // Already connecting/connected: reuse the existing socket.
    if (this.#state.websocket) {
        resolve();
        return;
    }
    if (!this.#apiKey) {
        throw new UserError('API key is not set. Please call `connect()` with an API key first.');
    }
    // In a browser only ephemeral client keys ('ek_'-prefixed) are accepted
    // unless the caller explicitly opted in via `useInsecureApiKey`.
    if (isBrowserEnvironment() &&
        !this.#apiKey.startsWith('ek_') &&
        !this.#useInsecureApiKey) {
        throw new UserError('Using the WebSocket connection in a browser environment requires an ephemeral client key. If you have to use a regular API key, set the `useInsecureApiKey` option to true.');
    }
    // Browser sockets carry auth/meta via subprotocol entries; the Node path
    // sends regular HTTP headers instead.
    const websocketArguments = isBrowserEnvironment()
        ? [
            'realtime',
            // Auth
            'openai-insecure-api-key.' + this.#apiKey,
            // Beta protocol, required
            'openai-beta.realtime-v1',
            // Version header
            WEBSOCKET_META,
        ]
        : {
            headers: {
                Authorization: `Bearer ${this.#apiKey}`,
                'OpenAI-Beta': 'realtime=v1',
                ...HEADERS,
            },
        };
    const ws = new WebSocket(this.#url, websocketArguments);
    this.#state = {
        status: 'connecting',
        websocket: ws,
    };
    this.emit('connection_change', this.#state.status);
    ws.addEventListener('open', () => {
        this.#state = {
            status: 'connected',
            websocket: ws,
        };
        this.emit('connection_change', this.#state.status);
        this._onOpen();
        resolve();
    });
    ws.addEventListener('error', (error) => {
        this._onError(error);
        this.#state = {
            status: 'disconnected',
            websocket: undefined,
        };
        this.emit('connection_change', this.#state.status);
        reject(error);
    });
    ws.addEventListener('message', (message) => {
        this._onMessage(message);
        const { data: parsed, isGeneric } = parseRealtimeEvent(message);
        if (!parsed || isGeneric) {
            return;
        }
        if (parsed.type === 'response.audio.delta') {
            // Track the streaming item so an interruption can truncate it.
            this.#currentAudioContentIndex = parsed.content_index;
            this.#currentItemId = parsed.item_id;
            if (this._firstAudioTimestamp === undefined) {
                // If the response start timestamp is not set, we set it to the current time.
                // This is used to calculate the elapsed time for interruption.
                this._firstAudioTimestamp = Date.now();
                this._audioLengthMs = 0;
            }
            const buff = base64ToArrayBuffer(parsed.delta);
            // calculate the audio length in milliseconds assuming 24kHz pcm16le
            this._audioLengthMs += buff.byteLength / 24 / 2; // 24kHz * 2 bytes per sample
            const audioEvent = {
                type: 'audio',
                data: buff,
                responseId: parsed.response_id,
            };
            this._onAudio(audioEvent);
        }
        else if (parsed.type === 'input_audio_buffer.speech_started') {
            // Server-side VAD detected the user speaking: interrupt playback.
            this.interrupt();
        }
        else if (parsed.type === 'response.created') {
            this.#ongoingResponse = true;
        }
        else if (parsed.type === 'response.done') {
            this.#ongoingResponse = false;
        }
        else if (parsed.type === 'session.created') {
            this._tracingConfig = parsed.session.tracing;
            // Trying to turn on tracing after the session is created
            this._updateTracingConfig(sessionConfig.tracing ?? 'auto');
        }
    });
    ws.addEventListener('close', () => {
        this.#state = {
            status: 'disconnected',
            websocket: undefined,
        };
        this.emit('connection_change', this.#state.status);
        this._onClose();
    });
}
|
|
174
|
+
// Connect to the Realtime API: resolve model/key/URL, open the WebSocket,
// then push the initial session configuration once the socket is open.
async connect(options) {
    const model = options.model ?? this.currentModel;
    this.currentModel = model;
    this.#apiKey = await this._getApiKey(options);
    // Caller-provided URL wins; otherwise build the default endpoint for the model.
    this.#url =
        options.url ??
            `wss://api.openai.com/v1/realtime?model=${this.currentModel}`;
    const sessionConfig = {
        ...(options.initialSessionConfig || {}),
        model: this.currentModel,
    };
    // Resolves on the socket's 'open' event; rejects on 'error' or on a
    // synchronous setup failure (missing key, insecure key in a browser).
    await new Promise((resolve, reject) => {
        try {
            this.#setupWebSocket(resolve, reject, sessionConfig);
        }
        catch (error) {
            reject(error);
        }
    });
    await this.updateSessionConfig(sessionConfig);
}
|
|
195
|
+
/**
|
|
196
|
+
* Send an event to the Realtime API. This will stringify the event and send it directly to the
|
|
197
|
+
* API. This can be used if you want to take control over the connection and send events manually.
|
|
198
|
+
*
|
|
199
|
+
* @param event - The event to send.
|
|
200
|
+
*/
|
|
201
|
+
sendEvent(event) {
|
|
202
|
+
if (!this.#state.websocket) {
|
|
203
|
+
throw new Error('WebSocket is not connected. Make sure you call `connect()` before sending events.');
|
|
204
|
+
}
|
|
205
|
+
this.#state.websocket.send(JSON.stringify(event));
|
|
206
|
+
}
|
|
207
|
+
/**
|
|
208
|
+
* Close the WebSocket connection.
|
|
209
|
+
*
|
|
210
|
+
* This will also reset any internal connection tracking used for interruption handling.
|
|
211
|
+
*/
|
|
212
|
+
close() {
|
|
213
|
+
this.#state.websocket?.close();
|
|
214
|
+
this.#currentItemId = undefined;
|
|
215
|
+
this._firstAudioTimestamp = undefined;
|
|
216
|
+
this._audioLengthMs = 0;
|
|
217
|
+
this.#currentAudioContentIndex = undefined;
|
|
218
|
+
}
|
|
219
|
+
/**
|
|
220
|
+
* Will throw an error as the WebSocket transport layer does not support muting.
|
|
221
|
+
*/
|
|
222
|
+
mute(_muted) {
|
|
223
|
+
throw new Error('Mute is not supported for the WebSocket transport. You have to mute the audio input yourself.');
|
|
224
|
+
}
|
|
225
|
+
/**
|
|
226
|
+
* Send an audio buffer to the Realtime API. This is used for your client to send audio to the
|
|
227
|
+
* model to respond.
|
|
228
|
+
*
|
|
229
|
+
* @param audio - The audio buffer to send.
|
|
230
|
+
* @param options - The options for the audio buffer.
|
|
231
|
+
*/
|
|
232
|
+
sendAudio(audio, options = {}) {
|
|
233
|
+
if (this.#state.status === 'connected') {
|
|
234
|
+
super.sendAudio(audio, options);
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
/**
|
|
238
|
+
* Send a cancel response event to the Realtime API. This is used to cancel an ongoing
|
|
239
|
+
* response that the model is currently generating.
|
|
240
|
+
*/
|
|
241
|
+
_cancelResponse() {
|
|
242
|
+
// cancel the ongoing response
|
|
243
|
+
if (this.#ongoingResponse) {
|
|
244
|
+
this.sendEvent({
|
|
245
|
+
type: 'response.cancel',
|
|
246
|
+
});
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
/**
|
|
250
|
+
* Do NOT call this method directly. Call `interrupt()` instead for proper interruption handling.
|
|
251
|
+
*
|
|
252
|
+
* This method is used to send the right events to the API to inform the model that the user has
|
|
253
|
+
* interrupted the response. It might be overridden/extended by an extended transport layer. See
|
|
254
|
+
* the `TwilioRealtimeTransportLayer` for an example.
|
|
255
|
+
*
|
|
256
|
+
* @param elapsedTime - The elapsed time since the response started.
|
|
257
|
+
*/
|
|
258
|
+
_interrupt(elapsedTime) {
|
|
259
|
+
// immediately emit this event so the client can stop playing audio
|
|
260
|
+
this.emit('audio_interrupted');
|
|
261
|
+
this.sendEvent({
|
|
262
|
+
type: 'conversation.item.truncate',
|
|
263
|
+
item_id: this.#currentItemId,
|
|
264
|
+
content_index: this.#currentAudioContentIndex,
|
|
265
|
+
audio_end_ms: elapsedTime,
|
|
266
|
+
});
|
|
267
|
+
}
|
|
268
|
+
/**
|
|
269
|
+
* Interrupt the ongoing response. This method is triggered automatically by the client when
|
|
270
|
+
* voice activity detection (VAD) is enabled (default) as well as when an output guardrail got
|
|
271
|
+
* triggered.
|
|
272
|
+
*
|
|
273
|
+
* You can also call this method directly if you want to interrupt the conversation for example
|
|
274
|
+
* based on an event in the client.
|
|
275
|
+
*/
|
|
276
|
+
interrupt() {
|
|
277
|
+
if (!this.#currentItemId || typeof this._firstAudioTimestamp !== 'number') {
|
|
278
|
+
return;
|
|
279
|
+
}
|
|
280
|
+
this._cancelResponse();
|
|
281
|
+
const elapsedTime = Date.now() - this._firstAudioTimestamp;
|
|
282
|
+
console.log(`Interrupting response after ${elapsedTime}ms`);
|
|
283
|
+
console.log(`Audio length: ${this._audioLengthMs}ms`);
|
|
284
|
+
if (elapsedTime >= 0 && elapsedTime < this._audioLengthMs) {
|
|
285
|
+
this._interrupt(elapsedTime);
|
|
286
|
+
}
|
|
287
|
+
this.#currentItemId = undefined;
|
|
288
|
+
this._firstAudioTimestamp = undefined;
|
|
289
|
+
this._audioLengthMs = 0;
|
|
290
|
+
this.#currentAudioContentIndex = undefined;
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
//# sourceMappingURL=openaiRealtimeWebsocket.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"openaiRealtimeWebsocket.mjs","sourceRoot":"","sources":["../src/openaiRealtimeWebsocket.ts"],"names":[],"mappings":"OAAO,EACL,oBAAoB,EACpB,SAAS,GACV,MAAM,gCAAgC;OAOhC,EACL,kBAAkB,GAEnB;OACM,EAAE,mBAAmB,EAAE,OAAO,EAAE,cAAc,EAAE;OAChD,EAAE,SAAS,EAAE,MAAM,qBAAqB;OAExC,EAAE,kBAAkB,EAAE;AAiC7B;;;;GAIG;AACH,MAAM,OAAO,uBACX,SAAQ,kBAAkB;IAG1B,OAAO,CAAqB;IAC5B,IAAI,CAAS;IACb,MAAM,GAAmB;QACvB,MAAM,EAAE,cAAc;QACtB,SAAS,EAAE,SAAS;KACrB,CAAC;IACF,kBAAkB,CAAU;IAC5B,cAAc,CAAqB;IACnC,yBAAyB,CAAqB;IAC9C;;;;;;OAMG;IACO,oBAAoB,CAAqB;IACzC,cAAc,GAAW,CAAC,CAAC;IACrC,gBAAgB,GAAY,KAAK,CAAC;IAElC,YAAY,UAA0C,EAAE;QACtD,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,0CAA0C,IAAI,CAAC,YAAY,EAAE,CAAC;QAC1E,IAAI,CAAC,kBAAkB,GAAG,OAAO,CAAC,iBAAiB,IAAI,KAAK,CAAC;IAC/D,CAAC;IAED;;OAEG;IACH,IAAI,MAAM;QACR,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,IAAI,eAAe;QACjB,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAED;;;OAGG;IACH,IAAI,KAAK;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IAED;;OAEG;IACH,IAAc,aAAa;QACzB,OAAO,IAAI,CAAC,cAAc,CAAC;IAC7B,CAAC;IAED;;;;;;OAMG;IACO,QAAQ,CAAC,UAA+B;QAChD,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;IACjC,CAAC;IAED,eAAe,CACb,OAA8B,EAC9B,MAA8B,EAC9B,aAA6C;QAE7C,IAAI,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YAC1B,OAAO,EAAE,CAAC;YACV,OAAO;QACT,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;YAClB,MAAM,IAAI,SAAS,CACjB,oEAAoE,CACrE,CAAC;QACJ,CAAC;QAED,IACE,oBAAoB,EAAE;YACtB,CAAC,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,KAAK,CAAC;YAC/B,CAAC,IAAI,CAAC,kBAAkB,EACxB,CAAC;YACD,MAAM,IAAI,SAAS,CACjB,6KAA6K,CAC9K,CAAC;QACJ,CAAC;QAED,MAAM,kBAAkB,GAAG,oBAAoB,EAAE;YAC/C,CAAC,CAAC;gBACE,UAAU;gBACV,OAAO;gBACP,0BAA0B,GAAG,IAAI,CAAC,OAAO;gBACzC,0BAA0B;gBAC1B,yBAAyB;gBACzB,iBAAiB;gBACjB,cAAc;aACf;YACH,CAAC,CAAC;gBACE,OAAO,EAAE;oBACP,aAAa,EAAE,UAAU,IAAI,CAAC,OAAO,EAAE;oBACvC,aAAa,EAAE,aAAa;oBAC5B,GAAG,OAAO;iBACX;aACF,CAAC;QAEN,MAAM,EAAE,GAAG,IAAI,SAAS,CAAC,IAAI,CAAC,IAAI,EAAE,kBAAyB,CAAC,CAAC;QAC/D,IAAI,CAAC,MAAM,GAAG;YACZ,MAAM,EAAE,YAAY;YACpB,SAAS,EAAE,EAAE;SACd,CAAC;QACF,IAAI,CAAC,IAAI,CAA
C,mBAAmB,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEnD,EAAE,CAAC,gBAAgB,CAAC,MAAM,EAAE,GAAG,EAAE;YAC/B,IAAI,CAAC,MAAM,GAAG;gBACZ,MAAM,EAAE,WAAW;gBACnB,SAAS,EAAE,EAAE;aACd,CAAC;YACF,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACnD,IAAI,CAAC,OAAO,EAAE,CAAC;YACf,OAAO,EAAE,CAAC;QACZ,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;YACrC,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;YACrB,IAAI,CAAC,MAAM,GAAG;gBACZ,MAAM,EAAE,cAAc;gBACtB,SAAS,EAAE,SAAS;aACrB,CAAC;YACF,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACnD,MAAM,CAAC,KAAK,CAAC,CAAC;QAChB,CAAC,CAAC,CAAC;QAEH,EAAE,CAAC,gBAAgB,CAAC,SAAS,EAAE,CAAC,OAAO,EAAE,EAAE;YACzC,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;YACzB,MAAM,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,GAAG,kBAAkB,CAAC,OAAO,CAAC,CAAC;YAChE,IAAI,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;gBACzB,OAAO;YACT,CAAC;YAED,IAAI,MAAM,CAAC,IAAI,KAAK,sBAAsB,EAAE,CAAC;gBAC3C,IAAI,CAAC,yBAAyB,GAAG,MAAM,CAAC,aAAa,CAAC;gBACtD,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC;gBACrC,IAAI,IAAI,CAAC,oBAAoB,KAAK,SAAS,EAAE,CAAC;oBAC5C,6EAA6E;oBAC7E,+DAA+D;oBAC/D,IAAI,CAAC,oBAAoB,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;oBACvC,IAAI,CAAC,cAAc,GAAG,CAAC,CAAC;gBAC1B,CAAC;gBAED,MAAM,IAAI,GAAG,mBAAmB,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAC/C,oEAAoE;gBACpE,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC,UAAU,GAAG,EAAE,GAAG,CAAC,CAAC,CAAC,6BAA6B;gBAE9E,MAAM,UAAU,GAAwB;oBACtC,IAAI,EAAE,OAAO;oBACb,IAAI,EAAE,IAAI;oBACV,UAAU,EAAE,MAAM,CAAC,WAAW;iBAC/B,CAAC;gBACF,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC;YAC5B,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,mCAAmC,EAAE,CAAC;gBAC/D,IAAI,CAAC,SAAS,EAAE,CAAC;YACnB,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,kBAAkB,EAAE,CAAC;gBAC9C,IAAI,CAAC,gBAAgB,GAAG,IAAI,CAAC;YAC/B,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,eAAe,EAAE,CAAC;gBAC3C,IAAI,CAAC,gBAAgB,GAAG,KAAK,CAAC;YAChC,CAAC;iBAAM,IAAI,MAAM,CAAC,IAAI,KAAK,iBAAiB,EAAE,CAAC;gBAC7C,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC;gBAC7C,yDAAyD;gBACzD,IAAI,CAAC,oBAAoB,CAAC,aAAa,CAAC,OAAO,IAAI,MAAM,CAAC,CAAC;YAC7D,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,E
AAE,CAAC,gBAAgB,CAAC,OAAO,EAAE,GAAG,EAAE;YAChC,IAAI,CAAC,MAAM,GAAG;gBACZ,MAAM,EAAE,cAAc;gBACtB,SAAS,EAAE,SAAS;aACrB,CAAC;YACF,IAAI,CAAC,IAAI,CAAC,mBAAmB,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YACnD,IAAI,CAAC,QAAQ,EAAE,CAAC;QAClB,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,OAAO,CAAC,OAA6C;QACzD,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;QACjD,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;QAC1B,IAAI,CAAC,OAAO,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,CAAC,IAAI;YACP,OAAO,CAAC,GAAG;gBACX,0CAA0C,IAAI,CAAC,YAAY,EAAE,CAAC;QAEhE,MAAM,aAAa,GAAmC;YACpD,GAAG,CAAC,OAAO,CAAC,oBAAoB,IAAI,EAAE,CAAC;YACvC,KAAK,EAAE,IAAI,CAAC,YAAY;SACzB,CAAC;QAEF,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE;YAC1C,IAAI,CAAC;gBACH,IAAI,CAAC,eAAe,CAAC,OAAO,EAAE,MAAM,EAAE,aAAa,CAAC,CAAC;YACvD,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,CAAC,KAAK,CAAC,CAAC;YAChB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,MAAM,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,CAAC;IAChD,CAAC;IAED;;;;;OAKG;IACH,SAAS,CAAC,KAA4B;QACpC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC;YAC3B,MAAM,IAAI,KAAK,CACb,mFAAmF,CACpF,CAAC;QACJ,CAAC;QACD,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;IACpD,CAAC;IAED;;;;OAIG;IACH,KAAK;QACH,IAAI,CAAC,MAAM,CAAC,SAAS,EAAE,KAAK,EAAE,CAAC;QAC/B,IAAI,CAAC,cAAc,GAAG,SAAS,CAAC;QAChC,IAAI,CAAC,oBAAoB,GAAG,SAAS,CAAC;QACtC,IAAI,CAAC,cAAc,GAAG,CAAC,CAAC;QACxB,IAAI,CAAC,yBAAyB,GAAG,SAAS,CAAC;IAC7C,CAAC;IAED;;OAEG;IACH,IAAI,CAAC,MAAe;QAClB,MAAM,IAAI,KAAK,CACb,+FAA+F,CAChG,CAAC;IACJ,CAAC;IAED;;;;;;OAMG;IACH,SAAS,CAAC,KAAkB,EAAE,UAAgC,EAAE;QAC9D,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;YACvC,KAAK,CAAC,SAAS,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAClC,CAAC;IACH,CAAC;IAED;;;OAGG;IACH,eAAe;QACb,8BAA8B;QAC9B,IAAI,IAAI,CAAC,gBAAgB,EAAE,CAAC;YAC1B,IAAI,CAAC,SAAS,CAAC;gBACb,IAAI,EAAE,iBAAiB;aACxB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;;;;;;;OAQG;IACH,UAAU,CAAC,WAAmB;QAC5B,mEAAmE;QACnE,IAAI,CAAC,IAAI,CAAC,mBAAmB,CAAC,CAAC;QAC/B,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,4BAA4B;YAClC,OAAO,EAAE,IAAI,CAAC,cAAc;YAC5B,aAA
a,EAAE,IAAI,CAAC,yBAAyB;YAC7C,YAAY,EAAE,WAAW;SAC1B,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;OAOG;IACH,SAAS;QACP,IAAI,CAAC,IAAI,CAAC,cAAc,IAAI,OAAO,IAAI,CAAC,oBAAoB,KAAK,QAAQ,EAAE,CAAC;YAC1E,OAAO;QACT,CAAC;QAED,IAAI,CAAC,eAAe,EAAE,CAAC;QAEvB,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,IAAI,CAAC,oBAAoB,CAAC;QAC3D,OAAO,CAAC,GAAG,CAAC,+BAA+B,WAAW,IAAI,CAAC,CAAC;QAC5D,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,CAAC,cAAc,IAAI,CAAC,CAAC;QACtD,IAAI,WAAW,IAAI,CAAC,IAAI,WAAW,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;YAC1D,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC;QAC/B,CAAC;QAED,IAAI,CAAC,cAAc,GAAG,SAAS,CAAC;QAChC,IAAI,CAAC,oBAAoB,GAAG,SAAS,CAAC;QACtC,IAAI,CAAC,cAAc,GAAG,CAAC,CAAC;QACxB,IAAI,CAAC,yBAAyB,GAAG,SAAS,CAAC;IAC7C,CAAC;CACF"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import { Agent, type AgentConfiguration, Handoff, TextOutput, UnknownContext } from '@openai/agents-core';
|
|
2
|
+
import { RealtimeContextData } from './realtimeSession';
|
|
3
|
+
export type RealtimeAgentConfiguration<TContext = UnknownContext> = Partial<Omit<AgentConfiguration<TContext, TextOutput>, 'model' | 'handoffs' | 'modelSettings' | 'outputType' | 'toolUseBehavior' | 'resetToolChoice' | 'outputGuardrails' | 'inputGuardrails' | 'model'>> & {
|
|
4
|
+
/**
|
|
5
|
+
* The name of your realtime agent.
|
|
6
|
+
*/
|
|
7
|
+
name: string;
|
|
8
|
+
/**
|
|
9
|
+
* Any other `RealtimeAgent` instances the agent is able to hand off to.
|
|
10
|
+
*/
|
|
11
|
+
handoffs?: (RealtimeAgent | Handoff)[];
|
|
12
|
+
/**
|
|
13
|
+
* The voice intended to be used by the agent. If another agent already spoke during the
|
|
14
|
+
* RealtimeSession, changing the voice during a handoff will fail.
|
|
15
|
+
*/
|
|
16
|
+
voice?: string;
|
|
17
|
+
};
|
|
18
|
+
/**
 * A specialized agent instance that is meant to be used within a `RealtimeSession` to build
 * voice agents. Due to the nature of this agent, some configuration options are not supported
 * that are supported by regular `Agent` instances. For example:
 * - `model` choice is not supported as all RealtimeAgents will be handled by the same model within
 *   a `RealtimeSession`
 * - `modelSettings` is not supported as all RealtimeAgents will be handled by the same model within
 *   a `RealtimeSession`
 * - `outputType` is not supported as RealtimeAgents do not support structured outputs
 * - `toolUseBehavior` is not supported as all RealtimeAgents will be handled by the same model within
 *   a `RealtimeSession`
 * - `voice` can be configured on an `Agent` level however it cannot be changed after the first
 *   agent within a `RealtimeSession` spoke
 *
 * @example
 * ```ts
 * const agent = new RealtimeAgent({
 *   name: 'my-agent',
 *   instructions: 'You are a helpful assistant that can answer questions and help with tasks.',
 * })
 *
 * const session = new RealtimeSession(agent);
 * ```
 */
export declare class RealtimeAgent<TContext = UnknownContext> extends Agent<RealtimeContextData<TContext>, TextOutput> {
    /**
     * The voice intended to be used by the agent. If another agent already spoke during the
     * RealtimeSession, changing the voice during a handoff will fail.
     */
    readonly voice: string;
    /**
     * Creates a RealtimeAgent from the given configuration.
     *
     * @param config - The agent configuration (name, instructions, handoffs, voice, …).
     */
    constructor(config: RealtimeAgentConfiguration<TContext>);
}
|