@livekit/agents-plugin-openai 1.0.31 → 1.0.33
- package/dist/realtime/api_proto.cjs.map +1 -1
- package/dist/realtime/api_proto.d.cts +50 -12
- package/dist/realtime/api_proto.d.ts +50 -12
- package/dist/realtime/api_proto.d.ts.map +1 -1
- package/dist/realtime/api_proto.js.map +1 -1
- package/dist/realtime/index.cjs +19 -0
- package/dist/realtime/index.cjs.map +1 -1
- package/dist/realtime/index.d.cts +1 -0
- package/dist/realtime/index.d.ts +1 -0
- package/dist/realtime/index.d.ts.map +1 -1
- package/dist/realtime/index.js +4 -0
- package/dist/realtime/index.js.map +1 -1
- package/dist/realtime/realtime_model.cjs +69 -33
- package/dist/realtime/realtime_model.cjs.map +1 -1
- package/dist/realtime/realtime_model.d.cts +14 -6
- package/dist/realtime/realtime_model.d.ts +14 -6
- package/dist/realtime/realtime_model.d.ts.map +1 -1
- package/dist/realtime/realtime_model.js +69 -33
- package/dist/realtime/realtime_model.js.map +1 -1
- package/dist/realtime/realtime_model_beta.cjs +1300 -0
- package/dist/realtime/realtime_model_beta.cjs.map +1 -0
- package/dist/realtime/realtime_model_beta.d.cts +165 -0
- package/dist/realtime/realtime_model_beta.d.ts +165 -0
- package/dist/realtime/realtime_model_beta.d.ts.map +1 -0
- package/dist/realtime/realtime_model_beta.js +1280 -0
- package/dist/realtime/realtime_model_beta.js.map +1 -0
- package/package.json +5 -5
- package/src/realtime/api_proto.ts +76 -17
- package/src/realtime/index.ts +1 -0
- package/src/realtime/realtime_model.ts +86 -49
- package/src/realtime/realtime_model_beta.ts +1665 -0
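The headline change in this release is a new beta implementation of the OpenAI Realtime integration: realtime_model_beta exports RealtimeModel and RealtimeSession alongside the existing stable module. The sketch below is a hypothetical usage example, not documented API: the realtime.beta namespace is an assumption inferred from the single export line added to src/realtime/index.ts, and the option values simply restate the defaults visible in the bundled source that follows.

// Hypothetical usage sketch (TypeScript); the beta namespace/path is an assumption.
import * as openai from '@livekit/agents-plugin-openai';

// Defaults from DEFAULT_REALTIME_MODEL_OPTIONS in realtime_model_beta.cjs:
// model "gpt-realtime", voice "marin", semantic_vad turn detection, and
// gpt-4o-mini-transcribe input transcription.
const model = new openai.realtime.beta.RealtimeModel({
  voice: 'marin',
  temperature: 0.8,
});

// session() returns a RealtimeSession that manages the WebSocket connection,
// audio buffering, and tool/instruction updates shown in the diff below.
const session = model.session();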
package/dist/realtime/realtime_model_beta.cjs
@@ -0,0 +1,1300 @@
"use strict";
var __create = Object.create;
var __defProp = Object.defineProperty;
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
var __getOwnPropNames = Object.getOwnPropertyNames;
var __getProtoOf = Object.getPrototypeOf;
var __hasOwnProp = Object.prototype.hasOwnProperty;
var __export = (target, all) => {
  for (var name in all)
    __defProp(target, name, { get: all[name], enumerable: true });
};
var __copyProps = (to, from, except, desc) => {
  if (from && typeof from === "object" || typeof from === "function") {
    for (let key of __getOwnPropNames(from))
      if (!__hasOwnProp.call(to, key) && key !== except)
        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
  }
  return to;
};
var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
  // If the importer is in node compatibility mode or this is not an ESM
  // file that has been converted to a CommonJS file using a Babel-
  // compatible transform (i.e. "__esModule" has not been set), then set
  // "default" to the CommonJS "module.exports" for node compatibility.
  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
  mod
));
var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
var realtime_model_beta_exports = {};
__export(realtime_model_beta_exports, {
  RealtimeModel: () => RealtimeModel,
  RealtimeSession: () => RealtimeSession
});
module.exports = __toCommonJS(realtime_model_beta_exports);
var import_agents = require("@livekit/agents");
var import_mutex = require("@livekit/mutex");
var import_rtc_node = require("@livekit/rtc-node");
var import_ws = require("ws");
var api_proto = __toESM(require("./api_proto.cjs"), 1);
const lkOaiDebug = process.env.LK_OPENAI_DEBUG ? Number(process.env.LK_OPENAI_DEBUG) : 0;
const SAMPLE_RATE = 24e3;
const NUM_CHANNELS = 1;
const BASE_URL = "https://api.openai.com/v1";
const MOCK_AUDIO_ID_PREFIX = "lk_mock_audio_item_";
class CreateResponseHandle {
  instructions;
  doneFut;
  // TODO(shubhra): add timeout
  constructor({ instructions }) {
    this.instructions = instructions;
    this.doneFut = new import_agents.Future();
  }
}
const DEFAULT_FIRST_RETRY_INTERVAL_MS = 100;
const DEFAULT_TEMPERATURE = 0.8;
const DEFAULT_TURN_DETECTION = {
  type: "semantic_vad",
  eagerness: "medium",
  create_response: true,
  interrupt_response: true
};
const DEFAULT_INPUT_AUDIO_TRANSCRIPTION = {
  model: "gpt-4o-mini-transcribe"
};
const DEFAULT_TOOL_CHOICE = "auto";
const DEFAULT_MAX_RESPONSE_OUTPUT_TOKENS = "inf";
const AZURE_DEFAULT_INPUT_AUDIO_TRANSCRIPTION = {
  model: "whisper-1"
};
const AZURE_DEFAULT_TURN_DETECTION = {
  type: "server_vad",
  threshold: 0.5,
  prefix_padding_ms: 300,
  silence_duration_ms: 200,
  create_response: true
};
const DEFAULT_MAX_SESSION_DURATION = 20 * 60 * 1e3;
const DEFAULT_REALTIME_MODEL_OPTIONS = {
  model: "gpt-realtime",
  voice: "marin",
  temperature: DEFAULT_TEMPERATURE,
  inputAudioTranscription: DEFAULT_INPUT_AUDIO_TRANSCRIPTION,
  turnDetection: DEFAULT_TURN_DETECTION,
  toolChoice: DEFAULT_TOOL_CHOICE,
  maxResponseOutputTokens: DEFAULT_MAX_RESPONSE_OUTPUT_TOKENS,
  maxSessionDuration: DEFAULT_MAX_SESSION_DURATION,
  connOptions: import_agents.DEFAULT_API_CONNECT_OPTIONS,
  modalities: ["text", "audio"]
};
class RealtimeModel extends import_agents.llm.RealtimeModel {
  sampleRate = api_proto.SAMPLE_RATE;
  numChannels = api_proto.NUM_CHANNELS;
  inFrameSize = api_proto.IN_FRAME_SIZE;
  outFrameSize = api_proto.OUT_FRAME_SIZE;
  /* @internal */
  _options;
  get model() {
    return this._options.model;
  }
  constructor(options = {}) {
    const modalities = options.modalities || DEFAULT_REALTIME_MODEL_OPTIONS.modalities;
    super({
      messageTruncation: true,
      turnDetection: options.turnDetection !== null,
      userTranscription: options.inputAudioTranscription !== null,
      autoToolReplyGeneration: false,
      audioOutput: modalities.includes("audio")
    });
    const isAzure = !!(options.apiVersion || options.entraToken || options.azureDeployment);
    if (options.apiKey === "" && !isAzure) {
      throw new Error(
        "OpenAI API key is required, either using the argument or by setting the OPENAI_API_KEY environment variable"
      );
    }
    const apiKey = options.apiKey || process.env.OPENAI_API_KEY;
    if (!apiKey && !isAzure) {
      throw new Error(
        "OpenAI API key is required, either using the argument or by setting the OPENAI_API_KEY environment variable"
      );
    }
    if (!options.baseURL && isAzure) {
      const azureEndpoint = process.env.AZURE_OPENAI_ENDPOINT;
      if (!azureEndpoint) {
        throw new Error(
          "Missing Azure endpoint. Please pass base_url or set AZURE_OPENAI_ENDPOINT environment variable."
        );
      }
      options.baseURL = `${azureEndpoint.replace(/\/$/, "")}/openai`;
    }
    const { modalities: _, ...optionsWithoutModalities } = options;
    this._options = {
      ...DEFAULT_REALTIME_MODEL_OPTIONS,
      ...optionsWithoutModalities,
      baseURL: options.baseURL || BASE_URL,
      apiKey,
      isAzure,
      model: options.model || DEFAULT_REALTIME_MODEL_OPTIONS.model,
      modalities
    };
  }
  /**
   * Create a RealtimeModel instance configured for Azure OpenAI Service.
   *
   * @param azureDeployment - The name of your Azure OpenAI deployment.
   * @param azureEndpoint - The endpoint URL for your Azure OpenAI resource. If undefined, will attempt to read from the environment variable AZURE_OPENAI_ENDPOINT.
   * @param apiVersion - API version to use with Azure OpenAI Service. If undefined, will attempt to read from the environment variable OPENAI_API_VERSION.
   * @param apiKey - Azure OpenAI API key. If undefined, will attempt to read from the environment variable AZURE_OPENAI_API_KEY.
   * @param entraToken - Azure Entra authentication token. Required if not using API key authentication.
   * @param baseURL - Base URL for the API endpoint. If undefined, constructed from the azure_endpoint.
   * @param voice - Voice setting for audio outputs. Defaults to "alloy".
   * @param inputAudioTranscription - Options for transcribing input audio. Defaults to @see DEFAULT_INPUT_AUDIO_TRANSCRIPTION.
   * @param turnDetection - Options for server-based voice activity detection (VAD). Defaults to @see DEFAULT_SERVER_VAD_OPTIONS.
   * @param temperature - Sampling temperature for response generation. Defaults to @see DEFAULT_TEMPERATURE.
   * @param speed - Speed of the audio output. Defaults to 1.0.
   * @param maxResponseOutputTokens - Maximum number of tokens in the response. Defaults to @see DEFAULT_MAX_RESPONSE_OUTPUT_TOKENS.
   * @param maxSessionDuration - Maximum duration of the session in milliseconds. Defaults to @see DEFAULT_MAX_SESSION_DURATION.
   *
   * @returns A RealtimeModel instance configured for Azure OpenAI Service.
   *
   * @throws Error if required Azure parameters are missing or invalid.
   */
  static withAzure({
    azureDeployment,
    azureEndpoint,
    apiVersion,
    apiKey,
    entraToken,
    baseURL,
    voice = "alloy",
    inputAudioTranscription = AZURE_DEFAULT_INPUT_AUDIO_TRANSCRIPTION,
    turnDetection = AZURE_DEFAULT_TURN_DETECTION,
    temperature = 0.8,
    speed
  }) {
    apiKey = apiKey || process.env.AZURE_OPENAI_API_KEY;
    if (!apiKey && !entraToken) {
      throw new Error(
        "Missing credentials. Please pass one of `apiKey`, `entraToken`, or the `AZURE_OPENAI_API_KEY` environment variable."
      );
    }
    apiVersion = apiVersion || process.env.OPENAI_API_VERSION;
    if (!apiVersion) {
      throw new Error(
        "Must provide either the `apiVersion` argument or the `OPENAI_API_VERSION` environment variable"
      );
    }
    if (!baseURL) {
      azureEndpoint = azureEndpoint || process.env.AZURE_OPENAI_ENDPOINT;
      if (!azureEndpoint) {
        throw new Error(
          "Missing Azure endpoint. Please pass the `azure_endpoint` parameter or set the `AZURE_OPENAI_ENDPOINT` environment variable."
        );
      }
      baseURL = `${azureEndpoint.replace(/\/$/, "")}/openai`;
    }
    return new RealtimeModel({
      voice,
      inputAudioTranscription,
      turnDetection,
      temperature,
      speed,
      apiKey,
      azureDeployment,
      apiVersion,
      entraToken,
      baseURL
    });
  }
  session() {
    return new RealtimeSession(this);
  }
  async close() {
    return;
  }
}
function processBaseURL({
  baseURL,
  model,
  isAzure = false,
  azureDeployment,
  apiVersion
}) {
  const url = new URL([baseURL, "realtime"].join("/"));
  if (url.protocol === "https:") {
    url.protocol = "wss:";
  }
  if (!url.pathname || ["", "/v1", "/openai"].includes(url.pathname.replace(/\/$/, ""))) {
    url.pathname = url.pathname.replace(/\/$/, "") + "/realtime";
  } else {
    url.pathname = url.pathname.replace(/\/$/, "");
  }
  const queryParams = {};
  if (isAzure) {
    if (apiVersion) {
      queryParams["api-version"] = apiVersion;
    }
    if (azureDeployment) {
      queryParams["deployment"] = azureDeployment;
    }
  } else {
    queryParams["model"] = model;
  }
  for (const [key, value] of Object.entries(queryParams)) {
    url.searchParams.set(key, value);
  }
  return url.toString();
}
class RealtimeSession extends import_agents.llm.RealtimeSession {
  _tools = {};
  remoteChatCtx = new import_agents.llm.RemoteChatContext();
  messageChannel = new import_agents.Queue();
  inputResampler;
  instructions;
  oaiRealtimeModel;
  currentGeneration;
  responseCreatedFutures = {};
  textModeRecoveryRetries = 0;
  itemCreateFutures = {};
  itemDeleteFutures = {};
  updateChatCtxLock = new import_mutex.Mutex();
  updateFuncCtxLock = new import_mutex.Mutex();
  // 100ms chunks
  bstream = new import_agents.AudioByteStream(SAMPLE_RATE, NUM_CHANNELS, SAMPLE_RATE / 10);
  pushedDurationMs = 0;
  #logger = (0, import_agents.log)();
  #task;
  #closed = false;
  constructor(realtimeModel) {
    super(realtimeModel);
    this.oaiRealtimeModel = realtimeModel;
    this.#task = import_agents.Task.from(({ signal }) => this.#mainTask(signal));
    this.sendEvent(this.createSessionUpdateEvent());
  }
  sendEvent(command) {
    this.messageChannel.put(command);
  }
  createSessionUpdateEvent() {
    const modalities = this.oaiRealtimeModel._options.modalities.includes("audio") ? ["text", "audio"] : ["text"];
    return {
      type: "session.update",
      session: {
        model: this.oaiRealtimeModel._options.model,
        voice: this.oaiRealtimeModel._options.voice,
        input_audio_format: "pcm16",
        output_audio_format: "pcm16",
        modalities,
        turn_detection: this.oaiRealtimeModel._options.turnDetection,
        input_audio_transcription: this.oaiRealtimeModel._options.inputAudioTranscription,
        // TODO(shubhra): add inputAudioNoiseReduction
        temperature: this.oaiRealtimeModel._options.temperature,
        tool_choice: toOaiToolChoice(this.oaiRealtimeModel._options.toolChoice),
        max_response_output_tokens: this.oaiRealtimeModel._options.maxResponseOutputTokens === Infinity ? "inf" : this.oaiRealtimeModel._options.maxResponseOutputTokens,
        // TODO(shubhra): add tracing options
        instructions: this.instructions,
        speed: this.oaiRealtimeModel._options.speed
      }
    };
  }
  get chatCtx() {
    return this.remoteChatCtx.toChatCtx();
  }
  get tools() {
    return { ...this._tools };
  }
  async updateChatCtx(_chatCtx) {
    const unlock = await this.updateChatCtxLock.lock();
    const events = this.createChatCtxUpdateEvents(_chatCtx);
    const futures = [];
    for (const event of events) {
      const future = new import_agents.Future();
      futures.push(future);
      if (event.type === "conversation.item.create") {
        this.itemCreateFutures[event.item.id] = future;
      } else if (event.type == "conversation.item.delete") {
        this.itemDeleteFutures[event.item_id] = future;
      }
      this.sendEvent(event);
    }
    if (futures.length === 0) {
      unlock();
      return;
    }
    try {
      await Promise.race([
        Promise.all(futures),
        (0, import_agents.delay)(5e3).then(() => {
          throw new Error("Chat ctx update events timed out");
        })
      ]);
    } catch (e) {
      this.#logger.error(e.message);
      throw e;
    } finally {
      unlock();
    }
  }
  createChatCtxUpdateEvents(chatCtx, addMockAudio = false) {
    const newChatCtx = chatCtx.copy();
    if (addMockAudio) {
      newChatCtx.items.push(createMockAudioItem());
    } else {
      newChatCtx.items = newChatCtx.items.filter(
        (item) => !item.id.startsWith(MOCK_AUDIO_ID_PREFIX)
      );
    }
    const events = [];
    const diffOps = import_agents.llm.computeChatCtxDiff(this.chatCtx, newChatCtx);
    for (const op of diffOps.toRemove) {
      events.push({
        type: "conversation.item.delete",
        item_id: op,
        event_id: (0, import_agents.shortuuid)("chat_ctx_delete_")
      });
    }
    for (const [previousId, id] of diffOps.toCreate) {
      const chatItem = newChatCtx.getById(id);
      if (!chatItem) {
        throw new Error(`Chat item ${id} not found`);
      }
      events.push({
        type: "conversation.item.create",
        item: livekitItemToOpenAIItem(chatItem),
        previous_item_id: previousId ?? void 0,
        event_id: (0, import_agents.shortuuid)("chat_ctx_create_")
      });
    }
    return events;
  }
  async updateTools(_tools) {
    const unlock = await this.updateFuncCtxLock.lock();
    const ev = this.createToolsUpdateEvent(_tools);
    this.sendEvent(ev);
    if (!ev.session.tools) {
      throw new Error("Tools are missing in the session update event");
    }
    const retainedToolNames = new Set(ev.session.tools.map((tool) => tool.name));
    const retainedTools = Object.fromEntries(
      Object.entries(_tools).filter(
        ([name, tool]) => import_agents.llm.isFunctionTool(tool) && retainedToolNames.has(name)
      )
    );
    this._tools = retainedTools;
    unlock();
  }
  createToolsUpdateEvent(_tools) {
    const oaiTools = [];
    for (const [name, tool] of Object.entries(_tools)) {
      if (!import_agents.llm.isFunctionTool(tool)) {
        this.#logger.error({ name, tool }, "OpenAI Realtime API doesn't support this tool type");
        continue;
      }
      const { parameters: toolParameters, description } = tool;
      try {
        const parameters = import_agents.llm.toJsonSchema(
          toolParameters
        );
        oaiTools.push({
          name,
          description,
          parameters,
          type: "function"
        });
      } catch (e) {
        this.#logger.error({ name, tool }, "OpenAI Realtime API doesn't support this tool type");
        continue;
      }
    }
    return {
      type: "session.update",
      session: {
        model: this.oaiRealtimeModel._options.model,
        tools: oaiTools
      },
      event_id: (0, import_agents.shortuuid)("tools_update_")
    };
  }
  async updateInstructions(_instructions) {
    const eventId = (0, import_agents.shortuuid)("instructions_update_");
    this.sendEvent({
      type: "session.update",
      session: {
        instructions: _instructions
      },
      event_id: eventId
    });
    this.instructions = _instructions;
  }
  updateOptions({ toolChoice }) {
    const options = {};
    this.oaiRealtimeModel._options.toolChoice = toolChoice;
    options.tool_choice = toOaiToolChoice(toolChoice);
    this.sendEvent({
      type: "session.update",
      session: options,
      event_id: (0, import_agents.shortuuid)("options_update_")
    });
  }
  pushAudio(frame) {
    for (const f of this.resampleAudio(frame)) {
      for (const nf of this.bstream.write(f.data.buffer)) {
        this.sendEvent({
          type: "input_audio_buffer.append",
          audio: Buffer.from(nf.data.buffer).toString("base64")
        });
        this.pushedDurationMs += nf.samplesPerChannel / nf.sampleRate * 1e3;
      }
    }
  }
  async commitAudio() {
    if (this.pushedDurationMs > 100) {
      this.sendEvent({
        type: "input_audio_buffer.commit"
      });
      this.pushedDurationMs = 0;
    }
  }
  async clearAudio() {
    this.sendEvent({
      type: "input_audio_buffer.clear"
    });
    this.pushedDurationMs = 0;
  }
  async generateReply(instructions) {
    const handle = this.createResponse({ instructions, userInitiated: true });
    this.textModeRecoveryRetries = 0;
    return handle.doneFut.await;
  }
  async interrupt() {
    this.sendEvent({
      type: "response.cancel"
    });
  }
  async truncate(_options) {
    if (!_options.modalities || _options.modalities.includes("audio")) {
      this.sendEvent({
        type: "conversation.item.truncate",
        content_index: 0,
        item_id: _options.messageId,
        audio_end_ms: _options.audioEndMs
      });
    } else if (_options.audioTranscript !== void 0) {
      const chatCtx = this.chatCtx.copy();
      const idx = chatCtx.indexById(_options.messageId);
      if (idx !== void 0) {
        const item = chatCtx.items[idx];
        if (item && item.type === "message") {
          const newItem = import_agents.llm.ChatMessage.create({
            ...item,
            content: [_options.audioTranscript]
          });
          chatCtx.items[idx] = newItem;
          const events = this.createChatCtxUpdateEvents(chatCtx);
          for (const ev of events) {
            this.sendEvent(ev);
          }
        }
      }
    }
  }
  loggableEvent(event) {
    const untypedEvent = {};
    for (const [key, value] of Object.entries(event)) {
      if (value !== void 0) {
        untypedEvent[key] = value;
      }
    }
    if (untypedEvent.audio && typeof untypedEvent.audio === "string") {
      return { ...untypedEvent, audio: "..." };
    }
    if (untypedEvent.delta && typeof untypedEvent.delta === "string" && event.type === "response.audio.delta") {
      return { ...untypedEvent, delta: "..." };
    }
    return untypedEvent;
  }
  async createWsConn() {
    const headers = {
      "User-Agent": "LiveKit-Agents-JS"
    };
    if (this.oaiRealtimeModel._options.isAzure) {
      if (this.oaiRealtimeModel._options.entraToken) {
        headers.Authorization = `Bearer ${this.oaiRealtimeModel._options.entraToken}`;
      } else if (this.oaiRealtimeModel._options.apiKey) {
        headers["api-key"] = this.oaiRealtimeModel._options.apiKey;
      } else {
        throw new Error("Microsoft API key or entraToken is required");
      }
    } else {
      headers.Authorization = `Bearer ${this.oaiRealtimeModel._options.apiKey}`;
      headers["OpenAI-Beta"] = "realtime=v1";
    }
    const url = processBaseURL({
      baseURL: this.oaiRealtimeModel._options.baseURL,
      model: this.oaiRealtimeModel._options.model,
      isAzure: this.oaiRealtimeModel._options.isAzure,
      apiVersion: this.oaiRealtimeModel._options.apiVersion,
      azureDeployment: this.oaiRealtimeModel._options.azureDeployment
    });
    if (lkOaiDebug) {
      this.#logger.debug(`Connecting to OpenAI Realtime API at ${url}`);
    }
    return new Promise((resolve, reject) => {
      const ws = new import_ws.WebSocket(url, { headers });
      let waiting = true;
      const timeout = setTimeout(() => {
        ws.close();
        reject(new Error("WebSocket connection timeout"));
      }, this.oaiRealtimeModel._options.connOptions.timeoutMs);
      ws.once("open", () => {
        if (!waiting) return;
        waiting = false;
        clearTimeout(timeout);
        resolve(ws);
      });
      ws.once("close", () => {
        if (!waiting) return;
        waiting = false;
        clearTimeout(timeout);
        reject(new Error("OpenAI Realtime API connection closed"));
      });
    });
  }
  async #mainTask(signal) {
    let reconnecting = false;
    let numRetries = 0;
    let wsConn = null;
    const maxRetries = this.oaiRealtimeModel._options.connOptions.maxRetry;
    const reconnect = async () => {
      this.#logger.debug(
        {
          maxSessionDuration: this.oaiRealtimeModel._options.maxSessionDuration
        },
        "Reconnecting to OpenAI Realtime API"
      );
      const events = [];
      events.push(this.createSessionUpdateEvent());
      if (Object.keys(this._tools).length > 0) {
        events.push(this.createToolsUpdateEvent(this._tools));
      }
      const chatCtx = this.chatCtx.copy({
        excludeFunctionCall: true,
        excludeInstructions: true,
        excludeEmptyMessage: true
      });
      const oldChatCtx = this.remoteChatCtx;
      this.remoteChatCtx = new import_agents.llm.RemoteChatContext();
      events.push(...this.createChatCtxUpdateEvents(chatCtx));
      try {
        for (const ev of events) {
          this.emit("openai_client_event_queued", ev);
          wsConn.send(JSON.stringify(ev));
        }
      } catch (error) {
        this.remoteChatCtx = oldChatCtx;
        throw new import_agents.APIConnectionError({
          message: "Failed to send message to OpenAI Realtime API during session re-connection"
        });
      }
      this.#logger.debug("Reconnected to OpenAI Realtime API");
      this.emit("session_reconnected", {});
    };
    reconnecting = false;
    while (!this.#closed && !signal.aborted) {
      this.#logger.debug("Creating WebSocket connection to OpenAI Realtime API");
      wsConn = await this.createWsConn();
      if (signal.aborted) break;
      try {
        if (reconnecting) {
          await reconnect();
          if (signal.aborted) break;
          numRetries = 0;
        }
        await this.runWs(wsConn);
        if (signal.aborted) break;
      } catch (error) {
        if (!(0, import_agents.isAPIError)(error)) {
          this.emitError({ error, recoverable: false });
          throw error;
        }
        if (maxRetries === 0 || !error.retryable) {
          this.emitError({ error, recoverable: false });
          throw error;
        }
        if (numRetries === maxRetries) {
          this.emitError({ error, recoverable: false });
          throw new import_agents.APIConnectionError({
            message: `OpenAI Realtime API connection failed after ${numRetries} attempts`,
            options: {
              body: error,
              retryable: false
            }
          });
        }
        this.emitError({ error, recoverable: true });
        const retryInterval = numRetries === 0 ? DEFAULT_FIRST_RETRY_INTERVAL_MS : this.oaiRealtimeModel._options.connOptions.retryIntervalMs;
        this.#logger.warn(
          {
            attempt: numRetries,
            maxRetries,
            error
          },
          `OpenAI Realtime API connection failed, retrying in ${retryInterval / 1e3}s`
        );
        await (0, import_agents.delay)(retryInterval);
        numRetries++;
      }
      reconnecting = true;
    }
  }
  async runWs(wsConn) {
    const forwardEvents = async (signal) => {
      const abortFuture = new import_agents.Future();
      signal.addEventListener("abort", () => abortFuture.resolve());
      while (!this.#closed && wsConn.readyState === import_ws.WebSocket.OPEN && !signal.aborted) {
        try {
          const event = await Promise.race([this.messageChannel.get(), abortFuture.await]);
          if (signal.aborted || abortFuture.done || event === void 0) {
            break;
          }
          if (lkOaiDebug) {
            this.#logger.debug(this.loggableEvent(event), `(client) -> ${event.type}`);
          }
          this.emit("openai_client_event_queued", event);
          wsConn.send(JSON.stringify(event));
        } catch (error) {
          break;
        }
      }
      wsConn.close();
    };
    const wsCloseFuture = new import_agents.Future();
    wsConn.onerror = (error) => {
      wsCloseFuture.resolve(new import_agents.APIConnectionError({ message: error.message }));
    };
    wsConn.onclose = () => {
      wsCloseFuture.resolve();
    };
    wsConn.onmessage = (message) => {
      const event = JSON.parse(message.data);
      this.emit("openai_server_event_received", event);
      if (lkOaiDebug) {
        this.#logger.debug(this.loggableEvent(event), `(server) <- ${event.type}`);
      }
      switch (event.type) {
        case "input_audio_buffer.speech_started":
          this.handleInputAudioBufferSpeechStarted(event);
          break;
        case "input_audio_buffer.speech_stopped":
          this.handleInputAudioBufferSpeechStopped(event);
          break;
        case "response.created":
          this.handleResponseCreated(event);
          break;
        case "response.output_item.added":
          this.handleResponseOutputItemAdded(event);
          break;
        case "conversation.item.created":
          this.handleConversationItemCreated(event);
          break;
        case "conversation.item.deleted":
          this.handleConversationItemDeleted(event);
          break;
        case "conversation.item.input_audio_transcription.completed":
          this.handleConversationItemInputAudioTranscriptionCompleted(event);
          break;
        case "conversation.item.input_audio_transcription.failed":
          this.handleConversationItemInputAudioTranscriptionFailed(event);
          break;
        case "response.content_part.added":
          this.handleResponseContentPartAdded(event);
          break;
        case "response.content_part.done":
          this.handleResponseContentPartDone(event);
          break;
        case "response.text.delta":
          this.handleResponseTextDelta(event);
          break;
        case "response.text.done":
          this.handleResponseTextDone(event);
          break;
        case "response.audio_transcript.delta":
          this.handleResponseAudioTranscriptDelta(event);
          break;
        case "response.audio.delta":
          this.handleResponseAudioDelta(event);
          break;
        case "response.audio_transcript.done":
          this.handleResponseAudioTranscriptDone(event);
          break;
        case "response.audio.done":
          this.handleResponseAudioDone(event);
          break;
        case "response.output_item.done":
          this.handleResponseOutputItemDone(event);
          break;
        case "response.done":
          this.handleResponseDone(event);
          break;
        case "error":
          this.handleError(event);
          break;
        default:
          if (lkOaiDebug) {
            this.#logger.debug(`unhandled event: ${event.type}`);
          }
          break;
      }
    };
    const sendTask = import_agents.Task.from(({ signal }) => forwardEvents(signal));
    const wsTask = import_agents.Task.from(({ signal }) => {
      const abortPromise = new Promise((resolve) => {
        signal.addEventListener("abort", () => {
          resolve();
        });
      });
      return Promise.race([wsCloseFuture.await, abortPromise]);
    });
    const waitReconnectTask = import_agents.Task.from(async ({ signal }) => {
      await (0, import_agents.delay)(this.oaiRealtimeModel._options.maxSessionDuration, { signal });
      return new import_agents.APIConnectionError({
        message: "OpenAI Realtime API connection timeout"
      });
    });
    try {
      const result = await Promise.race([wsTask.result, sendTask.result, waitReconnectTask.result]);
      if (waitReconnectTask.done && this.currentGeneration) {
        await this.currentGeneration._doneFut.await;
      }
      if (result instanceof Error) {
        throw result;
      }
    } finally {
      await (0, import_agents.cancelAndWait)([wsTask, sendTask, waitReconnectTask], 2e3);
      wsConn.close();
    }
  }
  async close() {
    super.close();
    this.#closed = true;
    await this.#task;
  }
  handleInputAudioBufferSpeechStarted(_event) {
    this.emit("input_speech_started", {});
  }
  handleInputAudioBufferSpeechStopped(_event) {
    this.emit("input_speech_stopped", {
      userTranscriptionEnabled: this.oaiRealtimeModel._options.inputAudioTranscription !== null
    });
  }
  handleResponseCreated(event) {
    var _a;
    if (!event.response.id) {
      throw new Error("response.id is missing");
    }
    this.currentGeneration = {
      messageChannel: import_agents.stream.createStreamChannel(),
      functionChannel: import_agents.stream.createStreamChannel(),
      messages: /* @__PURE__ */ new Map(),
      _doneFut: new import_agents.Future(),
      _createdTimestamp: Date.now()
    };
    const generationEv = {
      messageStream: this.currentGeneration.messageChannel.stream(),
      functionStream: this.currentGeneration.functionChannel.stream(),
      userInitiated: false,
      responseId: event.response.id
    };
    const clientEventId = (_a = event.response.metadata) == null ? void 0 : _a.client_event_id;
    if (clientEventId) {
      const handle = this.responseCreatedFutures[clientEventId];
      if (handle) {
        delete this.responseCreatedFutures[clientEventId];
        generationEv.userInitiated = true;
        if (!handle.doneFut.done) {
          handle.doneFut.resolve(generationEv);
        }
      }
    }
    this.emit("generation_created", generationEv);
  }
  handleResponseOutputItemAdded(event) {
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
    if (!event.item.type) {
      throw new Error("item.type is not set");
    }
    if (!event.response_id) {
      throw new Error("response_id is not set");
    }
    const itemType = event.item.type;
    const responseId = event.response_id;
    if (itemType !== "message") {
      this.resolveGeneration(responseId);
      this.textModeRecoveryRetries = 0;
      return;
    }
    const itemId = event.item.id;
    if (!itemId) {
      throw new Error("item.id is not set");
    }
    const modalitiesFut = new import_agents.Future();
    const itemGeneration = {
      messageId: itemId,
      textChannel: import_agents.stream.createStreamChannel(),
      audioChannel: import_agents.stream.createStreamChannel(),
      audioTranscript: "",
      modalities: modalitiesFut
    };
    if (!this.oaiRealtimeModel.capabilities.audioOutput) {
      itemGeneration.audioChannel.close();
      modalitiesFut.resolve(["text"]);
    }
    this.currentGeneration.messageChannel.write({
      messageId: itemId,
      textStream: itemGeneration.textChannel.stream(),
      audioStream: itemGeneration.audioChannel.stream(),
      modalities: modalitiesFut.await
    });
    this.currentGeneration.messages.set(itemId, itemGeneration);
  }
  handleConversationItemCreated(event) {
    if (!event.item.id) {
      throw new Error("item.id is not set");
    }
    try {
      this.remoteChatCtx.insert(event.previous_item_id, openAIItemToLivekitItem(event.item));
    } catch (error) {
      this.#logger.error({ error, itemId: event.item.id }, "failed to insert conversation item");
    }
    const fut = this.itemCreateFutures[event.item.id];
    if (fut) {
      fut.resolve();
      delete this.itemCreateFutures[event.item.id];
    }
  }
  handleConversationItemDeleted(event) {
    if (!event.item_id) {
      throw new Error("item_id is not set");
    }
    try {
      this.remoteChatCtx.delete(event.item_id);
    } catch (error) {
      this.#logger.error({ error, itemId: event.item_id }, "failed to delete conversation item");
    }
    const fut = this.itemDeleteFutures[event.item_id];
    if (fut) {
      fut.resolve();
      delete this.itemDeleteFutures[event.item_id];
    }
  }
  handleConversationItemInputAudioTranscriptionCompleted(event) {
    const remoteItem = this.remoteChatCtx.get(event.item_id);
    if (!remoteItem) {
      return;
    }
    const item = remoteItem.item;
    if (item instanceof import_agents.llm.ChatMessage) {
      item.content.push(event.transcript);
    } else {
      throw new Error("item is not a chat message");
    }
    this.emit("input_audio_transcription_completed", {
      itemId: event.item_id,
      transcript: event.transcript,
      isFinal: true
    });
  }
  handleConversationItemInputAudioTranscriptionFailed(event) {
    this.#logger.error(
      { error: event.error },
      "OpenAI Realtime API failed to transcribe input audio"
    );
  }
  handleResponseContentPartAdded(event) {
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
    const itemId = event.item_id;
    const itemType = event.part.type;
    const itemGeneration = this.currentGeneration.messages.get(itemId);
    if (!itemGeneration) {
      this.#logger.warn(`itemGeneration not found for itemId=${itemId}`);
      return;
    }
    if (itemType === "text" && this.oaiRealtimeModel.capabilities.audioOutput) {
      this.#logger.warn("Text response received from OpenAI Realtime API in audio modality.");
    }
    if (!itemGeneration.modalities.done) {
      const modalityResult = itemType === "text" ? ["text"] : ["audio", "text"];
      itemGeneration.modalities.resolve(modalityResult);
    }
    if (this.currentGeneration._firstTokenTimestamp === void 0) {
      this.currentGeneration._firstTokenTimestamp = Date.now();
    }
  }
  handleResponseContentPartDone(event) {
    if (!event.part) {
      return;
    }
    if (event.part.type !== "text") {
      return;
    }
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
  }
  handleResponseTextDelta(event) {
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
    const itemGeneration = this.currentGeneration.messages.get(event.item_id);
    if (!itemGeneration) {
      throw new Error("itemGeneration is not set");
    }
    if (!this.oaiRealtimeModel.capabilities.audioOutput && !this.currentGeneration._firstTokenTimestamp) {
      this.currentGeneration._firstTokenTimestamp = Date.now();
    }
    itemGeneration.textChannel.write(event.delta);
    itemGeneration.audioTranscript += event.delta;
  }
  handleResponseTextDone(_event) {
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
  }
  handleResponseAudioTranscriptDelta(event) {
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
    const itemId = event.item_id;
    const delta = event.delta;
    const itemGeneration = this.currentGeneration.messages.get(itemId);
    if (!itemGeneration) {
      throw new Error("itemGeneration is not set");
    } else {
      itemGeneration.textChannel.write(delta);
      itemGeneration.audioTranscript += delta;
    }
  }
  handleResponseAudioDelta(event) {
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
    const itemGeneration = this.currentGeneration.messages.get(event.item_id);
    if (!itemGeneration) {
      throw new Error("itemGeneration is not set");
    }
    if (this.currentGeneration._firstTokenTimestamp === void 0) {
      this.currentGeneration._firstTokenTimestamp = Date.now();
    }
    if (!itemGeneration.modalities.done) {
      itemGeneration.modalities.resolve(["audio", "text"]);
    }
    const binaryString = atob(event.delta);
    const len = binaryString.length;
    const bytes = new Uint8Array(len);
    for (let i = 0; i < len; i++) {
      bytes[i] = binaryString.charCodeAt(i);
    }
    itemGeneration.audioChannel.write(
      new import_rtc_node.AudioFrame(
        new Int16Array(bytes.buffer),
        api_proto.SAMPLE_RATE,
        api_proto.NUM_CHANNELS,
        bytes.length / 2
      )
    );
  }
  handleResponseAudioTranscriptDone(_event) {
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
  }
  handleResponseAudioDone(_event) {
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
  }
  handleResponseOutputItemDone(event) {
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
    const itemId = event.item.id;
    const itemType = event.item.type;
    if (itemType === "function_call") {
      const item = event.item;
      if (!item.call_id || !item.name || !item.arguments) {
        throw new Error("item is not a function call");
      }
      this.currentGeneration.functionChannel.write(
        import_agents.llm.FunctionCall.create({
          callId: item.call_id,
          name: item.name,
          args: item.arguments
        })
      );
    } else if (itemType === "message") {
      const itemGeneration = this.currentGeneration.messages.get(itemId);
      if (!itemGeneration) {
        return;
      }
      itemGeneration.textChannel.close();
      itemGeneration.audioChannel.close();
      if (!itemGeneration.modalities.done) {
        itemGeneration.modalities.resolve(this.oaiRealtimeModel._options.modalities);
      }
    }
  }
  handleResponseDone(_event) {
    var _a, _b, _c, _d, _e, _f, _g, _h, _i, _j, _k, _l;
    if (!this.currentGeneration) {
      return;
    }
    const createdTimestamp = this.currentGeneration._createdTimestamp;
    const firstTokenTimestamp = this.currentGeneration._firstTokenTimestamp;
    this.#logger.debug(
      {
        messageCount: this.currentGeneration.messages.size
      },
      "Closing generation channels in handleResponseDone"
    );
    for (const generation of this.currentGeneration.messages.values()) {
      generation.textChannel.close();
      generation.audioChannel.close();
      if (!generation.modalities.done) {
        generation.modalities.resolve(this.oaiRealtimeModel._options.modalities);
      }
    }
    this.currentGeneration.functionChannel.close();
    this.currentGeneration.messageChannel.close();
    for (const itemId of this.currentGeneration.messages.keys()) {
      const remoteItem = this.remoteChatCtx.get(itemId);
      if (remoteItem && remoteItem.item instanceof import_agents.llm.ChatMessage) {
        remoteItem.item.content.push(this.currentGeneration.messages.get(itemId).audioTranscript);
      }
    }
    this.currentGeneration._doneFut.resolve();
    this.currentGeneration = void 0;
    const usage = _event.response.usage;
    const ttftMs = firstTokenTimestamp ? firstTokenTimestamp - createdTimestamp : -1;
    const durationMs = Date.now() - createdTimestamp;
    const realtimeMetrics = {
      type: "realtime_model_metrics",
      timestamp: createdTimestamp,
      requestId: _event.response.id || "",
      ttftMs,
      durationMs,
      cancelled: _event.response.status === "cancelled",
      label: "openai_realtime",
      inputTokens: (usage == null ? void 0 : usage.input_tokens) ?? 0,
      outputTokens: (usage == null ? void 0 : usage.output_tokens) ?? 0,
      totalTokens: (usage == null ? void 0 : usage.total_tokens) ?? 0,
      tokensPerSecond: durationMs > 0 ? ((usage == null ? void 0 : usage.output_tokens) ?? 0) / (durationMs / 1e3) : 0,
      inputTokenDetails: {
        audioTokens: ((_a = usage == null ? void 0 : usage.input_token_details) == null ? void 0 : _a.audio_tokens) ?? 0,
        textTokens: ((_b = usage == null ? void 0 : usage.input_token_details) == null ? void 0 : _b.text_tokens) ?? 0,
        imageTokens: 0,
        // Not supported yet
        cachedTokens: ((_c = usage == null ? void 0 : usage.input_token_details) == null ? void 0 : _c.cached_tokens) ?? 0,
        cachedTokensDetails: ((_d = usage == null ? void 0 : usage.input_token_details) == null ? void 0 : _d.cached_tokens_details) ? {
          audioTokens: ((_f = (_e = usage == null ? void 0 : usage.input_token_details) == null ? void 0 : _e.cached_tokens_details) == null ? void 0 : _f.audio_tokens) ?? 0,
          textTokens: ((_h = (_g = usage == null ? void 0 : usage.input_token_details) == null ? void 0 : _g.cached_tokens_details) == null ? void 0 : _h.text_tokens) ?? 0,
          imageTokens: ((_j = (_i = usage == null ? void 0 : usage.input_token_details) == null ? void 0 : _i.cached_tokens_details) == null ? void 0 : _j.image_tokens) ?? 0
        } : void 0
      },
      outputTokenDetails: {
        textTokens: ((_k = usage == null ? void 0 : usage.output_token_details) == null ? void 0 : _k.text_tokens) ?? 0,
        audioTokens: ((_l = usage == null ? void 0 : usage.output_token_details) == null ? void 0 : _l.audio_tokens) ?? 0,
        imageTokens: 0
      }
    };
    this.emit("metrics_collected", realtimeMetrics);
  }
  handleError(event) {
    if (event.error.message.startsWith("Cancellation failed")) {
      return;
    }
    this.#logger.error({ error: event.error }, "OpenAI Realtime API returned an error");
    this.emitError({
      error: new import_agents.APIError(event.error.message, {
        body: event.error,
        retryable: true
      }),
      recoverable: true
    });
  }
  emitError({ error, recoverable }) {
    this.emit("error", {
      timestamp: Date.now(),
      // TODO(brian): add label
      label: "",
      error,
      recoverable
    });
  }
  *resampleAudio(frame) {
    yield frame;
  }
  createResponse({
    userInitiated,
    instructions,
    oldHandle
  }) {
    const handle = oldHandle || new CreateResponseHandle({ instructions });
    if (oldHandle && instructions) {
      handle.instructions = instructions;
    }
    const eventId = (0, import_agents.shortuuid)("response_create_");
    if (userInitiated) {
      this.responseCreatedFutures[eventId] = handle;
    }
    const response = {};
    if (instructions) response.instructions = instructions;
    if (userInitiated) response.metadata = { client_event_id: eventId };
    this.sendEvent({
      type: "response.create",
      event_id: eventId,
      response: Object.keys(response).length > 0 ? response : void 0
    });
    return handle;
  }
  resolveGeneration(responseId) {
    if (!this.currentGeneration) {
      throw new Error("currentGeneration is not set");
    }
    const generation_ev = {
      messageStream: this.currentGeneration.messageChannel.stream(),
      functionStream: this.currentGeneration.functionChannel.stream(),
      userInitiated: false,
      responseId
    };
    const handle = this.responseCreatedFutures[responseId];
    if (handle) {
      delete this.responseCreatedFutures[responseId];
      generation_ev.userInitiated = true;
      if (handle.doneFut.done) {
        this.#logger.warn({ responseId }, "response received after timeout");
      } else {
        handle.doneFut.resolve(generation_ev);
      }
    }
  }
}
function livekitItemToOpenAIItem(item) {
  switch (item.type) {
    case "function_call":
      return {
        id: item.id,
        type: "function_call",
        call_id: item.callId,
        name: item.name,
        arguments: item.args
      };
    case "function_call_output":
      return {
        id: item.id,
        type: "function_call_output",
        call_id: item.callId,
        output: item.output
      };
    case "message":
      const role = item.role === "developer" ? "system" : item.role;
      const contentList = [];
      for (const c of item.content) {
        if (typeof c === "string") {
          contentList.push({
            type: role === "assistant" ? "text" : "input_text",
            text: c
          });
        } else if (c.type === "image_content") {
          continue;
        } else if (c.type === "audio_content") {
          if (role === "user") {
            const encodedAudio = Buffer.from((0, import_rtc_node.combineAudioFrames)(c.frame).data).toString("base64");
            contentList.push({
              type: "input_audio",
              audio: encodedAudio
            });
          }
        }
      }
      return {
        id: item.id,
        type: "message",
        role,
        content: contentList
      };
    default:
      throw new Error(`Unsupported item type: ${item.type}`);
  }
}
function openAIItemToLivekitItem(item) {
  if (!item.id) {
    throw new Error("item.id is not set");
  }
  switch (item.type) {
    case "function_call":
      return import_agents.llm.FunctionCall.create({
        id: item.id,
        callId: item.call_id,
        name: item.name,
        args: item.arguments
      });
    case "function_call_output":
      return import_agents.llm.FunctionCallOutput.create({
        id: item.id,
        callId: item.call_id,
        output: item.output,
        isError: false
      });
    case "message":
      const content = [];
      const contents = Array.isArray(item.content) ? item.content : [item.content];
      for (const c of contents) {
        if (c.type === "text" || c.type === "input_text") {
          content.push(c.text);
        }
      }
      return import_agents.llm.ChatMessage.create({
        id: item.id,
        role: item.role,
        content
      });
  }
}
function createMockAudioItem(durationSeconds = 2) {
  const audioData = Buffer.alloc(durationSeconds * SAMPLE_RATE);
  return import_agents.llm.ChatMessage.create({
    id: (0, import_agents.shortuuid)(MOCK_AUDIO_ID_PREFIX),
    role: "user",
    content: [
      {
        type: "audio_content",
        frame: [
          new import_rtc_node.AudioFrame(
            new Int16Array(audioData.buffer),
            SAMPLE_RATE,
            NUM_CHANNELS,
            audioData.length / 2
          )
        ]
      }
    ]
  });
}
function toOaiToolChoice(toolChoice) {
  if (typeof toolChoice === "string") {
    return toolChoice;
  }
  if ((toolChoice == null ? void 0 : toolChoice.type) === "function") {
    return toolChoice.function.name;
  }
  return "auto";
}
// Annotate the CommonJS export names for ESM import in node:
0 && (module.exports = {
  RealtimeModel,
  RealtimeSession
});
//# sourceMappingURL=realtime_model_beta.cjs.map