@keyframelabs/elements 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -28
- package/dist/agents/audio-utils.d.ts +1 -1
- package/dist/agents/elevenlabs.d.ts +11 -0
- package/dist/agents/index.d.ts +8 -16
- package/dist/agents/openai-realtime.d.ts +60 -0
- package/dist/agents/types.d.ts +2 -2
- package/dist/index.d.ts +2 -2
- package/dist/index.js +355 -371
- package/dist/types.d.ts +4 -1
- package/package.json +1 -1
- package/dist/agents/cartesia.d.ts +0 -32
- package/dist/agents/gemini-live.d.ts +0 -25
- package/dist/agents/vapi.d.ts +0 -30
package/dist/index.js
CHANGED
|
@@ -1,60 +1,60 @@
|
|
|
1
|
-
import { createClient as
|
|
2
|
-
const
|
|
3
|
-
function
|
|
4
|
-
const e = atob(
|
|
5
|
-
for (let
|
|
6
|
-
t[
|
|
1
|
+
import { createClient as _ } from "@keyframelabs/sdk";
|
|
2
|
+
const u = 24e3;
|
|
3
|
+
function f(s) {
|
|
4
|
+
const e = atob(s), t = new Uint8Array(e.length);
|
|
5
|
+
for (let n = 0; n < e.length; n++)
|
|
6
|
+
t[n] = e.charCodeAt(n);
|
|
7
7
|
return t;
|
|
8
8
|
}
|
|
9
|
-
function
|
|
9
|
+
function S(s) {
|
|
10
10
|
let e = "";
|
|
11
|
-
for (let t = 0; t <
|
|
12
|
-
e += String.fromCharCode(
|
|
11
|
+
for (let t = 0; t < s.length; t++)
|
|
12
|
+
e += String.fromCharCode(s[t]);
|
|
13
13
|
return btoa(e);
|
|
14
14
|
}
|
|
15
|
-
function
|
|
15
|
+
function p(s, e, t) {
|
|
16
16
|
if (e === t)
|
|
17
|
-
return
|
|
18
|
-
const
|
|
19
|
-
for (let
|
|
20
|
-
const
|
|
21
|
-
|
|
22
|
-
|
|
17
|
+
return s;
|
|
18
|
+
const n = new Int16Array(s.buffer, s.byteOffset, s.length / 2), a = e / t, i = Math.floor(n.length / a), c = new Int16Array(i);
|
|
19
|
+
for (let l = 0; l < i; l++) {
|
|
20
|
+
const m = l * a, d = Math.floor(m), w = Math.min(d + 1, n.length - 1), g = m - d;
|
|
21
|
+
c[l] = Math.round(
|
|
22
|
+
n[d] * (1 - g) + n[w] * g
|
|
23
23
|
);
|
|
24
24
|
}
|
|
25
|
-
return new Uint8Array(
|
|
25
|
+
return new Uint8Array(c.buffer);
|
|
26
26
|
}
|
|
27
|
-
function
|
|
28
|
-
const
|
|
27
|
+
function C() {
|
|
28
|
+
const s = /* @__PURE__ */ new Map();
|
|
29
29
|
return {
|
|
30
30
|
on(e, t) {
|
|
31
|
-
|
|
31
|
+
s.has(e) || s.set(e, /* @__PURE__ */ new Set()), s.get(e).add(t);
|
|
32
32
|
},
|
|
33
33
|
off(e, t) {
|
|
34
|
-
|
|
34
|
+
s.get(e)?.delete(t);
|
|
35
35
|
},
|
|
36
36
|
emit(e, t) {
|
|
37
|
-
|
|
37
|
+
s.get(e)?.forEach((n) => n(t));
|
|
38
38
|
},
|
|
39
39
|
removeAllListeners() {
|
|
40
|
-
|
|
40
|
+
s.clear();
|
|
41
41
|
}
|
|
42
42
|
};
|
|
43
43
|
}
|
|
44
|
-
function
|
|
45
|
-
const e = new Int16Array(
|
|
46
|
-
for (let t = 0; t <
|
|
47
|
-
const
|
|
48
|
-
e[t] =
|
|
44
|
+
function v(s) {
|
|
45
|
+
const e = new Int16Array(s.length);
|
|
46
|
+
for (let t = 0; t < s.length; t++) {
|
|
47
|
+
const n = Math.max(-1, Math.min(1, s[t]));
|
|
48
|
+
e[t] = n < 0 ? n * 32768 : n * 32767;
|
|
49
49
|
}
|
|
50
50
|
return new Uint8Array(e.buffer);
|
|
51
51
|
}
|
|
52
|
-
const
|
|
53
|
-
class
|
|
52
|
+
const E = 16e3;
|
|
53
|
+
class y {
|
|
54
54
|
ws = null;
|
|
55
55
|
_state = "idle";
|
|
56
|
-
events =
|
|
57
|
-
inputSampleRate =
|
|
56
|
+
events = C();
|
|
57
|
+
inputSampleRate = E;
|
|
58
58
|
/** Current agent state */
|
|
59
59
|
get state() {
|
|
60
60
|
return this._state;
|
|
@@ -113,84 +113,8 @@ class u {
|
|
|
113
113
|
this.events.emit("closed", { code: e, reason: t });
|
|
114
114
|
}
|
|
115
115
|
}
|
|
116
|
-
const A = "
|
|
117
|
-
class R extends
|
|
118
|
-
agentName = "GeminiLive";
|
|
119
|
-
async connect(e) {
|
|
120
|
-
if (this.ws)
|
|
121
|
-
throw new Error("Already connected");
|
|
122
|
-
if (!e.apiKey)
|
|
123
|
-
throw new Error("Gemini API key is required");
|
|
124
|
-
e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate);
|
|
125
|
-
const t = e.model ?? A, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${k}?access_token=${encodeURIComponent(e.apiKey)}` : `${I}?key=${encodeURIComponent(e.apiKey)}`;
|
|
126
|
-
return new Promise((a, d) => {
|
|
127
|
-
this.ws = new WebSocket(n), this.ws.onopen = () => {
|
|
128
|
-
const o = {
|
|
129
|
-
setup: {
|
|
130
|
-
model: `models/${t}`,
|
|
131
|
-
generationConfig: {
|
|
132
|
-
responseModalities: ["AUDIO"]
|
|
133
|
-
},
|
|
134
|
-
systemInstruction: e.systemPrompt ? { parts: [{ text: e.systemPrompt }] } : void 0
|
|
135
|
-
}
|
|
136
|
-
};
|
|
137
|
-
this.ws.send(JSON.stringify(o)), this.setState("listening"), a();
|
|
138
|
-
}, this.ws.onerror = () => {
|
|
139
|
-
d(new Error("Failed to connect to Gemini Live"));
|
|
140
|
-
}, this.ws.onclose = (o) => {
|
|
141
|
-
this.ws = null, this.setState("idle"), this.emitClosed(o.code, o.reason);
|
|
142
|
-
}, this.ws.onmessage = (o) => {
|
|
143
|
-
this.handleMessage(o.data);
|
|
144
|
-
};
|
|
145
|
-
});
|
|
146
|
-
}
|
|
147
|
-
handleParsedMessage(e) {
|
|
148
|
-
const s = e.serverContent;
|
|
149
|
-
if (s) {
|
|
150
|
-
if (s.interrupted) {
|
|
151
|
-
this.events.emit("interrupted", void 0), this.setState("listening");
|
|
152
|
-
return;
|
|
153
|
-
}
|
|
154
|
-
if (s.turnComplete) {
|
|
155
|
-
this.events.emit("turnEnd", void 0), this.setState("listening");
|
|
156
|
-
return;
|
|
157
|
-
}
|
|
158
|
-
if (s.modelTurn?.parts) {
|
|
159
|
-
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
160
|
-
for (const n of s.modelTurn.parts) {
|
|
161
|
-
if (n.inlineData?.data) {
|
|
162
|
-
const a = g(n.inlineData.data);
|
|
163
|
-
this.events.emit("audio", a);
|
|
164
|
-
}
|
|
165
|
-
n.text && this.events.emit("transcript", {
|
|
166
|
-
role: "assistant",
|
|
167
|
-
text: n.text,
|
|
168
|
-
isFinal: !0
|
|
169
|
-
});
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
sendAudio(e) {
|
|
175
|
-
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
|
176
|
-
console.warn("[GeminiLive] Cannot send audio: not connected");
|
|
177
|
-
return;
|
|
178
|
-
}
|
|
179
|
-
const t = {
|
|
180
|
-
realtimeInput: {
|
|
181
|
-
mediaChunks: [
|
|
182
|
-
{
|
|
183
|
-
mimeType: `audio/pcm;rate=${this.inputSampleRate}`,
|
|
184
|
-
data: m(e)
|
|
185
|
-
}
|
|
186
|
-
]
|
|
187
|
-
}
|
|
188
|
-
};
|
|
189
|
-
this.ws.send(JSON.stringify(t));
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
const M = ["neutral", "angry", "sad", "happy"], T = "wss://api.elevenlabs.io/v1/convai/conversation";
|
|
193
|
-
class _ extends u {
|
|
116
|
+
const A = ["neutral", "angry", "sad", "happy"], I = "wss://api.elevenlabs.io/v1/convai/conversation";
|
|
117
|
+
class R extends y {
|
|
194
118
|
agentName = "ElevenLabs";
|
|
195
119
|
outputSampleRate = 24e3;
|
|
196
120
|
// Default, updated from metadata
|
|
@@ -202,6 +126,12 @@ class _ extends u {
|
|
|
202
126
|
// True after conversation_initiation_metadata received
|
|
203
127
|
lastInterruptId = 0;
|
|
204
128
|
// Track interruptions to filter stale audio
|
|
129
|
+
// Virtual buffer turn-end detection: track audio duration and emit turnEnd
|
|
130
|
+
// when agent_response has arrived and all audio "would have" finished playing.
|
|
131
|
+
agentResponseReceived = !1;
|
|
132
|
+
turnStartTime = 0;
|
|
133
|
+
accumulatedDurationMs = 0;
|
|
134
|
+
turnEndTimer = null;
|
|
205
135
|
async connect(e) {
|
|
206
136
|
if (this.ws)
|
|
207
137
|
throw new Error("Already connected");
|
|
@@ -209,15 +139,15 @@ class _ extends u {
|
|
|
209
139
|
throw new Error("ElevenLabs agent ID or signed URL is required");
|
|
210
140
|
e.inputSampleRate && (this.sourceInputSampleRate = e.inputSampleRate);
|
|
211
141
|
let t;
|
|
212
|
-
return e.signedUrl ? t = e.signedUrl : (t = `${
|
|
142
|
+
return e.signedUrl ? t = e.signedUrl : (t = `${I}?agent_id=${e.agentId}`, e.apiKey && (t += `&xi-api-key=${e.apiKey}`)), new Promise((n, a) => {
|
|
213
143
|
this.ws = new WebSocket(t), this.ws.onopen = () => {
|
|
214
|
-
this.setState("listening"),
|
|
144
|
+
this.setState("listening"), n();
|
|
215
145
|
}, this.ws.onerror = () => {
|
|
216
|
-
|
|
217
|
-
}, this.ws.onclose = (
|
|
218
|
-
this.ws = null, this.setState("idle"), this.emitClosed(
|
|
219
|
-
}, this.ws.onmessage = (
|
|
220
|
-
this.handleMessage(
|
|
146
|
+
a(new Error("Failed to connect to ElevenLabs"));
|
|
147
|
+
}, this.ws.onclose = (i) => {
|
|
148
|
+
this.ws = null, this.setState("idle"), this.emitClosed(i.code, i.reason);
|
|
149
|
+
}, this.ws.onmessage = (i) => {
|
|
150
|
+
this.handleMessage(i.data);
|
|
221
151
|
};
|
|
222
152
|
});
|
|
223
153
|
}
|
|
@@ -254,12 +184,12 @@ class _ extends u {
|
|
|
254
184
|
const t = e.conversation_initiation_metadata_event;
|
|
255
185
|
if (t) {
|
|
256
186
|
if (t.agent_output_audio_format) {
|
|
257
|
-
const
|
|
258
|
-
|
|
187
|
+
const n = t.agent_output_audio_format.match(/pcm_(\d+)/);
|
|
188
|
+
n && (this.outputSampleRate = parseInt(n[1], 10));
|
|
259
189
|
}
|
|
260
190
|
if (t.user_input_audio_format) {
|
|
261
|
-
const
|
|
262
|
-
|
|
191
|
+
const n = t.user_input_audio_format.match(/pcm_(\d+)/);
|
|
192
|
+
n && (this.expectedInputSampleRate = parseInt(n[1], 10));
|
|
263
193
|
}
|
|
264
194
|
this.initialized = !0;
|
|
265
195
|
}
|
|
@@ -275,8 +205,12 @@ class _ extends u {
|
|
|
275
205
|
if (!t?.audio_base_64 || (t.event_id ?? 0) <= this.lastInterruptId)
|
|
276
206
|
return;
|
|
277
207
|
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
278
|
-
let
|
|
279
|
-
this.outputSampleRate !==
|
|
208
|
+
let a = f(t.audio_base_64);
|
|
209
|
+
this.outputSampleRate !== u && (a = p(a, this.outputSampleRate, u)), this.events.emit("audio", a);
|
|
210
|
+
const i = a.length / 2 / u * 1e3;
|
|
211
|
+
this.turnStartTime === 0 && (this.turnStartTime = Date.now()), this.accumulatedDurationMs += i, console.debug(
|
|
212
|
+
`[ElevenLabs] audio chunk: ${a.length} bytes, +${i.toFixed(0)}ms, totalDuration=${this.accumulatedDurationMs.toFixed(0)}ms, agentResponse=${this.agentResponseReceived}`
|
|
213
|
+
), this.scheduleVirtualBufferCheck();
|
|
280
214
|
}
|
|
281
215
|
handleUserTranscript(e) {
|
|
282
216
|
const t = e.user_transcription_event;
|
|
@@ -288,18 +222,38 @@ class _ extends u {
|
|
|
288
222
|
}
|
|
289
223
|
handleAgentResponse(e) {
|
|
290
224
|
const t = e.agent_response_event;
|
|
291
|
-
t?.agent_response && (this.events.emit("
|
|
225
|
+
t?.agent_response && (this.events.emit("transcript", {
|
|
292
226
|
role: "assistant",
|
|
293
227
|
text: t.agent_response,
|
|
294
228
|
isFinal: !0
|
|
295
|
-
})
|
|
229
|
+
}), this.agentResponseReceived = !0, console.debug(
|
|
230
|
+
`[ElevenLabs] agent_response received: totalDuration=${this.accumulatedDurationMs.toFixed(0)}ms, text="${t.agent_response.slice(0, 60)}${t.agent_response.length > 60 ? "..." : ""}"`
|
|
231
|
+
), this.scheduleVirtualBufferCheck());
|
|
232
|
+
}
|
|
233
|
+
/**
|
|
234
|
+
* Schedule a timer to emit turnEnd when the virtual audio buffer
|
|
235
|
+
* "would have" finished playing. Replicates the ElevenLabs SDK's
|
|
236
|
+
* AudioWorklet buffer-empty detection without requiring local playback.
|
|
237
|
+
*/
|
|
238
|
+
scheduleVirtualBufferCheck() {
|
|
239
|
+
if (!this.agentResponseReceived || this.turnStartTime === 0) return;
|
|
240
|
+
this.turnEndTimer !== null && clearTimeout(this.turnEndTimer);
|
|
241
|
+
const e = Date.now() - this.turnStartTime, t = Math.max(0, this.accumulatedDurationMs - e);
|
|
242
|
+
console.debug(
|
|
243
|
+
`[ElevenLabs] virtual buffer: elapsed=${e.toFixed(0)}ms, accumulated=${this.accumulatedDurationMs.toFixed(0)}ms, remaining=${t.toFixed(0)}ms`
|
|
244
|
+
), this.turnEndTimer = setTimeout(() => {
|
|
245
|
+
this.turnEndTimer = null, console.debug("[ElevenLabs] virtual buffer drained, emitting turnEnd"), this.resetTurnState(), this.events.emit("turnEnd", void 0), this.setState("listening");
|
|
246
|
+
}, t);
|
|
247
|
+
}
|
|
248
|
+
resetTurnState() {
|
|
249
|
+
this.agentResponseReceived = !1, this.turnStartTime = 0, this.accumulatedDurationMs = 0, this.turnEndTimer !== null && (clearTimeout(this.turnEndTimer), this.turnEndTimer = null);
|
|
296
250
|
}
|
|
297
251
|
handleClientToolCall(e) {
|
|
298
252
|
const t = e.client_tool_call;
|
|
299
253
|
if (t) {
|
|
300
254
|
if (t.tool_name === "set_emotion") {
|
|
301
|
-
const
|
|
302
|
-
|
|
255
|
+
const n = t.parameters?.emotion?.toLowerCase();
|
|
256
|
+
n && A.includes(n) && this.events.emit("emotion", n);
|
|
303
257
|
}
|
|
304
258
|
this.ws && this.ws.readyState === WebSocket.OPEN && this.ws.send(JSON.stringify({
|
|
305
259
|
type: "client_tool_result",
|
|
@@ -311,14 +265,16 @@ class _ extends u {
|
|
|
311
265
|
}
|
|
312
266
|
handleInterruption(e) {
|
|
313
267
|
const t = e.interruption_event;
|
|
314
|
-
t?.event_id && (this.lastInterruptId = t.event_id), this.
|
|
268
|
+
t?.event_id && (this.lastInterruptId = t.event_id), (this.agentResponseReceived || this.accumulatedDurationMs > 0) && console.debug(
|
|
269
|
+
`[ElevenLabs] interruption: discarding pending turn (duration=${this.accumulatedDurationMs.toFixed(0)}ms, agentResponse=${this.agentResponseReceived})`
|
|
270
|
+
), this.resetTurnState(), this.events.emit("interrupted", void 0), this.setState("listening");
|
|
315
271
|
}
|
|
316
272
|
sendAudio(e) {
|
|
317
273
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN || !this.initialized)
|
|
318
274
|
return;
|
|
319
275
|
let t = e;
|
|
320
|
-
this.sourceInputSampleRate !== this.expectedInputSampleRate && (t =
|
|
321
|
-
user_audio_chunk:
|
|
276
|
+
this.sourceInputSampleRate !== this.expectedInputSampleRate && (t = p(e, this.sourceInputSampleRate, this.expectedInputSampleRate)), this.ws.send(JSON.stringify({
|
|
277
|
+
user_audio_chunk: S(t)
|
|
322
278
|
}));
|
|
323
279
|
}
|
|
324
280
|
/**
|
|
@@ -348,217 +304,256 @@ class _ extends u {
|
|
|
348
304
|
}));
|
|
349
305
|
}
|
|
350
306
|
close() {
|
|
351
|
-
this.initialized = !1, this.lastInterruptId = 0, super.close();
|
|
307
|
+
this.initialized = !1, this.lastInterruptId = 0, this.resetTurnState(), super.close();
|
|
352
308
|
}
|
|
353
309
|
}
|
|
354
|
-
const
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
310
|
+
const T = ["neutral", "angry", "sad", "happy"], k = "wss://api.openai.com/v1/realtime", M = "gpt-realtime", h = 24e3, O = {
|
|
311
|
+
type: "function",
|
|
312
|
+
name: "set_emotion",
|
|
313
|
+
description: "Set the emotional expression of the avatar. Call this on every turn to reflect the tone of your response.",
|
|
314
|
+
parameters: {
|
|
315
|
+
type: "object",
|
|
316
|
+
properties: {
|
|
317
|
+
emotion: {
|
|
318
|
+
type: "string",
|
|
319
|
+
enum: ["neutral", "angry", "sad", "happy"],
|
|
320
|
+
description: "The emotion to display"
|
|
321
|
+
}
|
|
322
|
+
},
|
|
323
|
+
required: ["emotion"]
|
|
324
|
+
}
|
|
325
|
+
};
|
|
326
|
+
class P extends y {
|
|
327
|
+
agentName = "OpenAIRealtime";
|
|
328
|
+
connectResolve = null;
|
|
329
|
+
connectReject = null;
|
|
330
|
+
connectTimeout = null;
|
|
331
|
+
initialSessionUpdate = null;
|
|
332
|
+
currentResponseHasAudio = !1;
|
|
333
|
+
currentTranscript = "";
|
|
334
|
+
handledFunctionCallIds = /* @__PURE__ */ new Set();
|
|
335
|
+
sourceInputSampleRate = 16e3;
|
|
336
|
+
pendingFunctionCallStartedAtMs = null;
|
|
337
|
+
pendingFunctionCallNames = [];
|
|
366
338
|
async connect(e) {
|
|
367
339
|
if (this.ws)
|
|
368
340
|
throw new Error("Already connected");
|
|
369
|
-
if (!e.agentId)
|
|
370
|
-
throw new Error("Cartesia Agent ID is required");
|
|
371
341
|
if (!e.apiKey)
|
|
372
|
-
throw new Error("
|
|
373
|
-
e.inputSampleRate && (this.
|
|
374
|
-
const t =
|
|
375
|
-
return new Promise((
|
|
376
|
-
this.
|
|
377
|
-
this.
|
|
342
|
+
throw new Error("OpenAI Realtime token is required");
|
|
343
|
+
e.inputSampleRate && (this.sourceInputSampleRate = e.inputSampleRate);
|
|
344
|
+
const t = e.model ?? M;
|
|
345
|
+
return this.initialSessionUpdate = this.buildSessionUpdate(e, t), new Promise((n, a) => {
|
|
346
|
+
this.connectResolve = n, this.connectReject = a, this.connectTimeout = setTimeout(() => {
|
|
347
|
+
this.rejectPendingConnect(new Error("Timed out waiting for OpenAI Realtime session setup")), this.close();
|
|
348
|
+
}, 1e4), this.ws = new WebSocket(
|
|
349
|
+
`${k}?model=${encodeURIComponent(t)}`,
|
|
350
|
+
["realtime", `openai-insecure-api-key.${e.apiKey}`]
|
|
351
|
+
), this.ws.onopen = () => {
|
|
378
352
|
}, this.ws.onerror = () => {
|
|
379
|
-
|
|
380
|
-
}, this.ws.onclose = (
|
|
381
|
-
this.
|
|
382
|
-
|
|
383
|
-
|
|
353
|
+
this.rejectPendingConnect(new Error("Failed to connect to OpenAI Realtime"));
|
|
354
|
+
}, this.ws.onclose = (i) => {
|
|
355
|
+
if (this.clearConnectTimeout(), this.connectReject) {
|
|
356
|
+
const c = i.reason ? `: ${i.reason}` : "";
|
|
357
|
+
this.rejectPendingConnect(new Error(`OpenAI Realtime closed before initialization (${i.code}${c})`));
|
|
358
|
+
}
|
|
359
|
+
this.resetTurnState(), this.initialSessionUpdate = null, this.ws = null, this.setState("idle"), this.emitClosed(i.code, i.reason);
|
|
360
|
+
}, this.ws.onmessage = (i) => {
|
|
361
|
+
this.handleMessage(i.data);
|
|
384
362
|
};
|
|
385
363
|
});
|
|
386
364
|
}
|
|
387
|
-
sendStartEvent() {
|
|
388
|
-
if (!this.ws) return;
|
|
389
|
-
const e = {
|
|
390
|
-
event: "start",
|
|
391
|
-
config: {
|
|
392
|
-
input_format: this.cartesiaInputFormat
|
|
393
|
-
}
|
|
394
|
-
};
|
|
395
|
-
this.ws.send(JSON.stringify(e));
|
|
396
|
-
}
|
|
397
|
-
/**
|
|
398
|
-
* Keep connection alive with periodic custom events.
|
|
399
|
-
* Cartesia requires activity every 30s.
|
|
400
|
-
*/
|
|
401
|
-
startHeartbeat() {
|
|
402
|
-
this.pingInterval = window.setInterval(() => {
|
|
403
|
-
this.ws?.readyState === WebSocket.OPEN && this.streamId && this.ws.send(JSON.stringify({
|
|
404
|
-
event: "custom",
|
|
405
|
-
stream_id: this.streamId,
|
|
406
|
-
metadata: { keepalive: !0 }
|
|
407
|
-
}));
|
|
408
|
-
}, 2e4);
|
|
409
|
-
}
|
|
410
|
-
stopHeartbeat() {
|
|
411
|
-
this.pingInterval && (clearInterval(this.pingInterval), this.pingInterval = null);
|
|
412
|
-
}
|
|
413
365
|
handleParsedMessage(e) {
|
|
414
366
|
const t = e;
|
|
415
|
-
switch (t.
|
|
416
|
-
case "
|
|
417
|
-
this.
|
|
367
|
+
switch (t.type) {
|
|
368
|
+
case "session.created":
|
|
369
|
+
this.sendInitialSessionUpdate();
|
|
418
370
|
break;
|
|
419
|
-
case "
|
|
420
|
-
this.
|
|
371
|
+
case "session.updated":
|
|
372
|
+
this.clearConnectTimeout(), this.setState("listening"), this.resolvePendingConnect();
|
|
421
373
|
break;
|
|
422
|
-
case "
|
|
423
|
-
|
|
374
|
+
case "response.output_audio.delta":
|
|
375
|
+
case "response.audio.delta":
|
|
376
|
+
if (!t.delta)
|
|
377
|
+
return;
|
|
378
|
+
if (!this.currentResponseHasAudio) {
|
|
379
|
+
if (this.pendingFunctionCallStartedAtMs !== null) {
|
|
380
|
+
const n = performance.now() - this.pendingFunctionCallStartedAtMs;
|
|
381
|
+
console.debug("[OpenAIRealtime] Function call latency", {
|
|
382
|
+
calls: this.pendingFunctionCallNames,
|
|
383
|
+
latencyMs: Math.round(n)
|
|
384
|
+
}), this.pendingFunctionCallStartedAtMs = null, this.pendingFunctionCallNames = [];
|
|
385
|
+
}
|
|
386
|
+
this.currentResponseHasAudio = !0, this.events.emit("turnStart", void 0), this.setState("speaking");
|
|
387
|
+
}
|
|
388
|
+
this.events.emit("audio", f(t.delta));
|
|
389
|
+
break;
|
|
390
|
+
case "response.output_audio_transcript.delta":
|
|
391
|
+
if (!t.delta)
|
|
392
|
+
return;
|
|
393
|
+
this.currentTranscript += t.delta, this.events.emit("transcript", {
|
|
394
|
+
role: "assistant",
|
|
395
|
+
text: this.currentTranscript,
|
|
396
|
+
isFinal: !1
|
|
397
|
+
});
|
|
398
|
+
break;
|
|
399
|
+
case "response.output_audio_transcript.done":
|
|
400
|
+
if (!t.transcript)
|
|
401
|
+
return;
|
|
402
|
+
this.currentTranscript = t.transcript, this.events.emit("transcript", {
|
|
403
|
+
role: "assistant",
|
|
404
|
+
text: t.transcript,
|
|
405
|
+
isFinal: !0
|
|
406
|
+
});
|
|
407
|
+
break;
|
|
408
|
+
case "input_audio_buffer.speech_started":
|
|
409
|
+
this.resetTurnState(), this.events.emit("interrupted", void 0), this.setState("listening");
|
|
424
410
|
break;
|
|
425
|
-
case "
|
|
426
|
-
|
|
411
|
+
case "response.done":
|
|
412
|
+
this.handleResponseDone(t.response);
|
|
427
413
|
break;
|
|
414
|
+
case "error": {
|
|
415
|
+
const n = t.error?.message ?? t.message ?? "Unknown OpenAI Realtime error";
|
|
416
|
+
this.rejectPendingConnect(new Error(n)), console.error("[OpenAIRealtime] Server error:", t);
|
|
417
|
+
break;
|
|
418
|
+
}
|
|
428
419
|
}
|
|
429
420
|
}
|
|
430
|
-
handleAck(e) {
|
|
431
|
-
this.streamId = e.stream_id || null, this.isReady = !0, this.setState("listening");
|
|
432
|
-
}
|
|
433
|
-
handleMediaOutput(e) {
|
|
434
|
-
if (!e.media?.payload) return;
|
|
435
|
-
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
436
|
-
let t = g(e.media.payload);
|
|
437
|
-
this.cartesiaOutputRate !== l && (t = h(t, this.cartesiaOutputRate, l)), this.events.emit("audio", t);
|
|
438
|
-
}
|
|
439
|
-
handleClear() {
|
|
440
|
-
this.events.emit("interrupted", void 0), this.setState("listening");
|
|
441
|
-
}
|
|
442
421
|
sendAudio(e) {
|
|
443
|
-
if (!this.ws || this.ws.readyState !== WebSocket.OPEN
|
|
422
|
+
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
|
423
|
+
console.warn("[OpenAIRealtime] Cannot send audio: not connected");
|
|
444
424
|
return;
|
|
425
|
+
}
|
|
445
426
|
let t = e;
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
stream_id: this.streamId,
|
|
450
|
-
media: {
|
|
451
|
-
payload: m(t)
|
|
452
|
-
}
|
|
427
|
+
this.sourceInputSampleRate !== h && (t = p(e, this.sourceInputSampleRate, h)), this.ws.send(JSON.stringify({
|
|
428
|
+
type: "input_audio_buffer.append",
|
|
429
|
+
audio: S(t)
|
|
453
430
|
}));
|
|
454
431
|
}
|
|
455
432
|
close() {
|
|
456
|
-
this.
|
|
433
|
+
this.rejectPendingConnect(new Error("Connection closed")), this.clearConnectTimeout(), this.resetTurnState(), this.initialSessionUpdate = null, this.handledFunctionCallIds.clear(), super.close();
|
|
434
|
+
}
|
|
435
|
+
buildSessionUpdate(e, t) {
|
|
436
|
+
const n = e.turnDetection ?? { type: "semantic_vad", eagerness: "high" };
|
|
437
|
+
return {
|
|
438
|
+
type: "session.update",
|
|
439
|
+
session: {
|
|
440
|
+
type: "realtime",
|
|
441
|
+
model: t,
|
|
442
|
+
output_modalities: ["audio"],
|
|
443
|
+
instructions: e.systemPrompt,
|
|
444
|
+
audio: {
|
|
445
|
+
input: {
|
|
446
|
+
format: {
|
|
447
|
+
type: "audio/pcm",
|
|
448
|
+
rate: h
|
|
449
|
+
},
|
|
450
|
+
turn_detection: n
|
|
451
|
+
},
|
|
452
|
+
output: {
|
|
453
|
+
format: {
|
|
454
|
+
type: "audio/pcm",
|
|
455
|
+
rate: u
|
|
456
|
+
},
|
|
457
|
+
...e.voice ? { voice: e.voice } : {}
|
|
458
|
+
}
|
|
459
|
+
},
|
|
460
|
+
tools: [O],
|
|
461
|
+
tool_choice: "auto"
|
|
462
|
+
}
|
|
463
|
+
};
|
|
457
464
|
}
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
agentName = "Vapi";
|
|
461
|
-
// Audio configuration - Vapi uses 16kHz PCM by default
|
|
462
|
-
vapiSampleRate = 16e3;
|
|
463
|
-
async connect(e) {
|
|
464
|
-
if (this.ws)
|
|
465
|
-
throw new Error("Already connected");
|
|
466
|
-
if (!e.signedUrl)
|
|
467
|
-
throw new Error("Vapi signed URL is required");
|
|
468
|
-
return e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate), new Promise((t, s) => {
|
|
469
|
-
this.ws = new WebSocket(e.signedUrl), this.ws.binaryType = "arraybuffer", this.ws.onopen = () => {
|
|
470
|
-
this.setState("listening"), t();
|
|
471
|
-
}, this.ws.onerror = () => {
|
|
472
|
-
s(new Error("Failed to connect to Vapi"));
|
|
473
|
-
}, this.ws.onclose = (n) => {
|
|
474
|
-
this.ws = null, this.setState("idle"), this.emitClosed(n.code, n.reason);
|
|
475
|
-
}, this.ws.onmessage = (n) => {
|
|
476
|
-
n.data instanceof ArrayBuffer ? this.handleBinaryAudio(n.data) : this.handleMessage(n.data);
|
|
477
|
-
};
|
|
478
|
-
});
|
|
465
|
+
sendInitialSessionUpdate() {
|
|
466
|
+
this.initialSessionUpdate && this.sendEvent(this.initialSessionUpdate);
|
|
479
467
|
}
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
468
|
+
handleResponseDone(e) {
|
|
469
|
+
if (!e?.output?.length) {
|
|
470
|
+
this.currentResponseHasAudio && this.finishAudioTurn();
|
|
471
|
+
return;
|
|
472
|
+
}
|
|
473
|
+
const t = e.output.filter(x);
|
|
474
|
+
if (t.length > 0) {
|
|
475
|
+
this.handleFunctionCalls(t);
|
|
476
|
+
return;
|
|
477
|
+
}
|
|
478
|
+
this.currentResponseHasAudio && this.finishAudioTurn();
|
|
479
|
+
}
|
|
480
|
+
handleFunctionCalls(e) {
|
|
481
|
+
let t = !1;
|
|
482
|
+
const n = [];
|
|
483
|
+
for (const a of e) {
|
|
484
|
+
if (!a.call_id || this.handledFunctionCallIds.has(a.call_id))
|
|
485
|
+
continue;
|
|
486
|
+
this.handledFunctionCallIds.add(a.call_id), n.push(a.name ?? "unknown");
|
|
487
|
+
const i = this.handleFunctionCall(a);
|
|
488
|
+
this.sendEvent({
|
|
489
|
+
type: "conversation.item.create",
|
|
490
|
+
item: {
|
|
491
|
+
type: "function_call_output",
|
|
492
|
+
call_id: a.call_id,
|
|
493
|
+
output: JSON.stringify(i)
|
|
494
|
+
}
|
|
495
|
+
}), t = !0;
|
|
496
|
+
}
|
|
497
|
+
t && (this.pendingFunctionCallStartedAtMs = performance.now(), this.pendingFunctionCallNames = n, console.debug("[OpenAIRealtime] Function call received", {
|
|
498
|
+
calls: n
|
|
499
|
+
}), this.sendEvent({ type: "response.create" }));
|
|
488
500
|
}
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
}) : t.role === "assistant" && t.transcript && this.events.emit("transcript", {
|
|
498
|
-
role: "assistant",
|
|
499
|
-
text: t.transcript,
|
|
500
|
-
isFinal: !0
|
|
501
|
-
});
|
|
502
|
-
break;
|
|
503
|
-
case "speech-update":
|
|
504
|
-
t.status === "started" ? (this.events.emit("turnStart", void 0), this.setState("speaking")) : t.status === "stopped" && (this.events.emit("turnEnd", void 0), this.setState("listening"));
|
|
505
|
-
break;
|
|
506
|
-
case "transcript":
|
|
507
|
-
this.events.emit("transcript", {
|
|
508
|
-
role: t.role === "user" ? "user" : "assistant",
|
|
509
|
-
text: t.transcript || "",
|
|
510
|
-
isFinal: t.transcriptType === "final"
|
|
511
|
-
});
|
|
512
|
-
break;
|
|
513
|
-
case "hang":
|
|
514
|
-
case "end-of-call-report":
|
|
515
|
-
this.events.emit("turnEnd", void 0), this.setState("idle");
|
|
516
|
-
break;
|
|
517
|
-
case "error":
|
|
518
|
-
console.error("[Vapi] Server error:", t);
|
|
519
|
-
break;
|
|
501
|
+
handleFunctionCall(e) {
|
|
502
|
+
if (e.name !== "set_emotion")
|
|
503
|
+
return { error: `Unsupported function: ${e.name}` };
|
|
504
|
+
try {
|
|
505
|
+
const n = (e.arguments ? JSON.parse(e.arguments) : {}).emotion?.toLowerCase();
|
|
506
|
+
return n && T.includes(n) ? (this.events.emit("emotion", n), { result: "ok" }) : { error: "Invalid emotion" };
|
|
507
|
+
} catch {
|
|
508
|
+
return { error: "Invalid function arguments" };
|
|
520
509
|
}
|
|
521
510
|
}
|
|
522
|
-
|
|
523
|
-
|
|
511
|
+
finishAudioTurn() {
|
|
512
|
+
this.resetTurnState(), this.events.emit("turnEnd", void 0), this.setState("listening");
|
|
513
|
+
}
|
|
514
|
+
resetTurnState() {
|
|
515
|
+
this.currentResponseHasAudio = !1, this.currentTranscript = "";
|
|
516
|
+
}
|
|
517
|
+
sendEvent(e) {
|
|
518
|
+
!this.ws || this.ws.readyState !== WebSocket.OPEN || this.ws.send(JSON.stringify(e));
|
|
519
|
+
}
|
|
520
|
+
resolvePendingConnect() {
|
|
521
|
+
if (!this.connectResolve)
|
|
524
522
|
return;
|
|
525
|
-
|
|
526
|
-
this.
|
|
523
|
+
const e = this.connectResolve;
|
|
524
|
+
this.connectResolve = null, this.connectReject = null, e();
|
|
527
525
|
}
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
this.
|
|
526
|
+
rejectPendingConnect(e) {
|
|
527
|
+
if (!this.connectReject)
|
|
528
|
+
return;
|
|
529
|
+
const t = this.connectReject;
|
|
530
|
+
this.connectResolve = null, this.connectReject = null, t(e);
|
|
533
531
|
}
|
|
534
|
-
|
|
535
|
-
this.
|
|
532
|
+
clearConnectTimeout() {
|
|
533
|
+
this.connectTimeout !== null && (clearTimeout(this.connectTimeout), this.connectTimeout = null);
|
|
536
534
|
}
|
|
537
535
|
}
|
|
538
|
-
|
|
539
|
-
|
|
536
|
+
function x(s) {
|
|
537
|
+
return s.type === "function_call";
|
|
538
|
+
}
|
|
539
|
+
const D = [
|
|
540
540
|
{ id: "elevenlabs", name: "ElevenLabs", description: "ElevenLabs Conversational AI" },
|
|
541
|
-
{ id: "
|
|
542
|
-
{ id: "vapi", name: "Vapi", description: "Vapi WebSocket Transport" }
|
|
541
|
+
{ id: "openai", name: "OpenAI Realtime", description: "OpenAI Realtime API" }
|
|
543
542
|
];
|
|
544
|
-
function
|
|
545
|
-
switch (
|
|
546
|
-
case "gemini":
|
|
547
|
-
return new R();
|
|
543
|
+
function b(s) {
|
|
544
|
+
switch (s) {
|
|
548
545
|
case "elevenlabs":
|
|
549
|
-
return new
|
|
550
|
-
case "
|
|
551
|
-
return new
|
|
552
|
-
case "vapi":
|
|
553
|
-
return new N();
|
|
546
|
+
return new R();
|
|
547
|
+
case "openai":
|
|
548
|
+
return new P();
|
|
554
549
|
default:
|
|
555
|
-
throw new Error(`Unknown agent type: ${
|
|
550
|
+
throw new Error(`Unknown agent type: ${s}`);
|
|
556
551
|
}
|
|
557
552
|
}
|
|
558
|
-
function
|
|
559
|
-
return
|
|
553
|
+
function N(s) {
|
|
554
|
+
return D.find((e) => e.id === s);
|
|
560
555
|
}
|
|
561
|
-
class
|
|
556
|
+
class F extends Error {
|
|
562
557
|
status;
|
|
563
558
|
payload;
|
|
564
559
|
url;
|
|
@@ -566,8 +561,8 @@ class D extends Error {
|
|
|
566
561
|
super(e.message), this.name = "ApiError", this.status = e.status, this.payload = e.payload, this.url = e.url;
|
|
567
562
|
}
|
|
568
563
|
}
|
|
569
|
-
const
|
|
570
|
-
class
|
|
564
|
+
const o = /* @__PURE__ */ new Set();
|
|
565
|
+
class L {
|
|
571
566
|
apiBaseUrl;
|
|
572
567
|
publishableKey;
|
|
573
568
|
callbacks;
|
|
@@ -611,31 +606,31 @@ class K {
|
|
|
611
606
|
}
|
|
612
607
|
/** Connect to the embed session */
|
|
613
608
|
async connect() {
|
|
614
|
-
if (
|
|
609
|
+
if (o.has(this.publishableKey)) {
|
|
615
610
|
console.log("[PersonaEmbed] Connection already in progress, skipping");
|
|
616
611
|
return;
|
|
617
612
|
}
|
|
618
|
-
|
|
613
|
+
o.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
|
|
619
614
|
try {
|
|
620
615
|
const e = await this.fetchSession(this.abortController.signal);
|
|
621
616
|
if (!this.mounted) {
|
|
622
|
-
|
|
617
|
+
o.delete(this.publishableKey);
|
|
623
618
|
return;
|
|
624
619
|
}
|
|
625
620
|
if (await this.initSession(e), await this.initMicrophone(), await this.connectAgent(e.voice_agent_details), !this.mounted) {
|
|
626
|
-
this.cleanup(),
|
|
621
|
+
this.cleanup(), o.delete(this.publishableKey);
|
|
627
622
|
return;
|
|
628
623
|
}
|
|
629
624
|
this.setStatus("connected");
|
|
630
625
|
} catch (e) {
|
|
631
|
-
if (
|
|
626
|
+
if (o.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
|
|
632
627
|
return;
|
|
633
628
|
console.error("[PersonaEmbed]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
|
|
634
629
|
}
|
|
635
630
|
}
|
|
636
631
|
/** Disconnect and cleanup */
|
|
637
632
|
disconnect() {
|
|
638
|
-
this.mounted = !1, this.abortController?.abort(), this.abortController = null,
|
|
633
|
+
this.mounted = !1, this.abortController?.abort(), this.abortController = null, o.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
|
|
639
634
|
}
|
|
640
635
|
/** Toggle microphone mute */
|
|
641
636
|
toggleMute() {
|
|
@@ -655,31 +650,31 @@ class K {
|
|
|
655
650
|
signal: e
|
|
656
651
|
});
|
|
657
652
|
if (!t.ok) {
|
|
658
|
-
let
|
|
653
|
+
let n;
|
|
659
654
|
try {
|
|
660
|
-
|
|
655
|
+
n = await t.json();
|
|
661
656
|
} catch {
|
|
662
657
|
}
|
|
663
|
-
throw new
|
|
664
|
-
message:
|
|
658
|
+
throw new F({
|
|
659
|
+
message: n?.message ?? "create_session failed",
|
|
665
660
|
status: t.status,
|
|
666
|
-
payload:
|
|
661
|
+
payload: n,
|
|
667
662
|
url: t.url
|
|
668
663
|
});
|
|
669
664
|
}
|
|
670
665
|
if (!t.ok) {
|
|
671
|
-
const
|
|
672
|
-
throw new Error(`create_session failed: ${t.status} ${JSON.stringify(
|
|
666
|
+
const n = await t.json().catch(() => null);
|
|
667
|
+
throw new Error(`create_session failed: ${t.status} ${JSON.stringify(n)}`);
|
|
673
668
|
}
|
|
674
669
|
return t.json();
|
|
675
670
|
}
|
|
676
671
|
async initSession(e) {
|
|
677
|
-
this.session =
|
|
672
|
+
this.session = _({
|
|
678
673
|
serverUrl: e.session_details.server_url,
|
|
679
674
|
participantToken: e.session_details.participant_token,
|
|
680
675
|
agentIdentity: e.session_details.agent_identity,
|
|
681
676
|
onVideoTrack: (t) => {
|
|
682
|
-
console.log("[PersonaEmbed] Setting video track", t.readyState, t.enabled), this._video.srcObject = new MediaStream([t]), this._video.play().catch((
|
|
677
|
+
console.log("[PersonaEmbed] Setting video track", t.readyState, t.enabled), this._video.srcObject = new MediaStream([t]), this._video.play().catch((n) => console.warn("[PersonaEmbed] Video play failed:", n));
|
|
683
678
|
},
|
|
684
679
|
onAudioTrack: (t) => {
|
|
685
680
|
this._audio.srcObject = new MediaStream([t]), this._audio.play().catch(() => {
|
|
@@ -697,11 +692,11 @@ class K {
|
|
|
697
692
|
onClose: () => {
|
|
698
693
|
this.mounted && this.callbacks.onDisconnect?.();
|
|
699
694
|
}
|
|
700
|
-
}), this.agent =
|
|
695
|
+
}), this.agent = b(e.voice_agent_details.type), this.agent.on("audio", (t) => this.session?.sendAudio(t)), this.agent.on("turnEnd", () => this.session?.endAudioTurn()), this.agent.on("interrupted", () => {
|
|
701
696
|
this.session?.endAudioTurn(), this.session?.interrupt();
|
|
702
697
|
}), this.agent.on("closed", () => {
|
|
703
698
|
this.mounted && this.callbacks.onDisconnect?.();
|
|
704
|
-
}), this.agent
|
|
699
|
+
}), this.agent.on("emotion", (t) => this.session?.setEmotion(t)), await this.session.connect();
|
|
705
700
|
}
|
|
706
701
|
async initMicrophone() {
|
|
707
702
|
this.stream = await navigator.mediaDevices.getUserMedia({
|
|
@@ -710,37 +705,32 @@ class K {
|
|
|
710
705
|
const e = this.audioContext.createMediaStreamSource(this.stream);
|
|
711
706
|
this.processor = this.audioContext.createScriptProcessor(4096, 1, 1), this.processor.onaudioprocess = (t) => {
|
|
712
707
|
if (!this._isMuted) {
|
|
713
|
-
const
|
|
714
|
-
this.agent?.sendAudio(
|
|
708
|
+
const n = v(t.inputBuffer.getChannelData(0));
|
|
709
|
+
this.agent?.sendAudio(n);
|
|
715
710
|
}
|
|
716
711
|
}, e.connect(this.processor), this.processor.connect(this.audioContext.destination);
|
|
717
712
|
}
|
|
718
713
|
async connectAgent(e) {
|
|
719
714
|
if (!this.agent) return;
|
|
720
715
|
const t = { inputSampleRate: 16e3 };
|
|
721
|
-
e.type === "
|
|
722
|
-
...t,
|
|
723
|
-
apiKey: e.token,
|
|
724
|
-
authType: "ephemeral_token"
|
|
725
|
-
}) : e.type === "elevenlabs" ? await this.agent.connect({
|
|
716
|
+
e.type === "elevenlabs" ? await this.agent.connect({
|
|
726
717
|
...t,
|
|
727
718
|
agentId: e.agent_id,
|
|
728
719
|
signedUrl: e.signed_url
|
|
729
|
-
}) : e.type === "
|
|
730
|
-
...t,
|
|
731
|
-
agentId: e.agent_id,
|
|
732
|
-
apiKey: e.token
|
|
733
|
-
}) : e.type === "vapi" && await this.agent.connect({
|
|
720
|
+
}) : e.type === "openai" && await this.agent.connect({
|
|
734
721
|
...t,
|
|
735
|
-
|
|
722
|
+
apiKey: e.token,
|
|
723
|
+
systemPrompt: e.system_prompt,
|
|
724
|
+
voice: e.voice,
|
|
725
|
+
turnDetection: e.turn_detection
|
|
736
726
|
});
|
|
737
727
|
}
|
|
738
728
|
cleanup() {
|
|
739
729
|
this.stream?.getTracks().forEach((e) => e.stop()), this.processor?.disconnect(), this.audioContext?.close(), this.agent?.close(), this.session?.close(), this.stream = null, this.processor = null, this.audioContext = null, this.agent = null, this.session = null;
|
|
740
730
|
}
|
|
741
731
|
}
|
|
742
|
-
const
|
|
743
|
-
class
|
|
732
|
+
const r = /* @__PURE__ */ new Set();
|
|
733
|
+
class $ {
|
|
744
734
|
voiceAgentDetails;
|
|
745
735
|
sessionDetails;
|
|
746
736
|
callbacks;
|
|
@@ -784,24 +774,24 @@ class B {
|
|
|
784
774
|
}
|
|
785
775
|
/** Connect to the session */
|
|
786
776
|
async connect() {
|
|
787
|
-
if (
|
|
777
|
+
if (r.has(this.connectionId)) {
|
|
788
778
|
console.log("[PersonaView] Connection already in progress, skipping");
|
|
789
779
|
return;
|
|
790
780
|
}
|
|
791
|
-
|
|
781
|
+
r.add(this.connectionId), this.mounted = !0, this.setStatus("connecting");
|
|
792
782
|
try {
|
|
793
783
|
if (await this.initSession(), await this.initMicrophone(), await this.connectAgent(), !this.mounted) {
|
|
794
|
-
this.cleanup(),
|
|
784
|
+
this.cleanup(), r.delete(this.connectionId);
|
|
795
785
|
return;
|
|
796
786
|
}
|
|
797
787
|
this.setStatus("connected");
|
|
798
788
|
} catch (e) {
|
|
799
|
-
|
|
789
|
+
r.delete(this.connectionId), console.error("[PersonaView]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
|
|
800
790
|
}
|
|
801
791
|
}
|
|
802
792
|
/** Disconnect and cleanup */
|
|
803
793
|
disconnect() {
|
|
804
|
-
this.mounted = !1,
|
|
794
|
+
this.mounted = !1, r.delete(this.connectionId), this.cleanup(), this.setStatus("disconnected");
|
|
805
795
|
}
|
|
806
796
|
/** Toggle microphone mute */
|
|
807
797
|
toggleMute() {
|
|
@@ -814,7 +804,7 @@ class B {
|
|
|
814
804
|
this._agentState !== e && (this._agentState = e, this.callbacks.onAgentStateChange?.(e));
|
|
815
805
|
}
|
|
816
806
|
async initSession() {
|
|
817
|
-
this.session =
|
|
807
|
+
this.session = _({
|
|
818
808
|
serverUrl: this.sessionDetails.server_url,
|
|
819
809
|
participantToken: this.sessionDetails.participant_token,
|
|
820
810
|
agentIdentity: this.sessionDetails.agent_identity,
|
|
@@ -837,11 +827,11 @@ class B {
|
|
|
837
827
|
onClose: () => {
|
|
838
828
|
this.mounted && this.callbacks.onDisconnect?.();
|
|
839
829
|
}
|
|
840
|
-
}), this.agent =
|
|
830
|
+
}), this.agent = b(this.voiceAgentDetails.type), this.agent.on("audio", (e) => this.session?.sendAudio(e)), this.agent.on("turnEnd", () => this.session?.endAudioTurn()), this.agent.on("interrupted", () => {
|
|
841
831
|
this.session?.endAudioTurn(), this.session?.interrupt();
|
|
842
832
|
}), this.agent.on("closed", () => {
|
|
843
833
|
this.mounted && this.callbacks.onDisconnect?.();
|
|
844
|
-
}), this.agent
|
|
834
|
+
}), this.agent.on("emotion", (e) => this.session?.setEmotion(e)), await this.session.connect();
|
|
845
835
|
}
|
|
846
836
|
async initMicrophone() {
|
|
847
837
|
this.stream = await navigator.mediaDevices.getUserMedia({
|
|
@@ -850,29 +840,24 @@ class B {
|
|
|
850
840
|
const e = this.audioContext.createMediaStreamSource(this.stream);
|
|
851
841
|
this.processor = this.audioContext.createScriptProcessor(4096, 1, 1), this.processor.onaudioprocess = (t) => {
|
|
852
842
|
if (!this._isMuted) {
|
|
853
|
-
const
|
|
854
|
-
this.agent?.sendAudio(
|
|
843
|
+
const n = v(t.inputBuffer.getChannelData(0));
|
|
844
|
+
this.agent?.sendAudio(n);
|
|
855
845
|
}
|
|
856
846
|
}, e.connect(this.processor), this.processor.connect(this.audioContext.destination);
|
|
857
847
|
}
|
|
858
848
|
async connectAgent() {
|
|
859
849
|
if (!this.agent) return;
|
|
860
850
|
const e = this.voiceAgentDetails, t = { inputSampleRate: 16e3 };
|
|
861
|
-
e.type === "
|
|
862
|
-
...t,
|
|
863
|
-
apiKey: e.token,
|
|
864
|
-
authType: "ephemeral_token"
|
|
865
|
-
}) : e.type === "elevenlabs" ? await this.agent.connect({
|
|
851
|
+
e.type === "elevenlabs" ? await this.agent.connect({
|
|
866
852
|
...t,
|
|
867
853
|
agentId: e.agent_id,
|
|
868
854
|
signedUrl: e.signed_url
|
|
869
|
-
}) : e.type === "
|
|
855
|
+
}) : e.type === "openai" && await this.agent.connect({
|
|
870
856
|
...t,
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
signedUrl: e.signed_url
|
|
857
|
+
apiKey: e.token,
|
|
858
|
+
systemPrompt: e.system_prompt,
|
|
859
|
+
voice: e.voice,
|
|
860
|
+
turnDetection: e.turn_detection
|
|
876
861
|
});
|
|
877
862
|
}
|
|
878
863
|
cleanup() {
|
|
@@ -880,20 +865,19 @@ class B {
|
|
|
880
865
|
}
|
|
881
866
|
}
|
|
882
867
|
export {
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
h as resamplePcm
|
|
868
|
+
D as AGENT_REGISTRY,
|
|
869
|
+
y as BaseAgent,
|
|
870
|
+
R as ElevenLabsAgent,
|
|
871
|
+
F as KeyframeApiError,
|
|
872
|
+
P as OpenAIRealtimeAgent,
|
|
873
|
+
L as PersonaEmbed,
|
|
874
|
+
$ as PersonaView,
|
|
875
|
+
u as SAMPLE_RATE,
|
|
876
|
+
f as base64ToBytes,
|
|
877
|
+
S as bytesToBase64,
|
|
878
|
+
b as createAgent,
|
|
879
|
+
C as createEventEmitter,
|
|
880
|
+
v as floatTo16BitPCM,
|
|
881
|
+
N as getAgentInfo,
|
|
882
|
+
p as resamplePcm
|
|
899
883
|
};
|