@keyframelabs/elements 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +16 -8
- package/dist/agents/elevenlabs.d.ts +12 -1
- package/dist/index.js +123 -83
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -81,19 +81,27 @@ For `PersonaView`, this is determined by `voiceAgentDetails`.
|
|
|
81
81
|
|
|
82
82
|
The avatar can display emotional expressions (`neutral`, `angry`, `sad`, `happy`) that affect its facial expression and demeanor.
|
|
83
83
|
|
|
84
|
-
###
|
|
84
|
+
### ElevenLabs: `set_emotion` Tool Call
|
|
85
85
|
|
|
86
|
-
When using ElevenLabs as the voice agent, emotions are
|
|
86
|
+
When using ElevenLabs as the voice agent, emotions are driven by a **client tool call** named `set_emotion`. The ElevenLabs agent parses incoming `client_tool_call` WebSocket messages and, when the tool name is `set_emotion`, updates the avatar's expression accordingly.
|
|
87
87
|
|
|
88
|
-
|
|
88
|
+
> **Important:** Transcripts from the ElevenLabs agent are **not** automatically consumed. The `transcript` event is emitted, but it is up to you to subscribe to it if you need transcript data.
|
|
89
|
+
|
|
90
|
+
#### Setup
|
|
91
|
+
|
|
92
|
+
You must create a `set_emotion` tool in the [ElevenLabs API](https://elevenlabs.io/docs) for your agent. The tool should accept a single parameter:
|
|
93
|
+
|
|
94
|
+
| Parameter | Type | Description |
|
|
95
|
+
| --------- | -------- | -------------------------------------------------------- |
|
|
96
|
+
| `emotion` | `enum` | One of `neutral`, `angry`, `sad`, `happy`. |
|
|
97
|
+
|
|
98
|
+
Then instruct your agent (via its system prompt) to call `set_emotion` on each turn with the appropriate emotion. The client library handles the rest — it validates the emotion, emits an `emotion` event, and sends a `client_tool_result` back to ElevenLabs.
|
|
89
99
|
|
|
90
100
|
### Manual Emotion Control
|
|
91
101
|
|
|
92
102
|
For other agents or custom emotion logic, you can access the underlying session to set emotions manually:
|
|
93
103
|
|
|
94
104
|
```typescript
|
|
95
|
-
// Access the underlying SDK session for manual control
|
|
96
|
-
// (Available when using @keyframelabs/sdk directly)
|
|
97
105
|
import { createClient } from '@keyframelabs/sdk';
|
|
98
106
|
|
|
99
107
|
const session = createClient({ ... });
|
|
@@ -102,15 +110,15 @@ await session.setEmotion('happy');
|
|
|
102
110
|
|
|
103
111
|
### Agent Events
|
|
104
112
|
|
|
105
|
-
The `emotion` event is emitted when
|
|
113
|
+
The `emotion` event is emitted when the agent triggers a `set_emotion` tool call:
|
|
106
114
|
|
|
107
115
|
```typescript
|
|
108
116
|
agent.on('emotion', (emotion) => {
|
|
109
|
-
console.log('Emotion
|
|
117
|
+
console.log('Emotion changed:', emotion); // 'neutral' | 'angry' | 'sad' | 'happy'
|
|
110
118
|
});
|
|
111
119
|
```
|
|
112
120
|
|
|
113
|
-
Currently, only the ElevenLabs agent emits emotion events.
|
|
121
|
+
Currently, only the ElevenLabs agent emits emotion events via tool calls.
|
|
114
122
|
|
|
115
123
|
## API
|
|
116
124
|
|
|
@@ -20,7 +20,10 @@ export declare class ElevenLabsAgent extends BaseAgent {
|
|
|
20
20
|
private sourceInputSampleRate;
|
|
21
21
|
private initialized;
|
|
22
22
|
private lastInterruptId;
|
|
23
|
-
private
|
|
23
|
+
private agentResponseReceived;
|
|
24
|
+
private turnStartTime;
|
|
25
|
+
private accumulatedDurationMs;
|
|
26
|
+
private turnEndTimer;
|
|
24
27
|
connect(config: ElevenLabsConfig): Promise<void>;
|
|
25
28
|
protected handleParsedMessage(message: unknown): void;
|
|
26
29
|
private handleInitMetadata;
|
|
@@ -28,6 +31,14 @@ export declare class ElevenLabsAgent extends BaseAgent {
|
|
|
28
31
|
private handleAudio;
|
|
29
32
|
private handleUserTranscript;
|
|
30
33
|
private handleAgentResponse;
|
|
34
|
+
/**
|
|
35
|
+
* Schedule a timer to emit turnEnd when the virtual audio buffer
|
|
36
|
+
* "would have" finished playing. Replicates the ElevenLabs SDK's
|
|
37
|
+
* AudioWorklet buffer-empty detection without requiring local playback.
|
|
38
|
+
*/
|
|
39
|
+
private scheduleVirtualBufferCheck;
|
|
40
|
+
private resetTurnState;
|
|
41
|
+
private handleClientToolCall;
|
|
31
42
|
private handleInterruption;
|
|
32
43
|
sendAudio(pcmData: Uint8Array): void;
|
|
33
44
|
/**
|
package/dist/index.js
CHANGED
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
import { createClient as f } from "@keyframelabs/sdk";
|
|
2
|
-
const
|
|
3
|
-
function
|
|
2
|
+
const o = 24e3;
|
|
3
|
+
function m(i) {
|
|
4
4
|
const e = atob(i), t = new Uint8Array(e.length);
|
|
5
5
|
for (let s = 0; s < e.length; s++)
|
|
6
6
|
t[s] = e.charCodeAt(s);
|
|
7
7
|
return t;
|
|
8
8
|
}
|
|
9
|
-
function
|
|
9
|
+
function g(i) {
|
|
10
10
|
let e = "";
|
|
11
11
|
for (let t = 0; t < i.length; t++)
|
|
12
12
|
e += String.fromCharCode(i[t]);
|
|
13
13
|
return btoa(e);
|
|
14
14
|
}
|
|
15
|
-
function
|
|
15
|
+
function h(i, e, t) {
|
|
16
16
|
if (e === t)
|
|
17
17
|
return i;
|
|
18
|
-
const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n),
|
|
19
|
-
for (let
|
|
20
|
-
const _ =
|
|
21
|
-
r
|
|
18
|
+
const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n), d = new Int16Array(a);
|
|
19
|
+
for (let r = 0; r < a; r++) {
|
|
20
|
+
const _ = r * n, p = Math.floor(_), b = Math.min(p + 1, s.length - 1), v = _ - p;
|
|
21
|
+
d[r] = Math.round(
|
|
22
22
|
s[p] * (1 - v) + s[b] * v
|
|
23
23
|
);
|
|
24
24
|
}
|
|
25
|
-
return new Uint8Array(
|
|
25
|
+
return new Uint8Array(d.buffer);
|
|
26
26
|
}
|
|
27
27
|
function E() {
|
|
28
28
|
const i = /* @__PURE__ */ new Map();
|
|
@@ -49,12 +49,12 @@ function w(i) {
|
|
|
49
49
|
}
|
|
50
50
|
return new Uint8Array(e.buffer);
|
|
51
51
|
}
|
|
52
|
-
const
|
|
52
|
+
const C = 16e3;
|
|
53
53
|
class u {
|
|
54
54
|
ws = null;
|
|
55
55
|
_state = "idle";
|
|
56
56
|
events = E();
|
|
57
|
-
inputSampleRate =
|
|
57
|
+
inputSampleRate = C;
|
|
58
58
|
/** Current agent state */
|
|
59
59
|
get state() {
|
|
60
60
|
return this._state;
|
|
@@ -113,7 +113,7 @@ class u {
|
|
|
113
113
|
this.events.emit("closed", { code: e, reason: t });
|
|
114
114
|
}
|
|
115
115
|
}
|
|
116
|
-
const
|
|
116
|
+
const k = "gemini-2.5-flash-native-audio-preview-12-2025", A = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent", I = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
|
|
117
117
|
class R extends u {
|
|
118
118
|
agentName = "GeminiLive";
|
|
119
119
|
async connect(e) {
|
|
@@ -122,10 +122,10 @@ class R extends u {
|
|
|
122
122
|
if (!e.apiKey)
|
|
123
123
|
throw new Error("Gemini API key is required");
|
|
124
124
|
e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate);
|
|
125
|
-
const t = e.model ??
|
|
126
|
-
return new Promise((a,
|
|
125
|
+
const t = e.model ?? k, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${I}?access_token=${encodeURIComponent(e.apiKey)}` : `${A}?key=${encodeURIComponent(e.apiKey)}`;
|
|
126
|
+
return new Promise((a, d) => {
|
|
127
127
|
this.ws = new WebSocket(n), this.ws.onopen = () => {
|
|
128
|
-
const
|
|
128
|
+
const r = {
|
|
129
129
|
setup: {
|
|
130
130
|
model: `models/${t}`,
|
|
131
131
|
generationConfig: {
|
|
@@ -134,13 +134,13 @@ class R extends u {
|
|
|
134
134
|
systemInstruction: e.systemPrompt ? { parts: [{ text: e.systemPrompt }] } : void 0
|
|
135
135
|
}
|
|
136
136
|
};
|
|
137
|
-
this.ws.send(JSON.stringify(
|
|
137
|
+
this.ws.send(JSON.stringify(r)), this.setState("listening"), a();
|
|
138
138
|
}, this.ws.onerror = () => {
|
|
139
|
-
|
|
140
|
-
}, this.ws.onclose = (
|
|
141
|
-
this.ws = null, this.setState("idle"), this.emitClosed(
|
|
142
|
-
}, this.ws.onmessage = (
|
|
143
|
-
this.handleMessage(
|
|
139
|
+
d(new Error("Failed to connect to Gemini Live"));
|
|
140
|
+
}, this.ws.onclose = (r) => {
|
|
141
|
+
this.ws = null, this.setState("idle"), this.emitClosed(r.code, r.reason);
|
|
142
|
+
}, this.ws.onmessage = (r) => {
|
|
143
|
+
this.handleMessage(r.data);
|
|
144
144
|
};
|
|
145
145
|
});
|
|
146
146
|
}
|
|
@@ -159,7 +159,7 @@ class R extends u {
|
|
|
159
159
|
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
160
160
|
for (const n of s.modelTurn.parts) {
|
|
161
161
|
if (n.inlineData?.data) {
|
|
162
|
-
const a =
|
|
162
|
+
const a = m(n.inlineData.data);
|
|
163
163
|
this.events.emit("audio", a);
|
|
164
164
|
}
|
|
165
165
|
n.text && this.events.emit("transcript", {
|
|
@@ -181,7 +181,7 @@ class R extends u {
|
|
|
181
181
|
mediaChunks: [
|
|
182
182
|
{
|
|
183
183
|
mimeType: `audio/pcm;rate=${this.inputSampleRate}`,
|
|
184
|
-
data:
|
|
184
|
+
data: g(e)
|
|
185
185
|
}
|
|
186
186
|
]
|
|
187
187
|
}
|
|
@@ -189,7 +189,7 @@ class R extends u {
|
|
|
189
189
|
this.ws.send(JSON.stringify(t));
|
|
190
190
|
}
|
|
191
191
|
}
|
|
192
|
-
const M = ["neutral", "angry", "sad", "happy"],
|
|
192
|
+
const M = ["neutral", "angry", "sad", "happy"], T = "wss://api.elevenlabs.io/v1/convai/conversation";
|
|
193
193
|
class S extends u {
|
|
194
194
|
agentName = "ElevenLabs";
|
|
195
195
|
outputSampleRate = 24e3;
|
|
@@ -202,8 +202,12 @@ class S extends u {
|
|
|
202
202
|
// True after conversation_initiation_metadata received
|
|
203
203
|
lastInterruptId = 0;
|
|
204
204
|
// Track interruptions to filter stale audio
|
|
205
|
-
|
|
206
|
-
//
|
|
205
|
+
// Virtual buffer turn-end detection: track audio duration and emit turnEnd
|
|
206
|
+
// when agent_response has arrived and all audio "would have" finished playing.
|
|
207
|
+
agentResponseReceived = !1;
|
|
208
|
+
turnStartTime = 0;
|
|
209
|
+
accumulatedDurationMs = 0;
|
|
210
|
+
turnEndTimer = null;
|
|
207
211
|
async connect(e) {
|
|
208
212
|
if (this.ws)
|
|
209
213
|
throw new Error("Already connected");
|
|
@@ -211,7 +215,7 @@ class S extends u {
|
|
|
211
215
|
throw new Error("ElevenLabs agent ID or signed URL is required");
|
|
212
216
|
e.inputSampleRate && (this.sourceInputSampleRate = e.inputSampleRate);
|
|
213
217
|
let t;
|
|
214
|
-
return e.signedUrl ? t = e.signedUrl : (t = `${
|
|
218
|
+
return e.signedUrl ? t = e.signedUrl : (t = `${T}?agent_id=${e.agentId}`, e.apiKey && (t += `&xi-api-key=${e.apiKey}`)), new Promise((s, n) => {
|
|
215
219
|
this.ws = new WebSocket(t), this.ws.onopen = () => {
|
|
216
220
|
this.setState("listening"), s();
|
|
217
221
|
}, this.ws.onerror = () => {
|
|
@@ -244,6 +248,9 @@ class S extends u {
|
|
|
244
248
|
case "interruption":
|
|
245
249
|
this.handleInterruption(t);
|
|
246
250
|
break;
|
|
251
|
+
case "client_tool_call":
|
|
252
|
+
this.handleClientToolCall(t);
|
|
253
|
+
break;
|
|
247
254
|
case "agent_response_correction":
|
|
248
255
|
this.setState("listening");
|
|
249
256
|
break;
|
|
@@ -271,19 +278,15 @@ class S extends u {
|
|
|
271
278
|
}
|
|
272
279
|
handleAudio(e) {
|
|
273
280
|
const t = e.audio_event;
|
|
274
|
-
if (!t?.audio_base_64)
|
|
275
|
-
const s = t.event_id ?? 0;
|
|
276
|
-
if (s <= this.lastInterruptId)
|
|
281
|
+
if (!t?.audio_base_64 || (t.event_id ?? 0) <= this.lastInterruptId)
|
|
277
282
|
return;
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
}
|
|
284
|
-
|
|
285
|
-
let n = g(t.audio_base_64);
|
|
286
|
-
this.outputSampleRate !== l && (n = c(n, this.outputSampleRate, l)), this.events.emit("audio", n);
|
|
283
|
+
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
284
|
+
let n = m(t.audio_base_64);
|
|
285
|
+
this.outputSampleRate !== o && (n = h(n, this.outputSampleRate, o)), this.events.emit("audio", n);
|
|
286
|
+
const a = n.length / 2 / o * 1e3;
|
|
287
|
+
this.turnStartTime === 0 && (this.turnStartTime = Date.now()), this.accumulatedDurationMs += a, console.debug(
|
|
288
|
+
`[ElevenLabs] audio chunk: ${n.length} bytes, +${a.toFixed(0)}ms, totalDuration=${this.accumulatedDurationMs.toFixed(0)}ms, agentResponse=${this.agentResponseReceived}`
|
|
289
|
+
), this.scheduleVirtualBufferCheck();
|
|
287
290
|
}
|
|
288
291
|
handleUserTranscript(e) {
|
|
289
292
|
const t = e.user_transcription_event;
|
|
@@ -295,22 +298,59 @@ class S extends u {
|
|
|
295
298
|
}
|
|
296
299
|
handleAgentResponse(e) {
|
|
297
300
|
const t = e.agent_response_event;
|
|
298
|
-
t?.agent_response && (this.events.emit("
|
|
301
|
+
t?.agent_response && (this.events.emit("transcript", {
|
|
299
302
|
role: "assistant",
|
|
300
303
|
text: t.agent_response,
|
|
301
304
|
isFinal: !0
|
|
302
|
-
})
|
|
305
|
+
}), this.agentResponseReceived = !0, console.debug(
|
|
306
|
+
`[ElevenLabs] agent_response received: totalDuration=${this.accumulatedDurationMs.toFixed(0)}ms, text="${t.agent_response.slice(0, 60)}${t.agent_response.length > 60 ? "..." : ""}"`
|
|
307
|
+
), this.scheduleVirtualBufferCheck());
|
|
308
|
+
}
|
|
309
|
+
/**
|
|
310
|
+
* Schedule a timer to emit turnEnd when the virtual audio buffer
|
|
311
|
+
* "would have" finished playing. Replicates the ElevenLabs SDK's
|
|
312
|
+
* AudioWorklet buffer-empty detection without requiring local playback.
|
|
313
|
+
*/
|
|
314
|
+
scheduleVirtualBufferCheck() {
|
|
315
|
+
if (!this.agentResponseReceived || this.turnStartTime === 0) return;
|
|
316
|
+
this.turnEndTimer !== null && clearTimeout(this.turnEndTimer);
|
|
317
|
+
const e = Date.now() - this.turnStartTime, t = Math.max(0, this.accumulatedDurationMs - e);
|
|
318
|
+
console.debug(
|
|
319
|
+
`[ElevenLabs] virtual buffer: elapsed=${e.toFixed(0)}ms, accumulated=${this.accumulatedDurationMs.toFixed(0)}ms, remaining=${t.toFixed(0)}ms`
|
|
320
|
+
), this.turnEndTimer = setTimeout(() => {
|
|
321
|
+
this.turnEndTimer = null, console.debug("[ElevenLabs] virtual buffer drained, emitting turnEnd"), this.resetTurnState(), this.events.emit("turnEnd", void 0), this.setState("listening");
|
|
322
|
+
}, t);
|
|
323
|
+
}
|
|
324
|
+
resetTurnState() {
|
|
325
|
+
this.agentResponseReceived = !1, this.turnStartTime = 0, this.accumulatedDurationMs = 0, this.turnEndTimer !== null && (clearTimeout(this.turnEndTimer), this.turnEndTimer = null);
|
|
326
|
+
}
|
|
327
|
+
handleClientToolCall(e) {
|
|
328
|
+
const t = e.client_tool_call;
|
|
329
|
+
if (t) {
|
|
330
|
+
if (t.tool_name === "set_emotion") {
|
|
331
|
+
const s = t.parameters?.emotion?.toLowerCase();
|
|
332
|
+
s && M.includes(s) && this.events.emit("emotion", s);
|
|
333
|
+
}
|
|
334
|
+
this.ws && this.ws.readyState === WebSocket.OPEN && this.ws.send(JSON.stringify({
|
|
335
|
+
type: "client_tool_result",
|
|
336
|
+
tool_call_id: t.tool_call_id,
|
|
337
|
+
result: "ok",
|
|
338
|
+
is_error: !1
|
|
339
|
+
}));
|
|
340
|
+
}
|
|
303
341
|
}
|
|
304
342
|
handleInterruption(e) {
|
|
305
343
|
const t = e.interruption_event;
|
|
306
|
-
t?.event_id && (this.lastInterruptId = t.event_id), this.
|
|
344
|
+
t?.event_id && (this.lastInterruptId = t.event_id), (this.agentResponseReceived || this.accumulatedDurationMs > 0) && console.debug(
|
|
345
|
+
`[ElevenLabs] interruption: discarding pending turn (duration=${this.accumulatedDurationMs.toFixed(0)}ms, agentResponse=${this.agentResponseReceived})`
|
|
346
|
+
), this.resetTurnState(), this.events.emit("interrupted", void 0), this.setState("listening");
|
|
307
347
|
}
|
|
308
348
|
sendAudio(e) {
|
|
309
349
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN || !this.initialized)
|
|
310
350
|
return;
|
|
311
351
|
let t = e;
|
|
312
|
-
this.sourceInputSampleRate !== this.expectedInputSampleRate && (t =
|
|
313
|
-
user_audio_chunk:
|
|
352
|
+
this.sourceInputSampleRate !== this.expectedInputSampleRate && (t = h(e, this.sourceInputSampleRate, this.expectedInputSampleRate)), this.ws.send(JSON.stringify({
|
|
353
|
+
user_audio_chunk: g(t)
|
|
314
354
|
}));
|
|
315
355
|
}
|
|
316
356
|
/**
|
|
@@ -340,11 +380,11 @@ class S extends u {
|
|
|
340
380
|
}));
|
|
341
381
|
}
|
|
342
382
|
close() {
|
|
343
|
-
this.initialized = !1, this.lastInterruptId = 0, super.close();
|
|
383
|
+
this.initialized = !1, this.lastInterruptId = 0, this.resetTurnState(), super.close();
|
|
344
384
|
}
|
|
345
385
|
}
|
|
346
|
-
const
|
|
347
|
-
class
|
|
386
|
+
const x = "wss://api.cartesia.ai/agents/stream", P = "2025-04-16";
|
|
387
|
+
class D extends u {
|
|
348
388
|
agentName = "Cartesia";
|
|
349
389
|
// Audio configuration
|
|
350
390
|
cartesiaInputFormat = "pcm_16000";
|
|
@@ -363,7 +403,7 @@ class O extends u {
|
|
|
363
403
|
if (!e.apiKey)
|
|
364
404
|
throw new Error("Cartesia API Key is required");
|
|
365
405
|
e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate), this.inputSampleRate === 16e3 ? this.cartesiaInputFormat = "pcm_16000" : this.inputSampleRate === 24e3 ? this.cartesiaInputFormat = "pcm_24000" : this.inputSampleRate === 44100 ? this.cartesiaInputFormat = "pcm_44100" : this.cartesiaInputFormat = "pcm_16000";
|
|
366
|
-
const t = `${
|
|
406
|
+
const t = `${x}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${P}`;
|
|
367
407
|
return new Promise((s, n) => {
|
|
368
408
|
this.ws = new WebSocket(t), this.ws.onopen = () => {
|
|
369
409
|
this.sendStartEvent(), this.startHeartbeat(), s();
|
|
@@ -425,8 +465,8 @@ class O extends u {
|
|
|
425
465
|
handleMediaOutput(e) {
|
|
426
466
|
if (!e.media?.payload) return;
|
|
427
467
|
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
428
|
-
let t =
|
|
429
|
-
this.cartesiaOutputRate !==
|
|
468
|
+
let t = m(e.media.payload);
|
|
469
|
+
this.cartesiaOutputRate !== o && (t = h(t, this.cartesiaOutputRate, o)), this.events.emit("audio", t);
|
|
430
470
|
}
|
|
431
471
|
handleClear() {
|
|
432
472
|
this.events.emit("interrupted", void 0), this.setState("listening");
|
|
@@ -436,11 +476,11 @@ class O extends u {
|
|
|
436
476
|
return;
|
|
437
477
|
let t = e;
|
|
438
478
|
const s = parseInt(this.cartesiaInputFormat.split("_")[1]);
|
|
439
|
-
this.inputSampleRate !== s && (t =
|
|
479
|
+
this.inputSampleRate !== s && (t = h(e, this.inputSampleRate, s)), this.ws.send(JSON.stringify({
|
|
440
480
|
event: "media_input",
|
|
441
481
|
stream_id: this.streamId,
|
|
442
482
|
media: {
|
|
443
|
-
payload:
|
|
483
|
+
payload: g(t)
|
|
444
484
|
}
|
|
445
485
|
}));
|
|
446
486
|
}
|
|
@@ -448,7 +488,7 @@ class O extends u {
|
|
|
448
488
|
this.stopHeartbeat(), this.isReady = !1, this.streamId = null, super.close();
|
|
449
489
|
}
|
|
450
490
|
}
|
|
451
|
-
class
|
|
491
|
+
class O extends u {
|
|
452
492
|
agentName = "Vapi";
|
|
453
493
|
// Audio configuration - Vapi uses 16kHz PCM by default
|
|
454
494
|
vapiSampleRate = 16e3;
|
|
@@ -475,7 +515,7 @@ class N extends u {
|
|
|
475
515
|
*/
|
|
476
516
|
handleBinaryAudio(e) {
|
|
477
517
|
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
478
|
-
const t = new Uint8Array(e), s = this.vapiSampleRate !==
|
|
518
|
+
const t = new Uint8Array(e), s = this.vapiSampleRate !== o ? h(t, this.vapiSampleRate, o) : t;
|
|
479
519
|
this.events.emit("audio", s);
|
|
480
520
|
}
|
|
481
521
|
handleParsedMessage(e) {
|
|
@@ -515,7 +555,7 @@ class N extends u {
|
|
|
515
555
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN)
|
|
516
556
|
return;
|
|
517
557
|
let t = e;
|
|
518
|
-
this.inputSampleRate !== this.vapiSampleRate && (t =
|
|
558
|
+
this.inputSampleRate !== this.vapiSampleRate && (t = h(e, this.inputSampleRate, this.vapiSampleRate)), this.ws.send(t.buffer);
|
|
519
559
|
}
|
|
520
560
|
/**
|
|
521
561
|
* Send a control message to end the call.
|
|
@@ -527,7 +567,7 @@ class N extends u {
|
|
|
527
567
|
this.hangup(), super.close();
|
|
528
568
|
}
|
|
529
569
|
}
|
|
530
|
-
const
|
|
570
|
+
const N = [
|
|
531
571
|
{ id: "gemini", name: "Gemini Live", description: "Google Gemini Live API" },
|
|
532
572
|
{ id: "elevenlabs", name: "ElevenLabs", description: "ElevenLabs Conversational AI" },
|
|
533
573
|
{ id: "cartesia", name: "Cartesia", description: "Cartesia Agents API" },
|
|
@@ -540,17 +580,17 @@ function y(i) {
|
|
|
540
580
|
case "elevenlabs":
|
|
541
581
|
return new S();
|
|
542
582
|
case "cartesia":
|
|
543
|
-
return new
|
|
583
|
+
return new D();
|
|
544
584
|
case "vapi":
|
|
545
|
-
return new
|
|
585
|
+
return new O();
|
|
546
586
|
default:
|
|
547
587
|
throw new Error(`Unknown agent type: ${i}`);
|
|
548
588
|
}
|
|
549
589
|
}
|
|
550
590
|
function F(i) {
|
|
551
|
-
return
|
|
591
|
+
return N.find((e) => e.id === i);
|
|
552
592
|
}
|
|
553
|
-
class
|
|
593
|
+
class L extends Error {
|
|
554
594
|
status;
|
|
555
595
|
payload;
|
|
556
596
|
url;
|
|
@@ -558,8 +598,8 @@ class D extends Error {
|
|
|
558
598
|
super(e.message), this.name = "ApiError", this.status = e.status, this.payload = e.payload, this.url = e.url;
|
|
559
599
|
}
|
|
560
600
|
}
|
|
561
|
-
const
|
|
562
|
-
class
|
|
601
|
+
const l = /* @__PURE__ */ new Set();
|
|
602
|
+
class $ {
|
|
563
603
|
apiBaseUrl;
|
|
564
604
|
publishableKey;
|
|
565
605
|
callbacks;
|
|
@@ -603,31 +643,31 @@ class K {
|
|
|
603
643
|
}
|
|
604
644
|
/** Connect to the embed session */
|
|
605
645
|
async connect() {
|
|
606
|
-
if (
|
|
646
|
+
if (l.has(this.publishableKey)) {
|
|
607
647
|
console.log("[PersonaEmbed] Connection already in progress, skipping");
|
|
608
648
|
return;
|
|
609
649
|
}
|
|
610
|
-
|
|
650
|
+
l.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
|
|
611
651
|
try {
|
|
612
652
|
const e = await this.fetchSession(this.abortController.signal);
|
|
613
653
|
if (!this.mounted) {
|
|
614
|
-
|
|
654
|
+
l.delete(this.publishableKey);
|
|
615
655
|
return;
|
|
616
656
|
}
|
|
617
657
|
if (await this.initSession(e), await this.initMicrophone(), await this.connectAgent(e.voice_agent_details), !this.mounted) {
|
|
618
|
-
this.cleanup(),
|
|
658
|
+
this.cleanup(), l.delete(this.publishableKey);
|
|
619
659
|
return;
|
|
620
660
|
}
|
|
621
661
|
this.setStatus("connected");
|
|
622
662
|
} catch (e) {
|
|
623
|
-
if (
|
|
663
|
+
if (l.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
|
|
624
664
|
return;
|
|
625
665
|
console.error("[PersonaEmbed]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
|
|
626
666
|
}
|
|
627
667
|
}
|
|
628
668
|
/** Disconnect and cleanup */
|
|
629
669
|
disconnect() {
|
|
630
|
-
this.mounted = !1, this.abortController?.abort(), this.abortController = null,
|
|
670
|
+
this.mounted = !1, this.abortController?.abort(), this.abortController = null, l.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
|
|
631
671
|
}
|
|
632
672
|
/** Toggle microphone mute */
|
|
633
673
|
toggleMute() {
|
|
@@ -652,7 +692,7 @@ class K {
|
|
|
652
692
|
s = await t.json();
|
|
653
693
|
} catch {
|
|
654
694
|
}
|
|
655
|
-
throw new
|
|
695
|
+
throw new L({
|
|
656
696
|
message: s?.message ?? "create_session failed",
|
|
657
697
|
status: t.status,
|
|
658
698
|
payload: s,
|
|
@@ -731,7 +771,7 @@ class K {
|
|
|
731
771
|
this.stream?.getTracks().forEach((e) => e.stop()), this.processor?.disconnect(), this.audioContext?.close(), this.agent?.close(), this.session?.close(), this.stream = null, this.processor = null, this.audioContext = null, this.agent = null, this.session = null;
|
|
732
772
|
}
|
|
733
773
|
}
|
|
734
|
-
const
|
|
774
|
+
const c = /* @__PURE__ */ new Set();
|
|
735
775
|
class B {
|
|
736
776
|
voiceAgentDetails;
|
|
737
777
|
sessionDetails;
|
|
@@ -776,24 +816,24 @@ class B {
|
|
|
776
816
|
}
|
|
777
817
|
/** Connect to the session */
|
|
778
818
|
async connect() {
|
|
779
|
-
if (
|
|
819
|
+
if (c.has(this.connectionId)) {
|
|
780
820
|
console.log("[PersonaView] Connection already in progress, skipping");
|
|
781
821
|
return;
|
|
782
822
|
}
|
|
783
|
-
|
|
823
|
+
c.add(this.connectionId), this.mounted = !0, this.setStatus("connecting");
|
|
784
824
|
try {
|
|
785
825
|
if (await this.initSession(), await this.initMicrophone(), await this.connectAgent(), !this.mounted) {
|
|
786
|
-
this.cleanup(),
|
|
826
|
+
this.cleanup(), c.delete(this.connectionId);
|
|
787
827
|
return;
|
|
788
828
|
}
|
|
789
829
|
this.setStatus("connected");
|
|
790
830
|
} catch (e) {
|
|
791
|
-
|
|
831
|
+
c.delete(this.connectionId), console.error("[PersonaView]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
|
|
792
832
|
}
|
|
793
833
|
}
|
|
794
834
|
/** Disconnect and cleanup */
|
|
795
835
|
disconnect() {
|
|
796
|
-
this.mounted = !1,
|
|
836
|
+
this.mounted = !1, c.delete(this.connectionId), this.cleanup(), this.setStatus("disconnected");
|
|
797
837
|
}
|
|
798
838
|
/** Toggle microphone mute */
|
|
799
839
|
toggleMute() {
|
|
@@ -872,20 +912,20 @@ class B {
|
|
|
872
912
|
}
|
|
873
913
|
}
|
|
874
914
|
export {
|
|
875
|
-
|
|
915
|
+
N as AGENT_REGISTRY,
|
|
876
916
|
u as BaseAgent,
|
|
877
|
-
|
|
917
|
+
D as CartesiaAgent,
|
|
878
918
|
S as ElevenLabsAgent,
|
|
879
919
|
R as GeminiLiveAgent,
|
|
880
|
-
|
|
881
|
-
|
|
920
|
+
L as KeyframeApiError,
|
|
921
|
+
$ as PersonaEmbed,
|
|
882
922
|
B as PersonaView,
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
923
|
+
o as SAMPLE_RATE,
|
|
924
|
+
m as base64ToBytes,
|
|
925
|
+
g as bytesToBase64,
|
|
886
926
|
y as createAgent,
|
|
887
927
|
E as createEventEmitter,
|
|
888
928
|
w as floatTo16BitPCM,
|
|
889
929
|
F as getAgentInfo,
|
|
890
|
-
|
|
930
|
+
h as resamplePcm
|
|
891
931
|
};
|