@keyframelabs/elements 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -81,19 +81,27 @@ For `PersonaView`, this is determined by `voiceAgentDetails`.
81
81
 
82
82
  The avatar can display emotional expressions (`neutral`, `angry`, `sad`, `happy`) that affect its facial expression and demeanor.
83
83
 
84
- ### Automatic Emotion Detection (ElevenLabs)
84
+ ### ElevenLabs: `set_emotion` Tool Call
85
85
 
86
- When using ElevenLabs as the voice agent, emotions are automatically detected from the agent's speech. The ElevenLabs agent parses emotion tags from audio alignment data (e.g., `[angry]`, `[happy]`) and the avatar expression updates in real-time.
86
+ When using ElevenLabs as the voice agent, emotions are driven by a **client tool call** named `set_emotion`. The library's `ElevenLabsAgent` handles incoming `client_tool_call` WebSocket messages and, when the tool name is `set_emotion`, updates the avatar's expression accordingly.
87
87
 
88
- This requires no additional configuration—just configure your ElevenLabs agent to include emotion tags in its responses.
88
+ > **Important:** Transcripts from the ElevenLabs agent are **not** automatically consumed. The `transcript` event is emitted, but it is up to you to subscribe to it if you need transcript data.
89
+
90
+ #### Setup
91
+
92
+ You must create a `set_emotion` tool in the [ElevenLabs API](https://elevenlabs.io/docs) for your agent. The tool should accept a single parameter:
93
+
94
+ | Parameter | Type | Description |
95
+ | --------- | -------- | -------------------------------------------------------- |
96
+ | `emotion` | `enum` | One of `neutral`, `angry`, `sad`, `happy`. |
97
+
98
+ Then instruct your agent (via its system prompt) to call `set_emotion` on each turn with the appropriate emotion. The client library handles the rest — it validates the emotion, emits an `emotion` event, and sends a `client_tool_result` back to ElevenLabs.
89
99
 
90
100
  ### Manual Emotion Control
91
101
 
92
102
  For other agents or custom emotion logic, you can access the underlying session to set emotions manually:
93
103
 
94
104
  ```typescript
95
- // Access the underlying SDK session for manual control
96
- // (Available when using @keyframelabs/sdk directly)
97
105
  import { createClient } from '@keyframelabs/sdk';
98
106
 
99
107
  const session = createClient({ ... });
@@ -102,15 +110,15 @@ await session.setEmotion('happy');
102
110
 
103
111
  ### Agent Events
104
112
 
105
- The `emotion` event is emitted when an agent detects an emotion change:
113
+ The `emotion` event is emitted when the agent triggers a `set_emotion` tool call:
106
114
 
107
115
  ```typescript
108
116
  agent.on('emotion', (emotion) => {
109
- console.log('Emotion detected:', emotion); // 'neutral' | 'angry' | 'sad' | 'happy'
117
+ console.log('Emotion changed:', emotion); // 'neutral' | 'angry' | 'sad' | 'happy'
110
118
  });
111
119
  ```
112
120
 
113
- Currently, only the ElevenLabs agent emits emotion events.
121
+ Currently, only the ElevenLabs agent emits `emotion` events, and it does so in response to `set_emotion` tool calls.
114
122
 
115
123
  ## API
116
124
 
@@ -20,7 +20,10 @@ export declare class ElevenLabsAgent extends BaseAgent {
20
20
  private sourceInputSampleRate;
21
21
  private initialized;
22
22
  private lastInterruptId;
23
- private emotionEmittedForEventId;
23
+ private agentResponseReceived;
24
+ private turnStartTime;
25
+ private accumulatedDurationMs;
26
+ private turnEndTimer;
24
27
  connect(config: ElevenLabsConfig): Promise<void>;
25
28
  protected handleParsedMessage(message: unknown): void;
26
29
  private handleInitMetadata;
@@ -28,6 +31,14 @@ export declare class ElevenLabsAgent extends BaseAgent {
28
31
  private handleAudio;
29
32
  private handleUserTranscript;
30
33
  private handleAgentResponse;
34
+ /**
35
+ * Schedule a timer to emit turnEnd when the virtual audio buffer
36
+ * "would have" finished playing. Replicates the ElevenLabs SDK's
37
+ * AudioWorklet buffer-empty detection without requiring local playback.
38
+ */
39
+ private scheduleVirtualBufferCheck;
40
+ private resetTurnState;
41
+ private handleClientToolCall;
31
42
  private handleInterruption;
32
43
  sendAudio(pcmData: Uint8Array): void;
33
44
  /**
package/dist/index.js CHANGED
@@ -1,28 +1,28 @@
1
1
  import { createClient as f } from "@keyframelabs/sdk";
2
- const l = 24e3;
3
- function g(i) {
2
+ const o = 24e3;
3
+ function m(i) {
4
4
  const e = atob(i), t = new Uint8Array(e.length);
5
5
  for (let s = 0; s < e.length; s++)
6
6
  t[s] = e.charCodeAt(s);
7
7
  return t;
8
8
  }
9
- function m(i) {
9
+ function g(i) {
10
10
  let e = "";
11
11
  for (let t = 0; t < i.length; t++)
12
12
  e += String.fromCharCode(i[t]);
13
13
  return btoa(e);
14
14
  }
15
- function c(i, e, t) {
15
+ function h(i, e, t) {
16
16
  if (e === t)
17
17
  return i;
18
- const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n), r = new Int16Array(a);
19
- for (let o = 0; o < a; o++) {
20
- const _ = o * n, p = Math.floor(_), b = Math.min(p + 1, s.length - 1), v = _ - p;
21
- r[o] = Math.round(
18
+ const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n), d = new Int16Array(a);
19
+ for (let r = 0; r < a; r++) {
20
+ const _ = r * n, p = Math.floor(_), b = Math.min(p + 1, s.length - 1), v = _ - p;
21
+ d[r] = Math.round(
22
22
  s[p] * (1 - v) + s[b] * v
23
23
  );
24
24
  }
25
- return new Uint8Array(r.buffer);
25
+ return new Uint8Array(d.buffer);
26
26
  }
27
27
  function E() {
28
28
  const i = /* @__PURE__ */ new Map();
@@ -49,12 +49,12 @@ function w(i) {
49
49
  }
50
50
  return new Uint8Array(e.buffer);
51
51
  }
52
- const I = 16e3;
52
+ const C = 16e3;
53
53
  class u {
54
54
  ws = null;
55
55
  _state = "idle";
56
56
  events = E();
57
- inputSampleRate = I;
57
+ inputSampleRate = C;
58
58
  /** Current agent state */
59
59
  get state() {
60
60
  return this._state;
@@ -113,7 +113,7 @@ class u {
113
113
  this.events.emit("closed", { code: e, reason: t });
114
114
  }
115
115
  }
116
- const A = "gemini-2.5-flash-native-audio-preview-12-2025", C = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent", k = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
116
+ const k = "gemini-2.5-flash-native-audio-preview-12-2025", A = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent", I = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
117
117
  class R extends u {
118
118
  agentName = "GeminiLive";
119
119
  async connect(e) {
@@ -122,10 +122,10 @@ class R extends u {
122
122
  if (!e.apiKey)
123
123
  throw new Error("Gemini API key is required");
124
124
  e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate);
125
- const t = e.model ?? A, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${k}?access_token=${encodeURIComponent(e.apiKey)}` : `${C}?key=${encodeURIComponent(e.apiKey)}`;
126
- return new Promise((a, r) => {
125
+ const t = e.model ?? k, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${I}?access_token=${encodeURIComponent(e.apiKey)}` : `${A}?key=${encodeURIComponent(e.apiKey)}`;
126
+ return new Promise((a, d) => {
127
127
  this.ws = new WebSocket(n), this.ws.onopen = () => {
128
- const o = {
128
+ const r = {
129
129
  setup: {
130
130
  model: `models/${t}`,
131
131
  generationConfig: {
@@ -134,13 +134,13 @@ class R extends u {
134
134
  systemInstruction: e.systemPrompt ? { parts: [{ text: e.systemPrompt }] } : void 0
135
135
  }
136
136
  };
137
- this.ws.send(JSON.stringify(o)), this.setState("listening"), a();
137
+ this.ws.send(JSON.stringify(r)), this.setState("listening"), a();
138
138
  }, this.ws.onerror = () => {
139
- r(new Error("Failed to connect to Gemini Live"));
140
- }, this.ws.onclose = (o) => {
141
- this.ws = null, this.setState("idle"), this.emitClosed(o.code, o.reason);
142
- }, this.ws.onmessage = (o) => {
143
- this.handleMessage(o.data);
139
+ d(new Error("Failed to connect to Gemini Live"));
140
+ }, this.ws.onclose = (r) => {
141
+ this.ws = null, this.setState("idle"), this.emitClosed(r.code, r.reason);
142
+ }, this.ws.onmessage = (r) => {
143
+ this.handleMessage(r.data);
144
144
  };
145
145
  });
146
146
  }
@@ -159,7 +159,7 @@ class R extends u {
159
159
  this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
160
160
  for (const n of s.modelTurn.parts) {
161
161
  if (n.inlineData?.data) {
162
- const a = g(n.inlineData.data);
162
+ const a = m(n.inlineData.data);
163
163
  this.events.emit("audio", a);
164
164
  }
165
165
  n.text && this.events.emit("transcript", {
@@ -181,7 +181,7 @@ class R extends u {
181
181
  mediaChunks: [
182
182
  {
183
183
  mimeType: `audio/pcm;rate=${this.inputSampleRate}`,
184
- data: m(e)
184
+ data: g(e)
185
185
  }
186
186
  ]
187
187
  }
@@ -189,7 +189,7 @@ class R extends u {
189
189
  this.ws.send(JSON.stringify(t));
190
190
  }
191
191
  }
192
- const M = ["neutral", "angry", "sad", "happy"], x = "wss://api.elevenlabs.io/v1/convai/conversation";
192
+ const M = ["neutral", "angry", "sad", "happy"], T = "wss://api.elevenlabs.io/v1/convai/conversation";
193
193
  class S extends u {
194
194
  agentName = "ElevenLabs";
195
195
  outputSampleRate = 24e3;
@@ -202,8 +202,12 @@ class S extends u {
202
202
  // True after conversation_initiation_metadata received
203
203
  lastInterruptId = 0;
204
204
  // Track interruptions to filter stale audio
205
- emotionEmittedForEventId = -1;
206
- // Track which turn's emotion we've already emitted
205
+ // Virtual buffer turn-end detection: track audio duration and emit turnEnd
206
+ // when agent_response has arrived and all audio "would have" finished playing.
207
+ agentResponseReceived = !1;
208
+ turnStartTime = 0;
209
+ accumulatedDurationMs = 0;
210
+ turnEndTimer = null;
207
211
  async connect(e) {
208
212
  if (this.ws)
209
213
  throw new Error("Already connected");
@@ -211,7 +215,7 @@ class S extends u {
211
215
  throw new Error("ElevenLabs agent ID or signed URL is required");
212
216
  e.inputSampleRate && (this.sourceInputSampleRate = e.inputSampleRate);
213
217
  let t;
214
- return e.signedUrl ? t = e.signedUrl : (t = `${x}?agent_id=${e.agentId}`, e.apiKey && (t += `&xi-api-key=${e.apiKey}`)), new Promise((s, n) => {
218
+ return e.signedUrl ? t = e.signedUrl : (t = `${T}?agent_id=${e.agentId}`, e.apiKey && (t += `&xi-api-key=${e.apiKey}`)), new Promise((s, n) => {
215
219
  this.ws = new WebSocket(t), this.ws.onopen = () => {
216
220
  this.setState("listening"), s();
217
221
  }, this.ws.onerror = () => {
@@ -244,6 +248,9 @@ class S extends u {
244
248
  case "interruption":
245
249
  this.handleInterruption(t);
246
250
  break;
251
+ case "client_tool_call":
252
+ this.handleClientToolCall(t);
253
+ break;
247
254
  case "agent_response_correction":
248
255
  this.setState("listening");
249
256
  break;
@@ -271,19 +278,15 @@ class S extends u {
271
278
  }
272
279
  handleAudio(e) {
273
280
  const t = e.audio_event;
274
- if (!t?.audio_base_64) return;
275
- const s = t.event_id ?? 0;
276
- if (s <= this.lastInterruptId)
281
+ if (!t?.audio_base_64 || (t.event_id ?? 0) <= this.lastInterruptId)
277
282
  return;
278
- if (this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking")), this.emotionEmittedForEventId !== s && t.alignment?.chars) {
279
- const r = t.alignment.chars.join("").match(/\[(\w+)\]/);
280
- if (r) {
281
- const o = r[1].toLowerCase();
282
- M.includes(o) && (this.events.emit("emotion", o), this.emotionEmittedForEventId = s);
283
- }
284
- }
285
- let n = g(t.audio_base_64);
286
- this.outputSampleRate !== l && (n = c(n, this.outputSampleRate, l)), this.events.emit("audio", n);
283
+ this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
284
+ let n = m(t.audio_base_64);
285
+ this.outputSampleRate !== o && (n = h(n, this.outputSampleRate, o)), this.events.emit("audio", n);
286
+ const a = n.length / 2 / o * 1e3;
287
+ this.turnStartTime === 0 && (this.turnStartTime = Date.now()), this.accumulatedDurationMs += a, console.debug(
288
+ `[ElevenLabs] audio chunk: ${n.length} bytes, +${a.toFixed(0)}ms, totalDuration=${this.accumulatedDurationMs.toFixed(0)}ms, agentResponse=${this.agentResponseReceived}`
289
+ ), this.scheduleVirtualBufferCheck();
287
290
  }
288
291
  handleUserTranscript(e) {
289
292
  const t = e.user_transcription_event;
@@ -295,22 +298,59 @@ class S extends u {
295
298
  }
296
299
  handleAgentResponse(e) {
297
300
  const t = e.agent_response_event;
298
- t?.agent_response && (this.events.emit("turnEnd", void 0), this.setState("listening"), this.events.emit("transcript", {
301
+ t?.agent_response && (this.events.emit("transcript", {
299
302
  role: "assistant",
300
303
  text: t.agent_response,
301
304
  isFinal: !0
302
- }));
305
+ }), this.agentResponseReceived = !0, console.debug(
306
+ `[ElevenLabs] agent_response received: totalDuration=${this.accumulatedDurationMs.toFixed(0)}ms, text="${t.agent_response.slice(0, 60)}${t.agent_response.length > 60 ? "..." : ""}"`
307
+ ), this.scheduleVirtualBufferCheck());
308
+ }
309
+ /**
310
+ * Schedule a timer to emit turnEnd when the virtual audio buffer
311
+ * "would have" finished playing. Replicates the ElevenLabs SDK's
312
+ * AudioWorklet buffer-empty detection without requiring local playback.
313
+ */
314
+ scheduleVirtualBufferCheck() {
315
+ if (!this.agentResponseReceived || this.turnStartTime === 0) return;
316
+ this.turnEndTimer !== null && clearTimeout(this.turnEndTimer);
317
+ const e = Date.now() - this.turnStartTime, t = Math.max(0, this.accumulatedDurationMs - e);
318
+ console.debug(
319
+ `[ElevenLabs] virtual buffer: elapsed=${e.toFixed(0)}ms, accumulated=${this.accumulatedDurationMs.toFixed(0)}ms, remaining=${t.toFixed(0)}ms`
320
+ ), this.turnEndTimer = setTimeout(() => {
321
+ this.turnEndTimer = null, console.debug("[ElevenLabs] virtual buffer drained, emitting turnEnd"), this.resetTurnState(), this.events.emit("turnEnd", void 0), this.setState("listening");
322
+ }, t);
323
+ }
324
+ resetTurnState() {
325
+ this.agentResponseReceived = !1, this.turnStartTime = 0, this.accumulatedDurationMs = 0, this.turnEndTimer !== null && (clearTimeout(this.turnEndTimer), this.turnEndTimer = null);
326
+ }
327
+ handleClientToolCall(e) {
328
+ const t = e.client_tool_call;
329
+ if (t) {
330
+ if (t.tool_name === "set_emotion") {
331
+ const s = t.parameters?.emotion?.toLowerCase();
332
+ s && M.includes(s) && this.events.emit("emotion", s);
333
+ }
334
+ this.ws && this.ws.readyState === WebSocket.OPEN && this.ws.send(JSON.stringify({
335
+ type: "client_tool_result",
336
+ tool_call_id: t.tool_call_id,
337
+ result: "ok",
338
+ is_error: !1
339
+ }));
340
+ }
303
341
  }
304
342
  handleInterruption(e) {
305
343
  const t = e.interruption_event;
306
- t?.event_id && (this.lastInterruptId = t.event_id), this.events.emit("interrupted", void 0), this.setState("listening");
344
+ t?.event_id && (this.lastInterruptId = t.event_id), (this.agentResponseReceived || this.accumulatedDurationMs > 0) && console.debug(
345
+ `[ElevenLabs] interruption: discarding pending turn (duration=${this.accumulatedDurationMs.toFixed(0)}ms, agentResponse=${this.agentResponseReceived})`
346
+ ), this.resetTurnState(), this.events.emit("interrupted", void 0), this.setState("listening");
307
347
  }
308
348
  sendAudio(e) {
309
349
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN || !this.initialized)
310
350
  return;
311
351
  let t = e;
312
- this.sourceInputSampleRate !== this.expectedInputSampleRate && (t = c(e, this.sourceInputSampleRate, this.expectedInputSampleRate)), this.ws.send(JSON.stringify({
313
- user_audio_chunk: m(t)
352
+ this.sourceInputSampleRate !== this.expectedInputSampleRate && (t = h(e, this.sourceInputSampleRate, this.expectedInputSampleRate)), this.ws.send(JSON.stringify({
353
+ user_audio_chunk: g(t)
314
354
  }));
315
355
  }
316
356
  /**
@@ -340,11 +380,11 @@ class S extends u {
340
380
  }));
341
381
  }
342
382
  close() {
343
- this.initialized = !1, this.lastInterruptId = 0, super.close();
383
+ this.initialized = !1, this.lastInterruptId = 0, this.resetTurnState(), super.close();
344
384
  }
345
385
  }
346
- const P = "wss://api.cartesia.ai/agents/stream", T = "2025-04-16";
347
- class O extends u {
386
+ const x = "wss://api.cartesia.ai/agents/stream", P = "2025-04-16";
387
+ class D extends u {
348
388
  agentName = "Cartesia";
349
389
  // Audio configuration
350
390
  cartesiaInputFormat = "pcm_16000";
@@ -363,7 +403,7 @@ class O extends u {
363
403
  if (!e.apiKey)
364
404
  throw new Error("Cartesia API Key is required");
365
405
  e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate), this.inputSampleRate === 16e3 ? this.cartesiaInputFormat = "pcm_16000" : this.inputSampleRate === 24e3 ? this.cartesiaInputFormat = "pcm_24000" : this.inputSampleRate === 44100 ? this.cartesiaInputFormat = "pcm_44100" : this.cartesiaInputFormat = "pcm_16000";
366
- const t = `${P}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${T}`;
406
+ const t = `${x}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${P}`;
367
407
  return new Promise((s, n) => {
368
408
  this.ws = new WebSocket(t), this.ws.onopen = () => {
369
409
  this.sendStartEvent(), this.startHeartbeat(), s();
@@ -425,8 +465,8 @@ class O extends u {
425
465
  handleMediaOutput(e) {
426
466
  if (!e.media?.payload) return;
427
467
  this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
428
- let t = g(e.media.payload);
429
- this.cartesiaOutputRate !== l && (t = c(t, this.cartesiaOutputRate, l)), this.events.emit("audio", t);
468
+ let t = m(e.media.payload);
469
+ this.cartesiaOutputRate !== o && (t = h(t, this.cartesiaOutputRate, o)), this.events.emit("audio", t);
430
470
  }
431
471
  handleClear() {
432
472
  this.events.emit("interrupted", void 0), this.setState("listening");
@@ -436,11 +476,11 @@ class O extends u {
436
476
  return;
437
477
  let t = e;
438
478
  const s = parseInt(this.cartesiaInputFormat.split("_")[1]);
439
- this.inputSampleRate !== s && (t = c(e, this.inputSampleRate, s)), this.ws.send(JSON.stringify({
479
+ this.inputSampleRate !== s && (t = h(e, this.inputSampleRate, s)), this.ws.send(JSON.stringify({
440
480
  event: "media_input",
441
481
  stream_id: this.streamId,
442
482
  media: {
443
- payload: m(t)
483
+ payload: g(t)
444
484
  }
445
485
  }));
446
486
  }
@@ -448,7 +488,7 @@ class O extends u {
448
488
  this.stopHeartbeat(), this.isReady = !1, this.streamId = null, super.close();
449
489
  }
450
490
  }
451
- class N extends u {
491
+ class O extends u {
452
492
  agentName = "Vapi";
453
493
  // Audio configuration - Vapi uses 16kHz PCM by default
454
494
  vapiSampleRate = 16e3;
@@ -475,7 +515,7 @@ class N extends u {
475
515
  */
476
516
  handleBinaryAudio(e) {
477
517
  this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
478
- const t = new Uint8Array(e), s = this.vapiSampleRate !== l ? c(t, this.vapiSampleRate, l) : t;
518
+ const t = new Uint8Array(e), s = this.vapiSampleRate !== o ? h(t, this.vapiSampleRate, o) : t;
479
519
  this.events.emit("audio", s);
480
520
  }
481
521
  handleParsedMessage(e) {
@@ -515,7 +555,7 @@ class N extends u {
515
555
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN)
516
556
  return;
517
557
  let t = e;
518
- this.inputSampleRate !== this.vapiSampleRate && (t = c(e, this.inputSampleRate, this.vapiSampleRate)), this.ws.send(t.buffer);
558
+ this.inputSampleRate !== this.vapiSampleRate && (t = h(e, this.inputSampleRate, this.vapiSampleRate)), this.ws.send(t.buffer);
519
559
  }
520
560
  /**
521
561
  * Send a control message to end the call.
@@ -527,7 +567,7 @@ class N extends u {
527
567
  this.hangup(), super.close();
528
568
  }
529
569
  }
530
- const U = [
570
+ const N = [
531
571
  { id: "gemini", name: "Gemini Live", description: "Google Gemini Live API" },
532
572
  { id: "elevenlabs", name: "ElevenLabs", description: "ElevenLabs Conversational AI" },
533
573
  { id: "cartesia", name: "Cartesia", description: "Cartesia Agents API" },
@@ -540,17 +580,17 @@ function y(i) {
540
580
  case "elevenlabs":
541
581
  return new S();
542
582
  case "cartesia":
543
- return new O();
583
+ return new D();
544
584
  case "vapi":
545
- return new N();
585
+ return new O();
546
586
  default:
547
587
  throw new Error(`Unknown agent type: ${i}`);
548
588
  }
549
589
  }
550
590
  function F(i) {
551
- return U.find((e) => e.id === i);
591
+ return N.find((e) => e.id === i);
552
592
  }
553
- class D extends Error {
593
+ class L extends Error {
554
594
  status;
555
595
  payload;
556
596
  url;
@@ -558,8 +598,8 @@ class D extends Error {
558
598
  super(e.message), this.name = "ApiError", this.status = e.status, this.payload = e.payload, this.url = e.url;
559
599
  }
560
600
  }
561
- const h = /* @__PURE__ */ new Set();
562
- class K {
601
+ const l = /* @__PURE__ */ new Set();
602
+ class $ {
563
603
  apiBaseUrl;
564
604
  publishableKey;
565
605
  callbacks;
@@ -603,31 +643,31 @@ class K {
603
643
  }
604
644
  /** Connect to the embed session */
605
645
  async connect() {
606
- if (h.has(this.publishableKey)) {
646
+ if (l.has(this.publishableKey)) {
607
647
  console.log("[PersonaEmbed] Connection already in progress, skipping");
608
648
  return;
609
649
  }
610
- h.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
650
+ l.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
611
651
  try {
612
652
  const e = await this.fetchSession(this.abortController.signal);
613
653
  if (!this.mounted) {
614
- h.delete(this.publishableKey);
654
+ l.delete(this.publishableKey);
615
655
  return;
616
656
  }
617
657
  if (await this.initSession(e), await this.initMicrophone(), await this.connectAgent(e.voice_agent_details), !this.mounted) {
618
- this.cleanup(), h.delete(this.publishableKey);
658
+ this.cleanup(), l.delete(this.publishableKey);
619
659
  return;
620
660
  }
621
661
  this.setStatus("connected");
622
662
  } catch (e) {
623
- if (h.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
663
+ if (l.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
624
664
  return;
625
665
  console.error("[PersonaEmbed]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
626
666
  }
627
667
  }
628
668
  /** Disconnect and cleanup */
629
669
  disconnect() {
630
- this.mounted = !1, this.abortController?.abort(), this.abortController = null, h.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
670
+ this.mounted = !1, this.abortController?.abort(), this.abortController = null, l.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
631
671
  }
632
672
  /** Toggle microphone mute */
633
673
  toggleMute() {
@@ -652,7 +692,7 @@ class K {
652
692
  s = await t.json();
653
693
  } catch {
654
694
  }
655
- throw new D({
695
+ throw new L({
656
696
  message: s?.message ?? "create_session failed",
657
697
  status: t.status,
658
698
  payload: s,
@@ -731,7 +771,7 @@ class K {
731
771
  this.stream?.getTracks().forEach((e) => e.stop()), this.processor?.disconnect(), this.audioContext?.close(), this.agent?.close(), this.session?.close(), this.stream = null, this.processor = null, this.audioContext = null, this.agent = null, this.session = null;
732
772
  }
733
773
  }
734
- const d = /* @__PURE__ */ new Set();
774
+ const c = /* @__PURE__ */ new Set();
735
775
  class B {
736
776
  voiceAgentDetails;
737
777
  sessionDetails;
@@ -776,24 +816,24 @@ class B {
776
816
  }
777
817
  /** Connect to the session */
778
818
  async connect() {
779
- if (d.has(this.connectionId)) {
819
+ if (c.has(this.connectionId)) {
780
820
  console.log("[PersonaView] Connection already in progress, skipping");
781
821
  return;
782
822
  }
783
- d.add(this.connectionId), this.mounted = !0, this.setStatus("connecting");
823
+ c.add(this.connectionId), this.mounted = !0, this.setStatus("connecting");
784
824
  try {
785
825
  if (await this.initSession(), await this.initMicrophone(), await this.connectAgent(), !this.mounted) {
786
- this.cleanup(), d.delete(this.connectionId);
826
+ this.cleanup(), c.delete(this.connectionId);
787
827
  return;
788
828
  }
789
829
  this.setStatus("connected");
790
830
  } catch (e) {
791
- d.delete(this.connectionId), console.error("[PersonaView]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
831
+ c.delete(this.connectionId), console.error("[PersonaView]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
792
832
  }
793
833
  }
794
834
  /** Disconnect and cleanup */
795
835
  disconnect() {
796
- this.mounted = !1, d.delete(this.connectionId), this.cleanup(), this.setStatus("disconnected");
836
+ this.mounted = !1, c.delete(this.connectionId), this.cleanup(), this.setStatus("disconnected");
797
837
  }
798
838
  /** Toggle microphone mute */
799
839
  toggleMute() {
@@ -872,20 +912,20 @@ class B {
872
912
  }
873
913
  }
874
914
  export {
875
- U as AGENT_REGISTRY,
915
+ N as AGENT_REGISTRY,
876
916
  u as BaseAgent,
877
- O as CartesiaAgent,
917
+ D as CartesiaAgent,
878
918
  S as ElevenLabsAgent,
879
919
  R as GeminiLiveAgent,
880
- D as KeyframeApiError,
881
- K as PersonaEmbed,
920
+ L as KeyframeApiError,
921
+ $ as PersonaEmbed,
882
922
  B as PersonaView,
883
- l as SAMPLE_RATE,
884
- g as base64ToBytes,
885
- m as bytesToBase64,
923
+ o as SAMPLE_RATE,
924
+ m as base64ToBytes,
925
+ g as bytesToBase64,
886
926
  y as createAgent,
887
927
  E as createEventEmitter,
888
928
  w as floatTo16BitPCM,
889
929
  F as getAgentInfo,
890
- c as resamplePcm
930
+ h as resamplePcm
891
931
  };
package/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
7
- "version": "0.1.0",
7
+ "version": "0.2.1",
8
8
  "type": "module",
9
9
  "main": "./dist/index.js",
10
10
  "types": "./dist/index.d.ts",