@keyframelabs/elements 0.0.9 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -81,19 +81,27 @@ For `PersonaView`, this is determined by `voiceAgentDetails`.
81
81
 
82
82
  The avatar can display emotional expressions (`neutral`, `angry`, `sad`, `happy`) that affect its facial expression and demeanor.
83
83
 
84
- ### Automatic Emotion Detection (ElevenLabs)
84
+ ### ElevenLabs: `set_emotion` Tool Call
85
85
 
86
- When using ElevenLabs as the voice agent, emotions are automatically detected from the agent's speech. The ElevenLabs agent parses emotion tags from audio alignment data (e.g., `[angry]`, `[happy]`) and the avatar expression updates in real-time.
86
+ When using ElevenLabs as the voice agent, emotions are driven by a **client tool call** named `set_emotion`. The library's `ElevenLabsAgent` handles incoming `client_tool_call` WebSocket messages and, when the tool name is `set_emotion`, emits an `emotion` event that updates the avatar's expression accordingly.
87
87
 
88
- This requires no additional configuration—just configure your ElevenLabs agent to include emotion tags in its responses.
88
+ > **Important:** Transcripts from the ElevenLabs agent are **not** automatically consumed. The `transcript` event is emitted, but it is up to you to subscribe to it if you need transcript data.
89
+
90
+ #### Setup
91
+
92
+ You must create a `set_emotion` **client tool** for your agent in ElevenLabs (see the [ElevenLabs docs](https://elevenlabs.io/docs)). The tool should accept a single parameter:
93
+
94
+ | Parameter | Type | Description |
95
+ | --------- | -------- | -------------------------------------------------------- |
96
+ | `emotion` | `enum` | One of `neutral`, `angry`, `sad`, `happy`. |
97
+
98
+ Then instruct your agent (via its system prompt) to call `set_emotion` on each turn with the appropriate emotion. The client library handles the rest — it validates the emotion (case-insensitively), emits an `emotion` event when the value is one of the supported emotions, and sends a `client_tool_result` back to ElevenLabs.
89
99
 
90
100
  ### Manual Emotion Control
91
101
 
92
102
  For other agents or custom emotion logic, you can access the underlying session to set emotions manually:
93
103
 
94
104
  ```typescript
95
- // Access the underlying SDK session for manual control
96
- // (Available when using @keyframelabs/sdk directly)
97
105
  import { createClient } from '@keyframelabs/sdk';
98
106
 
99
107
  const session = createClient({ ... });
@@ -102,15 +110,15 @@ await session.setEmotion('happy');
102
110
 
103
111
  ### Agent Events
104
112
 
105
- The `emotion` event is emitted when an agent detects an emotion change:
113
+ The `emotion` event is emitted when the agent triggers a `set_emotion` tool call:
106
114
 
107
115
  ```typescript
108
116
  agent.on('emotion', (emotion) => {
109
- console.log('Emotion detected:', emotion); // 'neutral' | 'angry' | 'sad' | 'happy'
117
+ console.log('Emotion changed:', emotion); // 'neutral' | 'angry' | 'sad' | 'happy'
110
118
  });
111
119
  ```
112
120
 
113
- Currently, only the ElevenLabs agent emits emotion events.
121
+ Currently, only the ElevenLabs agent emits `emotion` events, and it does so only in response to `set_emotion` tool calls.
114
122
 
115
123
  ## API
116
124
 
@@ -125,7 +133,7 @@ Currently, only the ElevenLabs agent emits emotion events.
125
133
  | `apiBaseUrl` | `string` | `'https://api.keyframelabs.com'` | Base URL for the Keyframe API. |
126
134
  | `videoFit` | `'cover' \| 'contain'` | `'cover'` | Video scaling mode (`object-fit`). |
127
135
  | `onStateChange` | `(status: EmbedStatus) => void` | — | Fired when connection status changes. |
128
- | `onAgentStateChange` | `(state: AgentState) => void` | — | Fired when agent state changes. |
136
+ | `onAgentStateChange` | `(state: AgentState) => void` | — | Fired when avatar playback state changes. Signaled by the GPU node via RPC, not the voice agent. |
129
137
  | `onDisconnect` | `() => void` | — | Fired when the session disconnects. |
130
138
  | `onError` | `(err: Error) => void` | — | Fired on fatal errors. |
131
139
 
@@ -142,7 +150,7 @@ Currently, only the ElevenLabs agent emits emotion events.
142
150
  | Property | Type | Description |
143
151
  | -------------- | ------------------ | -------------------------------------------------------------------------------------- |
144
152
  | `status` | `EmbedStatus` | Current connection status: `'connecting' \| 'connected' \| 'disconnected' \| 'error'`. |
145
- | `agentState` | `AgentState` | Current agent state: `'idle' \| 'listening' \| 'thinking' \| 'speaking'`. |
153
+ | `agentState` | `AgentState` | Avatar playback state: `'listening' \| 'speaking'`. Set by the GPU node, not the voice agent. |
146
154
  | `isMuted` | `boolean` | Whether the microphone is currently muted. |
147
155
  | `videoElement` | `HTMLVideoElement` | The underlying video element used for rendering. |
148
156
  | `audioElement` | `HTMLAudioElement` | The underlying audio element used for playback. |
@@ -1,4 +1,4 @@
1
- import { AgentState } from './agents';
1
+ import { AgentState } from '@keyframelabs/sdk';
2
2
  import { EmbedStatus, VideoFit, BaseCallbacks } from './types';
3
3
  export type { EmbedStatus, VideoFit } from './types';
4
4
  export interface PersonaEmbedOptions extends BaseCallbacks {
@@ -1,4 +1,4 @@
1
- import { AgentState } from './agents';
1
+ import { AgentState } from '@keyframelabs/sdk';
2
2
  import { EmbedStatus, VideoFit, VoiceAgentDetails, SessionDetails, BaseCallbacks } from './types';
3
3
  export interface PersonaViewOptions extends BaseCallbacks {
4
4
  /** Target container element */
@@ -20,7 +20,6 @@ export declare class ElevenLabsAgent extends BaseAgent {
20
20
  private sourceInputSampleRate;
21
21
  private initialized;
22
22
  private lastInterruptId;
23
- private emotionEmittedForEventId;
24
23
  connect(config: ElevenLabsConfig): Promise<void>;
25
24
  protected handleParsedMessage(message: unknown): void;
26
25
  private handleInitMetadata;
@@ -28,6 +27,7 @@ export declare class ElevenLabsAgent extends BaseAgent {
28
27
  private handleAudio;
29
28
  private handleUserTranscript;
30
29
  private handleAgentResponse;
30
+ private handleClientToolCall;
31
31
  private handleInterruption;
32
32
  sendAudio(pcmData: Uint8Array): void;
33
33
  /**
package/dist/index.d.ts CHANGED
@@ -4,7 +4,8 @@ export { PersonaView } from './PersonaView';
4
4
  export type { PersonaViewOptions } from './PersonaView';
5
5
  export type { EmbedStatus, VideoFit, VoiceAgentDetails, SessionDetails, BaseCallbacks, } from './types';
6
6
  export { createAgent, GeminiLiveAgent, ElevenLabsAgent, CartesiaAgent, BaseAgent, AGENT_REGISTRY, getAgentInfo, } from './agents';
7
- export type { AgentType, AgentState, AgentConfig, AgentEventMap, Agent, AnyAgent, AgentTypeInfo, GeminiLiveConfig, ElevenLabsConfig, CartesiaConfig, } from './agents';
7
+ export type { AgentType, AgentConfig, AgentEventMap, Agent, AnyAgent, AgentTypeInfo, GeminiLiveConfig, ElevenLabsConfig, CartesiaConfig, } from './agents';
8
+ export type { AgentState } from '@keyframelabs/sdk';
8
9
  export { floatTo16BitPCM, resamplePcm, base64ToBytes, bytesToBase64, SAMPLE_RATE, createEventEmitter, } from './agents';
9
10
  export { ApiError as KeyframeApiError } from './ApiError';
10
11
  export type { ApiErrorPayload as KeyframeApiErrorPayload } from './ApiError';
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
1
- import { createClient as w } from "@keyframelabs/sdk";
1
+ import { createClient as f } from "@keyframelabs/sdk";
2
2
  const l = 24e3;
3
3
  function g(i) {
4
4
  const e = atob(i), t = new Uint8Array(e.length);
@@ -12,17 +12,17 @@ function m(i) {
12
12
  e += String.fromCharCode(i[t]);
13
13
  return btoa(e);
14
14
  }
15
- function c(i, e, t) {
15
+ function h(i, e, t) {
16
16
  if (e === t)
17
17
  return i;
18
- const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n), r = new Int16Array(a);
18
+ const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n), d = new Int16Array(a);
19
19
  for (let o = 0; o < a; o++) {
20
- const _ = o * n, p = Math.floor(_), b = Math.min(p + 1, s.length - 1), v = _ - p;
21
- r[o] = Math.round(
20
+ const S = o * n, p = Math.floor(S), b = Math.min(p + 1, s.length - 1), v = S - p;
21
+ d[o] = Math.round(
22
22
  s[p] * (1 - v) + s[b] * v
23
23
  );
24
24
  }
25
- return new Uint8Array(r.buffer);
25
+ return new Uint8Array(d.buffer);
26
26
  }
27
27
  function E() {
28
28
  const i = /* @__PURE__ */ new Map();
@@ -41,7 +41,7 @@ function E() {
41
41
  }
42
42
  };
43
43
  }
44
- function f(i) {
44
+ function w(i) {
45
45
  const e = new Int16Array(i.length);
46
46
  for (let t = 0; t < i.length; t++) {
47
47
  const s = Math.max(-1, Math.min(1, i[t]));
@@ -49,12 +49,12 @@ function f(i) {
49
49
  }
50
50
  return new Uint8Array(e.buffer);
51
51
  }
52
- const I = 16e3;
52
+ const C = 16e3;
53
53
  class u {
54
54
  ws = null;
55
55
  _state = "idle";
56
56
  events = E();
57
- inputSampleRate = I;
57
+ inputSampleRate = C;
58
58
  /** Current agent state */
59
59
  get state() {
60
60
  return this._state;
@@ -113,7 +113,7 @@ class u {
113
113
  this.events.emit("closed", { code: e, reason: t });
114
114
  }
115
115
  }
116
- const A = "gemini-2.5-flash-native-audio-preview-12-2025", C = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent", k = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
116
+ const A = "gemini-2.5-flash-native-audio-preview-12-2025", I = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent", k = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
117
117
  class R extends u {
118
118
  agentName = "GeminiLive";
119
119
  async connect(e) {
@@ -122,8 +122,8 @@ class R extends u {
122
122
  if (!e.apiKey)
123
123
  throw new Error("Gemini API key is required");
124
124
  e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate);
125
- const t = e.model ?? A, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${k}?access_token=${encodeURIComponent(e.apiKey)}` : `${C}?key=${encodeURIComponent(e.apiKey)}`;
126
- return new Promise((a, r) => {
125
+ const t = e.model ?? A, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${k}?access_token=${encodeURIComponent(e.apiKey)}` : `${I}?key=${encodeURIComponent(e.apiKey)}`;
126
+ return new Promise((a, d) => {
127
127
  this.ws = new WebSocket(n), this.ws.onopen = () => {
128
128
  const o = {
129
129
  setup: {
@@ -136,7 +136,7 @@ class R extends u {
136
136
  };
137
137
  this.ws.send(JSON.stringify(o)), this.setState("listening"), a();
138
138
  }, this.ws.onerror = () => {
139
- r(new Error("Failed to connect to Gemini Live"));
139
+ d(new Error("Failed to connect to Gemini Live"));
140
140
  }, this.ws.onclose = (o) => {
141
141
  this.ws = null, this.setState("idle"), this.emitClosed(o.code, o.reason);
142
142
  }, this.ws.onmessage = (o) => {
@@ -189,8 +189,8 @@ class R extends u {
189
189
  this.ws.send(JSON.stringify(t));
190
190
  }
191
191
  }
192
- const M = ["neutral", "angry", "sad", "happy"], x = "wss://api.elevenlabs.io/v1/convai/conversation";
193
- class S extends u {
192
+ const M = ["neutral", "angry", "sad", "happy"], T = "wss://api.elevenlabs.io/v1/convai/conversation";
193
+ class _ extends u {
194
194
  agentName = "ElevenLabs";
195
195
  outputSampleRate = 24e3;
196
196
  // Default, updated from metadata
@@ -202,8 +202,6 @@ class S extends u {
202
202
  // True after conversation_initiation_metadata received
203
203
  lastInterruptId = 0;
204
204
  // Track interruptions to filter stale audio
205
- emotionEmittedForEventId = -1;
206
- // Track which turn's emotion we've already emitted
207
205
  async connect(e) {
208
206
  if (this.ws)
209
207
  throw new Error("Already connected");
@@ -211,7 +209,7 @@ class S extends u {
211
209
  throw new Error("ElevenLabs agent ID or signed URL is required");
212
210
  e.inputSampleRate && (this.sourceInputSampleRate = e.inputSampleRate);
213
211
  let t;
214
- return e.signedUrl ? t = e.signedUrl : (t = `${x}?agent_id=${e.agentId}`, e.apiKey && (t += `&xi-api-key=${e.apiKey}`)), new Promise((s, n) => {
212
+ return e.signedUrl ? t = e.signedUrl : (t = `${T}?agent_id=${e.agentId}`, e.apiKey && (t += `&xi-api-key=${e.apiKey}`)), new Promise((s, n) => {
215
213
  this.ws = new WebSocket(t), this.ws.onopen = () => {
216
214
  this.setState("listening"), s();
217
215
  }, this.ws.onerror = () => {
@@ -244,6 +242,9 @@ class S extends u {
244
242
  case "interruption":
245
243
  this.handleInterruption(t);
246
244
  break;
245
+ case "client_tool_call":
246
+ this.handleClientToolCall(t);
247
+ break;
247
248
  case "agent_response_correction":
248
249
  this.setState("listening");
249
250
  break;
@@ -271,19 +272,11 @@ class S extends u {
271
272
  }
272
273
  handleAudio(e) {
273
274
  const t = e.audio_event;
274
- if (!t?.audio_base_64) return;
275
- const s = t.event_id ?? 0;
276
- if (s <= this.lastInterruptId)
275
+ if (!t?.audio_base_64 || (t.event_id ?? 0) <= this.lastInterruptId)
277
276
  return;
278
- if (this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking")), this.emotionEmittedForEventId !== s && t.alignment?.chars) {
279
- const r = t.alignment.chars.join("").match(/\[(\w+)\]/);
280
- if (r) {
281
- const o = r[1].toLowerCase();
282
- M.includes(o) && (this.events.emit("emotion", o), this.emotionEmittedForEventId = s);
283
- }
284
- }
277
+ this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
285
278
  let n = g(t.audio_base_64);
286
- this.outputSampleRate !== l && (n = c(n, this.outputSampleRate, l)), this.events.emit("audio", n);
279
+ this.outputSampleRate !== l && (n = h(n, this.outputSampleRate, l)), this.events.emit("audio", n);
287
280
  }
288
281
  handleUserTranscript(e) {
289
282
  const t = e.user_transcription_event;
@@ -301,6 +294,21 @@ class S extends u {
301
294
  isFinal: !0
302
295
  }));
303
296
  }
297
+ handleClientToolCall(e) {
298
+ const t = e.client_tool_call;
299
+ if (t) {
300
+ if (t.tool_name === "set_emotion") {
301
+ const s = t.parameters?.emotion?.toLowerCase();
302
+ s && M.includes(s) && this.events.emit("emotion", s);
303
+ }
304
+ this.ws && this.ws.readyState === WebSocket.OPEN && this.ws.send(JSON.stringify({
305
+ type: "client_tool_result",
306
+ tool_call_id: t.tool_call_id,
307
+ result: "ok",
308
+ is_error: !1
309
+ }));
310
+ }
311
+ }
304
312
  handleInterruption(e) {
305
313
  const t = e.interruption_event;
306
314
  t?.event_id && (this.lastInterruptId = t.event_id), this.events.emit("interrupted", void 0), this.setState("listening");
@@ -309,7 +317,7 @@ class S extends u {
309
317
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN || !this.initialized)
310
318
  return;
311
319
  let t = e;
312
- this.sourceInputSampleRate !== this.expectedInputSampleRate && (t = c(e, this.sourceInputSampleRate, this.expectedInputSampleRate)), this.ws.send(JSON.stringify({
320
+ this.sourceInputSampleRate !== this.expectedInputSampleRate && (t = h(e, this.sourceInputSampleRate, this.expectedInputSampleRate)), this.ws.send(JSON.stringify({
313
321
  user_audio_chunk: m(t)
314
322
  }));
315
323
  }
@@ -343,7 +351,7 @@ class S extends u {
343
351
  this.initialized = !1, this.lastInterruptId = 0, super.close();
344
352
  }
345
353
  }
346
- const P = "wss://api.cartesia.ai/agents/stream", T = "2025-04-16";
354
+ const P = "wss://api.cartesia.ai/agents/stream", x = "2025-04-16";
347
355
  class O extends u {
348
356
  agentName = "Cartesia";
349
357
  // Audio configuration
@@ -363,7 +371,7 @@ class O extends u {
363
371
  if (!e.apiKey)
364
372
  throw new Error("Cartesia API Key is required");
365
373
  e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate), this.inputSampleRate === 16e3 ? this.cartesiaInputFormat = "pcm_16000" : this.inputSampleRate === 24e3 ? this.cartesiaInputFormat = "pcm_24000" : this.inputSampleRate === 44100 ? this.cartesiaInputFormat = "pcm_44100" : this.cartesiaInputFormat = "pcm_16000";
366
- const t = `${P}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${T}`;
374
+ const t = `${P}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${x}`;
367
375
  return new Promise((s, n) => {
368
376
  this.ws = new WebSocket(t), this.ws.onopen = () => {
369
377
  this.sendStartEvent(), this.startHeartbeat(), s();
@@ -426,7 +434,7 @@ class O extends u {
426
434
  if (!e.media?.payload) return;
427
435
  this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
428
436
  let t = g(e.media.payload);
429
- this.cartesiaOutputRate !== l && (t = c(t, this.cartesiaOutputRate, l)), this.events.emit("audio", t);
437
+ this.cartesiaOutputRate !== l && (t = h(t, this.cartesiaOutputRate, l)), this.events.emit("audio", t);
430
438
  }
431
439
  handleClear() {
432
440
  this.events.emit("interrupted", void 0), this.setState("listening");
@@ -436,7 +444,7 @@ class O extends u {
436
444
  return;
437
445
  let t = e;
438
446
  const s = parseInt(this.cartesiaInputFormat.split("_")[1]);
439
- this.inputSampleRate !== s && (t = c(e, this.inputSampleRate, s)), this.ws.send(JSON.stringify({
447
+ this.inputSampleRate !== s && (t = h(e, this.inputSampleRate, s)), this.ws.send(JSON.stringify({
440
448
  event: "media_input",
441
449
  stream_id: this.streamId,
442
450
  media: {
@@ -475,7 +483,7 @@ class N extends u {
475
483
  */
476
484
  handleBinaryAudio(e) {
477
485
  this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
478
- const t = new Uint8Array(e), s = this.vapiSampleRate !== l ? c(t, this.vapiSampleRate, l) : t;
486
+ const t = new Uint8Array(e), s = this.vapiSampleRate !== l ? h(t, this.vapiSampleRate, l) : t;
479
487
  this.events.emit("audio", s);
480
488
  }
481
489
  handleParsedMessage(e) {
@@ -515,7 +523,7 @@ class N extends u {
515
523
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN)
516
524
  return;
517
525
  let t = e;
518
- this.inputSampleRate !== this.vapiSampleRate && (t = c(e, this.inputSampleRate, this.vapiSampleRate)), this.ws.send(t.buffer);
526
+ this.inputSampleRate !== this.vapiSampleRate && (t = h(e, this.inputSampleRate, this.vapiSampleRate)), this.ws.send(t.buffer);
519
527
  }
520
528
  /**
521
529
  * Send a control message to end the call.
@@ -538,7 +546,7 @@ function y(i) {
538
546
  case "gemini":
539
547
  return new R();
540
548
  case "elevenlabs":
541
- return new S();
549
+ return new _();
542
550
  case "cartesia":
543
551
  return new O();
544
552
  case "vapi":
@@ -558,7 +566,7 @@ class D extends Error {
558
566
  super(e.message), this.name = "ApiError", this.status = e.status, this.payload = e.payload, this.url = e.url;
559
567
  }
560
568
  }
561
- const h = /* @__PURE__ */ new Set();
569
+ const r = /* @__PURE__ */ new Set();
562
570
  class K {
563
571
  apiBaseUrl;
564
572
  publishableKey;
@@ -574,7 +582,7 @@ class K {
574
582
  stream = null;
575
583
  abortController = null;
576
584
  _status = "disconnected";
577
- _agentState = "idle";
585
+ _agentState = "listening";
578
586
  _isMuted = !1;
579
587
  mounted = !0;
580
588
  constructor(e) {
@@ -603,31 +611,31 @@ class K {
603
611
  }
604
612
  /** Connect to the embed session */
605
613
  async connect() {
606
- if (h.has(this.publishableKey)) {
614
+ if (r.has(this.publishableKey)) {
607
615
  console.log("[PersonaEmbed] Connection already in progress, skipping");
608
616
  return;
609
617
  }
610
- h.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
618
+ r.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
611
619
  try {
612
620
  const e = await this.fetchSession(this.abortController.signal);
613
621
  if (!this.mounted) {
614
- h.delete(this.publishableKey);
622
+ r.delete(this.publishableKey);
615
623
  return;
616
624
  }
617
625
  if (await this.initSession(e), await this.initMicrophone(), await this.connectAgent(e.voice_agent_details), !this.mounted) {
618
- this.cleanup(), h.delete(this.publishableKey);
626
+ this.cleanup(), r.delete(this.publishableKey);
619
627
  return;
620
628
  }
621
629
  this.setStatus("connected");
622
630
  } catch (e) {
623
- if (h.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
631
+ if (r.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
624
632
  return;
625
633
  console.error("[PersonaEmbed]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
626
634
  }
627
635
  }
628
636
  /** Disconnect and cleanup */
629
637
  disconnect() {
630
- this.mounted = !1, this.abortController?.abort(), this.abortController = null, h.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
638
+ this.mounted = !1, this.abortController?.abort(), this.abortController = null, r.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
631
639
  }
632
640
  /** Toggle microphone mute */
633
641
  toggleMute() {
@@ -666,7 +674,7 @@ class K {
666
674
  return t.json();
667
675
  }
668
676
  async initSession(e) {
669
- this.session = w({
677
+ this.session = f({
670
678
  serverUrl: e.session_details.server_url,
671
679
  participantToken: e.session_details.participant_token,
672
680
  agentIdentity: e.session_details.agent_identity,
@@ -680,6 +688,9 @@ class K {
680
688
  onStateChange: (t) => {
681
689
  this.mounted && t === "disconnected" && (this.setStatus("disconnected"), this.callbacks.onDisconnect?.());
682
690
  },
691
+ onAgentStateChange: (t) => {
692
+ this.mounted && this.setAgentState(t);
693
+ },
683
694
  onError: (t) => {
684
695
  this.mounted && this.callbacks.onError?.(t);
685
696
  },
@@ -688,9 +699,9 @@ class K {
688
699
  }
689
700
  }), this.agent = y(e.voice_agent_details.type), this.agent.on("audio", (t) => this.session?.sendAudio(t)), this.agent.on("turnEnd", () => this.session?.endAudioTurn()), this.agent.on("interrupted", () => {
690
701
  this.session?.endAudioTurn(), this.session?.interrupt();
691
- }), this.agent.on("stateChange", (t) => this.setAgentState(t)), this.agent.on("closed", () => {
702
+ }), this.agent.on("closed", () => {
692
703
  this.mounted && this.callbacks.onDisconnect?.();
693
- }), this.agent instanceof S && this.agent.on("emotion", (t) => this.session?.setEmotion(t)), await this.session.connect();
704
+ }), this.agent instanceof _ && this.agent.on("emotion", (t) => this.session?.setEmotion(t)), await this.session.connect();
694
705
  }
695
706
  async initMicrophone() {
696
707
  this.stream = await navigator.mediaDevices.getUserMedia({
@@ -699,7 +710,7 @@ class K {
699
710
  const e = this.audioContext.createMediaStreamSource(this.stream);
700
711
  this.processor = this.audioContext.createScriptProcessor(4096, 1, 1), this.processor.onaudioprocess = (t) => {
701
712
  if (!this._isMuted) {
702
- const s = f(t.inputBuffer.getChannelData(0));
713
+ const s = w(t.inputBuffer.getChannelData(0));
703
714
  this.agent?.sendAudio(s);
704
715
  }
705
716
  }, e.connect(this.processor), this.processor.connect(this.audioContext.destination);
@@ -728,7 +739,7 @@ class K {
728
739
  this.stream?.getTracks().forEach((e) => e.stop()), this.processor?.disconnect(), this.audioContext?.close(), this.agent?.close(), this.session?.close(), this.stream = null, this.processor = null, this.audioContext = null, this.agent = null, this.session = null;
729
740
  }
730
741
  }
731
- const d = /* @__PURE__ */ new Set();
742
+ const c = /* @__PURE__ */ new Set();
732
743
  class B {
733
744
  voiceAgentDetails;
734
745
  sessionDetails;
@@ -744,7 +755,7 @@ class B {
744
755
  processor = null;
745
756
  stream = null;
746
757
  _status = "disconnected";
747
- _agentState = "idle";
758
+ _agentState = "listening";
748
759
  _isMuted = !1;
749
760
  mounted = !0;
750
761
  constructor(e) {
@@ -773,24 +784,24 @@ class B {
773
784
  }
774
785
  /** Connect to the session */
775
786
  async connect() {
776
- if (d.has(this.connectionId)) {
787
+ if (c.has(this.connectionId)) {
777
788
  console.log("[PersonaView] Connection already in progress, skipping");
778
789
  return;
779
790
  }
780
- d.add(this.connectionId), this.mounted = !0, this.setStatus("connecting");
791
+ c.add(this.connectionId), this.mounted = !0, this.setStatus("connecting");
781
792
  try {
782
793
  if (await this.initSession(), await this.initMicrophone(), await this.connectAgent(), !this.mounted) {
783
- this.cleanup(), d.delete(this.connectionId);
794
+ this.cleanup(), c.delete(this.connectionId);
784
795
  return;
785
796
  }
786
797
  this.setStatus("connected");
787
798
  } catch (e) {
788
- d.delete(this.connectionId), console.error("[PersonaView]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
799
+ c.delete(this.connectionId), console.error("[PersonaView]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
789
800
  }
790
801
  }
791
802
  /** Disconnect and cleanup */
792
803
  disconnect() {
793
- this.mounted = !1, d.delete(this.connectionId), this.cleanup(), this.setStatus("disconnected");
804
+ this.mounted = !1, c.delete(this.connectionId), this.cleanup(), this.setStatus("disconnected");
794
805
  }
795
806
  /** Toggle microphone mute */
796
807
  toggleMute() {
@@ -803,7 +814,7 @@ class B {
803
814
  this._agentState !== e && (this._agentState = e, this.callbacks.onAgentStateChange?.(e));
804
815
  }
805
816
  async initSession() {
806
- this.session = w({
817
+ this.session = f({
807
818
  serverUrl: this.sessionDetails.server_url,
808
819
  participantToken: this.sessionDetails.participant_token,
809
820
  agentIdentity: this.sessionDetails.agent_identity,
@@ -817,6 +828,9 @@ class B {
817
828
  onStateChange: (e) => {
818
829
  this.mounted && e === "disconnected" && (this.setStatus("disconnected"), this.callbacks.onDisconnect?.());
819
830
  },
831
+ onAgentStateChange: (e) => {
832
+ this.mounted && this.setAgentState(e);
833
+ },
820
834
  onError: (e) => {
821
835
  this.mounted && this.callbacks.onError?.(e);
822
836
  },
@@ -825,9 +839,9 @@ class B {
825
839
  }
826
840
  }), this.agent = y(this.voiceAgentDetails.type), this.agent.on("audio", (e) => this.session?.sendAudio(e)), this.agent.on("turnEnd", () => this.session?.endAudioTurn()), this.agent.on("interrupted", () => {
827
841
  this.session?.endAudioTurn(), this.session?.interrupt();
828
- }), this.agent.on("stateChange", (e) => this.setAgentState(e)), this.agent.on("closed", () => {
842
+ }), this.agent.on("closed", () => {
829
843
  this.mounted && this.callbacks.onDisconnect?.();
830
- }), this.agent instanceof S && this.agent.on("emotion", (e) => this.session?.setEmotion(e)), await this.session.connect();
844
+ }), this.agent instanceof _ && this.agent.on("emotion", (e) => this.session?.setEmotion(e)), await this.session.connect();
831
845
  }
832
846
  async initMicrophone() {
833
847
  this.stream = await navigator.mediaDevices.getUserMedia({
@@ -836,7 +850,7 @@ class B {
836
850
  const e = this.audioContext.createMediaStreamSource(this.stream);
837
851
  this.processor = this.audioContext.createScriptProcessor(4096, 1, 1), this.processor.onaudioprocess = (t) => {
838
852
  if (!this._isMuted) {
839
- const s = f(t.inputBuffer.getChannelData(0));
853
+ const s = w(t.inputBuffer.getChannelData(0));
840
854
  this.agent?.sendAudio(s);
841
855
  }
842
856
  }, e.connect(this.processor), this.processor.connect(this.audioContext.destination);
@@ -869,7 +883,7 @@ export {
869
883
  U as AGENT_REGISTRY,
870
884
  u as BaseAgent,
871
885
  O as CartesiaAgent,
872
- S as ElevenLabsAgent,
886
+ _ as ElevenLabsAgent,
873
887
  R as GeminiLiveAgent,
874
888
  D as KeyframeApiError,
875
889
  K as PersonaEmbed,
@@ -879,7 +893,7 @@ export {
879
893
  m as bytesToBase64,
880
894
  y as createAgent,
881
895
  E as createEventEmitter,
882
- f as floatTo16BitPCM,
896
+ w as floatTo16BitPCM,
883
897
  F as getAgentInfo,
884
- c as resamplePcm
898
+ h as resamplePcm
885
899
  };
package/dist/types.d.ts CHANGED
@@ -1,4 +1,5 @@
1
- import { AgentType, AgentState } from './agents';
1
+ import { AgentType } from './agents';
2
+ import { AgentState } from '@keyframelabs/sdk';
2
3
  export type EmbedStatus = 'connecting' | 'connected' | 'error' | 'disconnected';
3
4
  export type VideoFit = 'cover' | 'contain';
4
5
  export type VoiceAgentDetails = {
package/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
7
- "version": "0.0.9",
7
+ "version": "0.2.0",
8
8
  "type": "module",
9
9
  "main": "./dist/index.js",
10
10
  "types": "./dist/index.d.ts",
@@ -19,7 +19,7 @@
19
19
  ],
20
20
  "sideEffects": false,
21
21
  "dependencies": {
22
- "@keyframelabs/sdk": "0.1.7"
22
+ "@keyframelabs/sdk": "0.1.8"
23
23
  },
24
24
  "devDependencies": {
25
25
  "@types/node": "^25.0.9",