@keyframelabs/elements 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -81,19 +81,27 @@ For `PersonaView`, this is determined by `voiceAgentDetails`.
81
81
 
82
82
  The avatar can display emotional expressions (`neutral`, `angry`, `sad`, `happy`) that affect its facial expression and demeanor.
83
83
 
84
- ### Automatic Emotion Detection (ElevenLabs)
84
+ ### ElevenLabs: `set_emotion` Tool Call
85
85
 
86
- When using ElevenLabs as the voice agent, emotions are automatically detected from the agent's speech. The ElevenLabs agent parses emotion tags from audio alignment data (e.g., `[angry]`, `[happy]`) and the avatar expression updates in real-time.
86
+ When using ElevenLabs as the voice agent, emotions are driven by a **client tool call** named `set_emotion`. The ElevenLabs agent parses incoming `client_tool_call` WebSocket messages and, when the tool name is `set_emotion`, updates the avatar's expression accordingly.
87
87
 
88
- This requires no additional configuration—just configure your ElevenLabs agent to include emotion tags in its responses.
88
+ > **Important:** Transcripts from the ElevenLabs agent are **not** automatically consumed. The `transcript` event is emitted, but it is up to you to subscribe to it if you need transcript data.
89
+
90
+ #### Setup
91
+
92
+ You must create a `set_emotion` client tool for your agent via the [ElevenLabs API](https://elevenlabs.io/docs). The tool should accept a single parameter:
93
+
94
+ | Parameter | Type | Description |
95
+ | --------- | -------- | -------------------------------------------------------- |
96
+ | `emotion` | `enum` | One of `neutral`, `angry`, `sad`, `happy`. |
97
+
98
+ Then instruct your agent (via its system prompt) to call `set_emotion` on each turn with the appropriate emotion. The client library handles the rest — it validates the emotion, emits an `emotion` event, and sends a `client_tool_result` back to ElevenLabs.
89
99
 
90
100
  ### Manual Emotion Control
91
101
 
92
102
  For other agents or custom emotion logic, you can access the underlying session to set emotions manually:
93
103
 
94
104
  ```typescript
95
- // Access the underlying SDK session for manual control
96
- // (Available when using @keyframelabs/sdk directly)
97
105
  import { createClient } from '@keyframelabs/sdk';
98
106
 
99
107
  const session = createClient({ ... });
@@ -102,15 +110,15 @@ await session.setEmotion('happy');
102
110
 
103
111
  ### Agent Events
104
112
 
105
- The `emotion` event is emitted when an agent detects an emotion change:
113
+ The `emotion` event is emitted when the agent triggers a `set_emotion` tool call:
106
114
 
107
115
  ```typescript
108
116
  agent.on('emotion', (emotion) => {
109
- console.log('Emotion detected:', emotion); // 'neutral' | 'angry' | 'sad' | 'happy'
117
+ console.log('Emotion changed:', emotion); // 'neutral' | 'angry' | 'sad' | 'happy'
110
118
  });
111
119
  ```
112
120
 
113
- Currently, only the ElevenLabs agent emits emotion events.
121
+ Currently, only the ElevenLabs agent emits emotion events via tool calls.
114
122
 
115
123
  ## API
116
124
 
@@ -20,7 +20,6 @@ export declare class ElevenLabsAgent extends BaseAgent {
20
20
  private sourceInputSampleRate;
21
21
  private initialized;
22
22
  private lastInterruptId;
23
- private emotionEmittedForEventId;
24
23
  connect(config: ElevenLabsConfig): Promise<void>;
25
24
  protected handleParsedMessage(message: unknown): void;
26
25
  private handleInitMetadata;
@@ -28,6 +27,7 @@ export declare class ElevenLabsAgent extends BaseAgent {
28
27
  private handleAudio;
29
28
  private handleUserTranscript;
30
29
  private handleAgentResponse;
30
+ private handleClientToolCall;
31
31
  private handleInterruption;
32
32
  sendAudio(pcmData: Uint8Array): void;
33
33
  /**
package/dist/index.js CHANGED
@@ -12,17 +12,17 @@ function m(i) {
12
12
  e += String.fromCharCode(i[t]);
13
13
  return btoa(e);
14
14
  }
15
- function c(i, e, t) {
15
+ function h(i, e, t) {
16
16
  if (e === t)
17
17
  return i;
18
- const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n), r = new Int16Array(a);
18
+ const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n), d = new Int16Array(a);
19
19
  for (let o = 0; o < a; o++) {
20
- const _ = o * n, p = Math.floor(_), b = Math.min(p + 1, s.length - 1), v = _ - p;
21
- r[o] = Math.round(
20
+ const S = o * n, p = Math.floor(S), b = Math.min(p + 1, s.length - 1), v = S - p;
21
+ d[o] = Math.round(
22
22
  s[p] * (1 - v) + s[b] * v
23
23
  );
24
24
  }
25
- return new Uint8Array(r.buffer);
25
+ return new Uint8Array(d.buffer);
26
26
  }
27
27
  function E() {
28
28
  const i = /* @__PURE__ */ new Map();
@@ -49,12 +49,12 @@ function w(i) {
49
49
  }
50
50
  return new Uint8Array(e.buffer);
51
51
  }
52
- const I = 16e3;
52
+ const C = 16e3;
53
53
  class u {
54
54
  ws = null;
55
55
  _state = "idle";
56
56
  events = E();
57
- inputSampleRate = I;
57
+ inputSampleRate = C;
58
58
  /** Current agent state */
59
59
  get state() {
60
60
  return this._state;
@@ -113,7 +113,7 @@ class u {
113
113
  this.events.emit("closed", { code: e, reason: t });
114
114
  }
115
115
  }
116
- const A = "gemini-2.5-flash-native-audio-preview-12-2025", C = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent", k = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
116
+ const A = "gemini-2.5-flash-native-audio-preview-12-2025", I = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent", k = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
117
117
  class R extends u {
118
118
  agentName = "GeminiLive";
119
119
  async connect(e) {
@@ -122,8 +122,8 @@ class R extends u {
122
122
  if (!e.apiKey)
123
123
  throw new Error("Gemini API key is required");
124
124
  e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate);
125
- const t = e.model ?? A, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${k}?access_token=${encodeURIComponent(e.apiKey)}` : `${C}?key=${encodeURIComponent(e.apiKey)}`;
126
- return new Promise((a, r) => {
125
+ const t = e.model ?? A, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${k}?access_token=${encodeURIComponent(e.apiKey)}` : `${I}?key=${encodeURIComponent(e.apiKey)}`;
126
+ return new Promise((a, d) => {
127
127
  this.ws = new WebSocket(n), this.ws.onopen = () => {
128
128
  const o = {
129
129
  setup: {
@@ -136,7 +136,7 @@ class R extends u {
136
136
  };
137
137
  this.ws.send(JSON.stringify(o)), this.setState("listening"), a();
138
138
  }, this.ws.onerror = () => {
139
- r(new Error("Failed to connect to Gemini Live"));
139
+ d(new Error("Failed to connect to Gemini Live"));
140
140
  }, this.ws.onclose = (o) => {
141
141
  this.ws = null, this.setState("idle"), this.emitClosed(o.code, o.reason);
142
142
  }, this.ws.onmessage = (o) => {
@@ -189,8 +189,8 @@ class R extends u {
189
189
  this.ws.send(JSON.stringify(t));
190
190
  }
191
191
  }
192
- const M = ["neutral", "angry", "sad", "happy"], x = "wss://api.elevenlabs.io/v1/convai/conversation";
193
- class S extends u {
192
+ const M = ["neutral", "angry", "sad", "happy"], T = "wss://api.elevenlabs.io/v1/convai/conversation";
193
+ class _ extends u {
194
194
  agentName = "ElevenLabs";
195
195
  outputSampleRate = 24e3;
196
196
  // Default, updated from metadata
@@ -202,8 +202,6 @@ class S extends u {
202
202
  // True after conversation_initiation_metadata received
203
203
  lastInterruptId = 0;
204
204
  // Track interruptions to filter stale audio
205
- emotionEmittedForEventId = -1;
206
- // Track which turn's emotion we've already emitted
207
205
  async connect(e) {
208
206
  if (this.ws)
209
207
  throw new Error("Already connected");
@@ -211,7 +209,7 @@ class S extends u {
211
209
  throw new Error("ElevenLabs agent ID or signed URL is required");
212
210
  e.inputSampleRate && (this.sourceInputSampleRate = e.inputSampleRate);
213
211
  let t;
214
- return e.signedUrl ? t = e.signedUrl : (t = `${x}?agent_id=${e.agentId}`, e.apiKey && (t += `&xi-api-key=${e.apiKey}`)), new Promise((s, n) => {
212
+ return e.signedUrl ? t = e.signedUrl : (t = `${T}?agent_id=${e.agentId}`, e.apiKey && (t += `&xi-api-key=${e.apiKey}`)), new Promise((s, n) => {
215
213
  this.ws = new WebSocket(t), this.ws.onopen = () => {
216
214
  this.setState("listening"), s();
217
215
  }, this.ws.onerror = () => {
@@ -244,6 +242,9 @@ class S extends u {
244
242
  case "interruption":
245
243
  this.handleInterruption(t);
246
244
  break;
245
+ case "client_tool_call":
246
+ this.handleClientToolCall(t);
247
+ break;
247
248
  case "agent_response_correction":
248
249
  this.setState("listening");
249
250
  break;
@@ -271,19 +272,11 @@ class S extends u {
271
272
  }
272
273
  handleAudio(e) {
273
274
  const t = e.audio_event;
274
- if (!t?.audio_base_64) return;
275
- const s = t.event_id ?? 0;
276
- if (s <= this.lastInterruptId)
275
+ if (!t?.audio_base_64 || (t.event_id ?? 0) <= this.lastInterruptId)
277
276
  return;
278
- if (this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking")), this.emotionEmittedForEventId !== s && t.alignment?.chars) {
279
- const r = t.alignment.chars.join("").match(/\[(\w+)\]/);
280
- if (r) {
281
- const o = r[1].toLowerCase();
282
- M.includes(o) && (this.events.emit("emotion", o), this.emotionEmittedForEventId = s);
283
- }
284
- }
277
+ this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
285
278
  let n = g(t.audio_base_64);
286
- this.outputSampleRate !== l && (n = c(n, this.outputSampleRate, l)), this.events.emit("audio", n);
279
+ this.outputSampleRate !== l && (n = h(n, this.outputSampleRate, l)), this.events.emit("audio", n);
287
280
  }
288
281
  handleUserTranscript(e) {
289
282
  const t = e.user_transcription_event;
@@ -301,6 +294,21 @@ class S extends u {
301
294
  isFinal: !0
302
295
  }));
303
296
  }
297
+ handleClientToolCall(e) {
298
+ const t = e.client_tool_call;
299
+ if (t) {
300
+ if (t.tool_name === "set_emotion") {
301
+ const s = t.parameters?.emotion?.toLowerCase();
302
+ s && M.includes(s) && this.events.emit("emotion", s);
303
+ }
304
+ this.ws && this.ws.readyState === WebSocket.OPEN && this.ws.send(JSON.stringify({
305
+ type: "client_tool_result",
306
+ tool_call_id: t.tool_call_id,
307
+ result: "ok",
308
+ is_error: !1
309
+ }));
310
+ }
311
+ }
304
312
  handleInterruption(e) {
305
313
  const t = e.interruption_event;
306
314
  t?.event_id && (this.lastInterruptId = t.event_id), this.events.emit("interrupted", void 0), this.setState("listening");
@@ -309,7 +317,7 @@ class S extends u {
309
317
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN || !this.initialized)
310
318
  return;
311
319
  let t = e;
312
- this.sourceInputSampleRate !== this.expectedInputSampleRate && (t = c(e, this.sourceInputSampleRate, this.expectedInputSampleRate)), this.ws.send(JSON.stringify({
320
+ this.sourceInputSampleRate !== this.expectedInputSampleRate && (t = h(e, this.sourceInputSampleRate, this.expectedInputSampleRate)), this.ws.send(JSON.stringify({
313
321
  user_audio_chunk: m(t)
314
322
  }));
315
323
  }
@@ -343,7 +351,7 @@ class S extends u {
343
351
  this.initialized = !1, this.lastInterruptId = 0, super.close();
344
352
  }
345
353
  }
346
- const P = "wss://api.cartesia.ai/agents/stream", T = "2025-04-16";
354
+ const P = "wss://api.cartesia.ai/agents/stream", x = "2025-04-16";
347
355
  class O extends u {
348
356
  agentName = "Cartesia";
349
357
  // Audio configuration
@@ -363,7 +371,7 @@ class O extends u {
363
371
  if (!e.apiKey)
364
372
  throw new Error("Cartesia API Key is required");
365
373
  e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate), this.inputSampleRate === 16e3 ? this.cartesiaInputFormat = "pcm_16000" : this.inputSampleRate === 24e3 ? this.cartesiaInputFormat = "pcm_24000" : this.inputSampleRate === 44100 ? this.cartesiaInputFormat = "pcm_44100" : this.cartesiaInputFormat = "pcm_16000";
366
- const t = `${P}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${T}`;
374
+ const t = `${P}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${x}`;
367
375
  return new Promise((s, n) => {
368
376
  this.ws = new WebSocket(t), this.ws.onopen = () => {
369
377
  this.sendStartEvent(), this.startHeartbeat(), s();
@@ -426,7 +434,7 @@ class O extends u {
426
434
  if (!e.media?.payload) return;
427
435
  this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
428
436
  let t = g(e.media.payload);
429
- this.cartesiaOutputRate !== l && (t = c(t, this.cartesiaOutputRate, l)), this.events.emit("audio", t);
437
+ this.cartesiaOutputRate !== l && (t = h(t, this.cartesiaOutputRate, l)), this.events.emit("audio", t);
430
438
  }
431
439
  handleClear() {
432
440
  this.events.emit("interrupted", void 0), this.setState("listening");
@@ -436,7 +444,7 @@ class O extends u {
436
444
  return;
437
445
  let t = e;
438
446
  const s = parseInt(this.cartesiaInputFormat.split("_")[1]);
439
- this.inputSampleRate !== s && (t = c(e, this.inputSampleRate, s)), this.ws.send(JSON.stringify({
447
+ this.inputSampleRate !== s && (t = h(e, this.inputSampleRate, s)), this.ws.send(JSON.stringify({
440
448
  event: "media_input",
441
449
  stream_id: this.streamId,
442
450
  media: {
@@ -475,7 +483,7 @@ class N extends u {
475
483
  */
476
484
  handleBinaryAudio(e) {
477
485
  this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
478
- const t = new Uint8Array(e), s = this.vapiSampleRate !== l ? c(t, this.vapiSampleRate, l) : t;
486
+ const t = new Uint8Array(e), s = this.vapiSampleRate !== l ? h(t, this.vapiSampleRate, l) : t;
479
487
  this.events.emit("audio", s);
480
488
  }
481
489
  handleParsedMessage(e) {
@@ -515,7 +523,7 @@ class N extends u {
515
523
  if (!this.ws || this.ws.readyState !== WebSocket.OPEN)
516
524
  return;
517
525
  let t = e;
518
- this.inputSampleRate !== this.vapiSampleRate && (t = c(e, this.inputSampleRate, this.vapiSampleRate)), this.ws.send(t.buffer);
526
+ this.inputSampleRate !== this.vapiSampleRate && (t = h(e, this.inputSampleRate, this.vapiSampleRate)), this.ws.send(t.buffer);
519
527
  }
520
528
  /**
521
529
  * Send a control message to end the call.
@@ -538,7 +546,7 @@ function y(i) {
538
546
  case "gemini":
539
547
  return new R();
540
548
  case "elevenlabs":
541
- return new S();
549
+ return new _();
542
550
  case "cartesia":
543
551
  return new O();
544
552
  case "vapi":
@@ -558,7 +566,7 @@ class D extends Error {
558
566
  super(e.message), this.name = "ApiError", this.status = e.status, this.payload = e.payload, this.url = e.url;
559
567
  }
560
568
  }
561
- const h = /* @__PURE__ */ new Set();
569
+ const r = /* @__PURE__ */ new Set();
562
570
  class K {
563
571
  apiBaseUrl;
564
572
  publishableKey;
@@ -603,31 +611,31 @@ class K {
603
611
  }
604
612
  /** Connect to the embed session */
605
613
  async connect() {
606
- if (h.has(this.publishableKey)) {
614
+ if (r.has(this.publishableKey)) {
607
615
  console.log("[PersonaEmbed] Connection already in progress, skipping");
608
616
  return;
609
617
  }
610
- h.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
618
+ r.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
611
619
  try {
612
620
  const e = await this.fetchSession(this.abortController.signal);
613
621
  if (!this.mounted) {
614
- h.delete(this.publishableKey);
622
+ r.delete(this.publishableKey);
615
623
  return;
616
624
  }
617
625
  if (await this.initSession(e), await this.initMicrophone(), await this.connectAgent(e.voice_agent_details), !this.mounted) {
618
- this.cleanup(), h.delete(this.publishableKey);
626
+ this.cleanup(), r.delete(this.publishableKey);
619
627
  return;
620
628
  }
621
629
  this.setStatus("connected");
622
630
  } catch (e) {
623
- if (h.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
631
+ if (r.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
624
632
  return;
625
633
  console.error("[PersonaEmbed]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
626
634
  }
627
635
  }
628
636
  /** Disconnect and cleanup */
629
637
  disconnect() {
630
- this.mounted = !1, this.abortController?.abort(), this.abortController = null, h.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
638
+ this.mounted = !1, this.abortController?.abort(), this.abortController = null, r.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
631
639
  }
632
640
  /** Toggle microphone mute */
633
641
  toggleMute() {
@@ -693,7 +701,7 @@ class K {
693
701
  this.session?.endAudioTurn(), this.session?.interrupt();
694
702
  }), this.agent.on("closed", () => {
695
703
  this.mounted && this.callbacks.onDisconnect?.();
696
- }), this.agent instanceof S && this.agent.on("emotion", (t) => this.session?.setEmotion(t)), await this.session.connect();
704
+ }), this.agent instanceof _ && this.agent.on("emotion", (t) => this.session?.setEmotion(t)), await this.session.connect();
697
705
  }
698
706
  async initMicrophone() {
699
707
  this.stream = await navigator.mediaDevices.getUserMedia({
@@ -731,7 +739,7 @@ class K {
731
739
  this.stream?.getTracks().forEach((e) => e.stop()), this.processor?.disconnect(), this.audioContext?.close(), this.agent?.close(), this.session?.close(), this.stream = null, this.processor = null, this.audioContext = null, this.agent = null, this.session = null;
732
740
  }
733
741
  }
734
- const d = /* @__PURE__ */ new Set();
742
+ const c = /* @__PURE__ */ new Set();
735
743
  class B {
736
744
  voiceAgentDetails;
737
745
  sessionDetails;
@@ -776,24 +784,24 @@ class B {
776
784
  }
777
785
  /** Connect to the session */
778
786
  async connect() {
779
- if (d.has(this.connectionId)) {
787
+ if (c.has(this.connectionId)) {
780
788
  console.log("[PersonaView] Connection already in progress, skipping");
781
789
  return;
782
790
  }
783
- d.add(this.connectionId), this.mounted = !0, this.setStatus("connecting");
791
+ c.add(this.connectionId), this.mounted = !0, this.setStatus("connecting");
784
792
  try {
785
793
  if (await this.initSession(), await this.initMicrophone(), await this.connectAgent(), !this.mounted) {
786
- this.cleanup(), d.delete(this.connectionId);
794
+ this.cleanup(), c.delete(this.connectionId);
787
795
  return;
788
796
  }
789
797
  this.setStatus("connected");
790
798
  } catch (e) {
791
- d.delete(this.connectionId), console.error("[PersonaView]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
799
+ c.delete(this.connectionId), console.error("[PersonaView]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
792
800
  }
793
801
  }
794
802
  /** Disconnect and cleanup */
795
803
  disconnect() {
796
- this.mounted = !1, d.delete(this.connectionId), this.cleanup(), this.setStatus("disconnected");
804
+ this.mounted = !1, c.delete(this.connectionId), this.cleanup(), this.setStatus("disconnected");
797
805
  }
798
806
  /** Toggle microphone mute */
799
807
  toggleMute() {
@@ -833,7 +841,7 @@ class B {
833
841
  this.session?.endAudioTurn(), this.session?.interrupt();
834
842
  }), this.agent.on("closed", () => {
835
843
  this.mounted && this.callbacks.onDisconnect?.();
836
- }), this.agent instanceof S && this.agent.on("emotion", (e) => this.session?.setEmotion(e)), await this.session.connect();
844
+ }), this.agent instanceof _ && this.agent.on("emotion", (e) => this.session?.setEmotion(e)), await this.session.connect();
837
845
  }
838
846
  async initMicrophone() {
839
847
  this.stream = await navigator.mediaDevices.getUserMedia({
@@ -875,7 +883,7 @@ export {
875
883
  U as AGENT_REGISTRY,
876
884
  u as BaseAgent,
877
885
  O as CartesiaAgent,
878
- S as ElevenLabsAgent,
886
+ _ as ElevenLabsAgent,
879
887
  R as GeminiLiveAgent,
880
888
  D as KeyframeApiError,
881
889
  K as PersonaEmbed,
@@ -887,5 +895,5 @@ export {
887
895
  E as createEventEmitter,
888
896
  w as floatTo16BitPCM,
889
897
  F as getAgentInfo,
890
- c as resamplePcm
898
+ h as resamplePcm
891
899
  };
package/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
7
- "version": "0.1.0",
7
+ "version": "0.2.0",
8
8
  "type": "module",
9
9
  "main": "./dist/index.js",
10
10
  "types": "./dist/index.d.ts",