@keyframelabs/elements 0.0.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -10
- package/dist/PersonaEmbed.d.ts +1 -1
- package/dist/PersonaView.d.ts +1 -1
- package/dist/agents/elevenlabs.d.ts +1 -1
- package/dist/index.d.ts +2 -1
- package/dist/index.js +77 -63
- package/dist/types.d.ts +2 -1
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -81,19 +81,27 @@ For `PersonaView`, this is determined by `voiceAgentDetails`.
|
|
|
81
81
|
|
|
82
82
|
The avatar can display emotional expressions (`neutral`, `angry`, `sad`, `happy`) that affect its facial expression and demeanor.
|
|
83
83
|
|
|
84
|
-
###
|
|
84
|
+
### ElevenLabs: `set_emotion` Tool Call
|
|
85
85
|
|
|
86
|
-
When using ElevenLabs as the voice agent, emotions are
|
|
86
|
+
When using ElevenLabs as the voice agent, emotions are driven by a **client tool call** named `set_emotion`. The ElevenLabs agent parses incoming `client_tool_call` WebSocket messages and, when the tool name is `set_emotion`, updates the avatar's expression accordingly.
|
|
87
87
|
|
|
88
|
-
|
|
88
|
+
> **Important:** Transcripts from the ElevenLabs agent are **not** automatically consumed. The `transcript` event is emitted, but it is up to you to subscribe to it if you need transcript data.
|
|
89
|
+
|
|
90
|
+
#### Setup
|
|
91
|
+
|
|
92
|
+
You must create a `set_emotion` tool in the [ElevenLabs API](https://elevenlabs.io/docs) for your agent. The tool should accept a single parameter:
|
|
93
|
+
|
|
94
|
+
| Parameter | Type | Description |
|
|
95
|
+
| --------- | -------- | -------------------------------------------------------- |
|
|
96
|
+
| `emotion` | `enum` | One of `neutral`, `angry`, `sad`, `happy`. |
|
|
97
|
+
|
|
98
|
+
Then instruct your agent (via its system prompt) to call `set_emotion` on each turn with the appropriate emotion. The client library handles the rest — it validates the emotion, emits an `emotion` event, and sends a `client_tool_result` back to ElevenLabs.
|
|
89
99
|
|
|
90
100
|
### Manual Emotion Control
|
|
91
101
|
|
|
92
102
|
For other agents or custom emotion logic, you can access the underlying session to set emotions manually:
|
|
93
103
|
|
|
94
104
|
```typescript
|
|
95
|
-
// Access the underlying SDK session for manual control
|
|
96
|
-
// (Available when using @keyframelabs/sdk directly)
|
|
97
105
|
import { createClient } from '@keyframelabs/sdk';
|
|
98
106
|
|
|
99
107
|
const session = createClient({ ... });
|
|
@@ -102,15 +110,15 @@ await session.setEmotion('happy');
|
|
|
102
110
|
|
|
103
111
|
### Agent Events
|
|
104
112
|
|
|
105
|
-
The `emotion` event is emitted when
|
|
113
|
+
The `emotion` event is emitted when the agent triggers a `set_emotion` tool call:
|
|
106
114
|
|
|
107
115
|
```typescript
|
|
108
116
|
agent.on('emotion', (emotion) => {
|
|
109
|
-
console.log('Emotion
|
|
117
|
+
console.log('Emotion changed:', emotion); // 'neutral' | 'angry' | 'sad' | 'happy'
|
|
110
118
|
});
|
|
111
119
|
```
|
|
112
120
|
|
|
113
|
-
Currently, only the ElevenLabs agent emits emotion events.
|
|
121
|
+
Currently, only the ElevenLabs agent emits emotion events via tool calls.
|
|
114
122
|
|
|
115
123
|
## API
|
|
116
124
|
|
|
@@ -125,7 +133,7 @@ Currently, only the ElevenLabs agent emits emotion events.
|
|
|
125
133
|
| `apiBaseUrl` | `string` | `'https://api.keyframelabs.com'` | Base URL for the Keyframe API. |
|
|
126
134
|
| `videoFit` | `'cover' \| 'contain'` | `'cover'` | Video scaling mode (`object-fit`). |
|
|
127
135
|
| `onStateChange` | `(status: EmbedStatus) => void` | — | Fired when connection status changes. |
|
|
128
|
-
| `onAgentStateChange` | `(state: AgentState) => void` | — | Fired when
|
|
136
|
+
| `onAgentStateChange` | `(state: AgentState) => void` | — | Fired when avatar playback state changes. Signaled by the GPU node via RPC, not the voice agent. |
|
|
129
137
|
| `onDisconnect` | `() => void` | — | Fired when the session disconnects. |
|
|
130
138
|
| `onError` | `(err: Error) => void` | — | Fired on fatal errors. |
|
|
131
139
|
|
|
@@ -142,7 +150,7 @@ Currently, only the ElevenLabs agent emits emotion events.
|
|
|
142
150
|
| Property | Type | Description |
|
|
143
151
|
| -------------- | ------------------ | -------------------------------------------------------------------------------------- |
|
|
144
152
|
| `status` | `EmbedStatus` | Current connection status: `'connecting' \| 'connected' \| 'disconnected' \| 'error'`. |
|
|
145
|
-
| `agentState` | `AgentState` |
|
|
153
|
+
| `agentState` | `AgentState` | Avatar playback state: `'listening' \| 'speaking'`. Set by the GPU node, not the voice agent. |
|
|
146
154
|
| `isMuted` | `boolean` | Whether the microphone is currently muted. |
|
|
147
155
|
| `videoElement` | `HTMLVideoElement` | The underlying video element used for rendering. |
|
|
148
156
|
| `audioElement` | `HTMLAudioElement` | The underlying audio element used for playback. |
|
package/dist/PersonaEmbed.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { AgentState } from '
|
|
1
|
+
import { AgentState } from '@keyframelabs/sdk';
|
|
2
2
|
import { EmbedStatus, VideoFit, BaseCallbacks } from './types';
|
|
3
3
|
export type { EmbedStatus, VideoFit } from './types';
|
|
4
4
|
export interface PersonaEmbedOptions extends BaseCallbacks {
|
package/dist/PersonaView.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { AgentState } from '
|
|
1
|
+
import { AgentState } from '@keyframelabs/sdk';
|
|
2
2
|
import { EmbedStatus, VideoFit, VoiceAgentDetails, SessionDetails, BaseCallbacks } from './types';
|
|
3
3
|
export interface PersonaViewOptions extends BaseCallbacks {
|
|
4
4
|
/** Target container element */
|
|
@@ -20,7 +20,6 @@ export declare class ElevenLabsAgent extends BaseAgent {
|
|
|
20
20
|
private sourceInputSampleRate;
|
|
21
21
|
private initialized;
|
|
22
22
|
private lastInterruptId;
|
|
23
|
-
private emotionEmittedForEventId;
|
|
24
23
|
connect(config: ElevenLabsConfig): Promise<void>;
|
|
25
24
|
protected handleParsedMessage(message: unknown): void;
|
|
26
25
|
private handleInitMetadata;
|
|
@@ -28,6 +27,7 @@ export declare class ElevenLabsAgent extends BaseAgent {
|
|
|
28
27
|
private handleAudio;
|
|
29
28
|
private handleUserTranscript;
|
|
30
29
|
private handleAgentResponse;
|
|
30
|
+
private handleClientToolCall;
|
|
31
31
|
private handleInterruption;
|
|
32
32
|
sendAudio(pcmData: Uint8Array): void;
|
|
33
33
|
/**
|
package/dist/index.d.ts
CHANGED
|
@@ -4,7 +4,8 @@ export { PersonaView } from './PersonaView';
|
|
|
4
4
|
export type { PersonaViewOptions } from './PersonaView';
|
|
5
5
|
export type { EmbedStatus, VideoFit, VoiceAgentDetails, SessionDetails, BaseCallbacks, } from './types';
|
|
6
6
|
export { createAgent, GeminiLiveAgent, ElevenLabsAgent, CartesiaAgent, BaseAgent, AGENT_REGISTRY, getAgentInfo, } from './agents';
|
|
7
|
-
export type { AgentType,
|
|
7
|
+
export type { AgentType, AgentConfig, AgentEventMap, Agent, AnyAgent, AgentTypeInfo, GeminiLiveConfig, ElevenLabsConfig, CartesiaConfig, } from './agents';
|
|
8
|
+
export type { AgentState } from '@keyframelabs/sdk';
|
|
8
9
|
export { floatTo16BitPCM, resamplePcm, base64ToBytes, bytesToBase64, SAMPLE_RATE, createEventEmitter, } from './agents';
|
|
9
10
|
export { ApiError as KeyframeApiError } from './ApiError';
|
|
10
11
|
export type { ApiErrorPayload as KeyframeApiErrorPayload } from './ApiError';
|
package/dist/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { createClient as
|
|
1
|
+
import { createClient as f } from "@keyframelabs/sdk";
|
|
2
2
|
const l = 24e3;
|
|
3
3
|
function g(i) {
|
|
4
4
|
const e = atob(i), t = new Uint8Array(e.length);
|
|
@@ -12,17 +12,17 @@ function m(i) {
|
|
|
12
12
|
e += String.fromCharCode(i[t]);
|
|
13
13
|
return btoa(e);
|
|
14
14
|
}
|
|
15
|
-
function
|
|
15
|
+
function h(i, e, t) {
|
|
16
16
|
if (e === t)
|
|
17
17
|
return i;
|
|
18
|
-
const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n),
|
|
18
|
+
const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n), d = new Int16Array(a);
|
|
19
19
|
for (let o = 0; o < a; o++) {
|
|
20
|
-
const
|
|
21
|
-
|
|
20
|
+
const S = o * n, p = Math.floor(S), b = Math.min(p + 1, s.length - 1), v = S - p;
|
|
21
|
+
d[o] = Math.round(
|
|
22
22
|
s[p] * (1 - v) + s[b] * v
|
|
23
23
|
);
|
|
24
24
|
}
|
|
25
|
-
return new Uint8Array(
|
|
25
|
+
return new Uint8Array(d.buffer);
|
|
26
26
|
}
|
|
27
27
|
function E() {
|
|
28
28
|
const i = /* @__PURE__ */ new Map();
|
|
@@ -41,7 +41,7 @@ function E() {
|
|
|
41
41
|
}
|
|
42
42
|
};
|
|
43
43
|
}
|
|
44
|
-
function
|
|
44
|
+
function w(i) {
|
|
45
45
|
const e = new Int16Array(i.length);
|
|
46
46
|
for (let t = 0; t < i.length; t++) {
|
|
47
47
|
const s = Math.max(-1, Math.min(1, i[t]));
|
|
@@ -49,12 +49,12 @@ function f(i) {
|
|
|
49
49
|
}
|
|
50
50
|
return new Uint8Array(e.buffer);
|
|
51
51
|
}
|
|
52
|
-
const
|
|
52
|
+
const C = 16e3;
|
|
53
53
|
class u {
|
|
54
54
|
ws = null;
|
|
55
55
|
_state = "idle";
|
|
56
56
|
events = E();
|
|
57
|
-
inputSampleRate =
|
|
57
|
+
inputSampleRate = C;
|
|
58
58
|
/** Current agent state */
|
|
59
59
|
get state() {
|
|
60
60
|
return this._state;
|
|
@@ -113,7 +113,7 @@ class u {
|
|
|
113
113
|
this.events.emit("closed", { code: e, reason: t });
|
|
114
114
|
}
|
|
115
115
|
}
|
|
116
|
-
const A = "gemini-2.5-flash-native-audio-preview-12-2025",
|
|
116
|
+
const A = "gemini-2.5-flash-native-audio-preview-12-2025", I = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent", k = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
|
|
117
117
|
class R extends u {
|
|
118
118
|
agentName = "GeminiLive";
|
|
119
119
|
async connect(e) {
|
|
@@ -122,8 +122,8 @@ class R extends u {
|
|
|
122
122
|
if (!e.apiKey)
|
|
123
123
|
throw new Error("Gemini API key is required");
|
|
124
124
|
e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate);
|
|
125
|
-
const t = e.model ?? A, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${k}?access_token=${encodeURIComponent(e.apiKey)}` : `${
|
|
126
|
-
return new Promise((a,
|
|
125
|
+
const t = e.model ?? A, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${k}?access_token=${encodeURIComponent(e.apiKey)}` : `${I}?key=${encodeURIComponent(e.apiKey)}`;
|
|
126
|
+
return new Promise((a, d) => {
|
|
127
127
|
this.ws = new WebSocket(n), this.ws.onopen = () => {
|
|
128
128
|
const o = {
|
|
129
129
|
setup: {
|
|
@@ -136,7 +136,7 @@ class R extends u {
|
|
|
136
136
|
};
|
|
137
137
|
this.ws.send(JSON.stringify(o)), this.setState("listening"), a();
|
|
138
138
|
}, this.ws.onerror = () => {
|
|
139
|
-
|
|
139
|
+
d(new Error("Failed to connect to Gemini Live"));
|
|
140
140
|
}, this.ws.onclose = (o) => {
|
|
141
141
|
this.ws = null, this.setState("idle"), this.emitClosed(o.code, o.reason);
|
|
142
142
|
}, this.ws.onmessage = (o) => {
|
|
@@ -189,8 +189,8 @@ class R extends u {
|
|
|
189
189
|
this.ws.send(JSON.stringify(t));
|
|
190
190
|
}
|
|
191
191
|
}
|
|
192
|
-
const M = ["neutral", "angry", "sad", "happy"],
|
|
193
|
-
class
|
|
192
|
+
const M = ["neutral", "angry", "sad", "happy"], T = "wss://api.elevenlabs.io/v1/convai/conversation";
|
|
193
|
+
class _ extends u {
|
|
194
194
|
agentName = "ElevenLabs";
|
|
195
195
|
outputSampleRate = 24e3;
|
|
196
196
|
// Default, updated from metadata
|
|
@@ -202,8 +202,6 @@ class S extends u {
|
|
|
202
202
|
// True after conversation_initiation_metadata received
|
|
203
203
|
lastInterruptId = 0;
|
|
204
204
|
// Track interruptions to filter stale audio
|
|
205
|
-
emotionEmittedForEventId = -1;
|
|
206
|
-
// Track which turn's emotion we've already emitted
|
|
207
205
|
async connect(e) {
|
|
208
206
|
if (this.ws)
|
|
209
207
|
throw new Error("Already connected");
|
|
@@ -211,7 +209,7 @@ class S extends u {
|
|
|
211
209
|
throw new Error("ElevenLabs agent ID or signed URL is required");
|
|
212
210
|
e.inputSampleRate && (this.sourceInputSampleRate = e.inputSampleRate);
|
|
213
211
|
let t;
|
|
214
|
-
return e.signedUrl ? t = e.signedUrl : (t = `${
|
|
212
|
+
return e.signedUrl ? t = e.signedUrl : (t = `${T}?agent_id=${e.agentId}`, e.apiKey && (t += `&xi-api-key=${e.apiKey}`)), new Promise((s, n) => {
|
|
215
213
|
this.ws = new WebSocket(t), this.ws.onopen = () => {
|
|
216
214
|
this.setState("listening"), s();
|
|
217
215
|
}, this.ws.onerror = () => {
|
|
@@ -244,6 +242,9 @@ class S extends u {
|
|
|
244
242
|
case "interruption":
|
|
245
243
|
this.handleInterruption(t);
|
|
246
244
|
break;
|
|
245
|
+
case "client_tool_call":
|
|
246
|
+
this.handleClientToolCall(t);
|
|
247
|
+
break;
|
|
247
248
|
case "agent_response_correction":
|
|
248
249
|
this.setState("listening");
|
|
249
250
|
break;
|
|
@@ -271,19 +272,11 @@ class S extends u {
|
|
|
271
272
|
}
|
|
272
273
|
handleAudio(e) {
|
|
273
274
|
const t = e.audio_event;
|
|
274
|
-
if (!t?.audio_base_64)
|
|
275
|
-
const s = t.event_id ?? 0;
|
|
276
|
-
if (s <= this.lastInterruptId)
|
|
275
|
+
if (!t?.audio_base_64 || (t.event_id ?? 0) <= this.lastInterruptId)
|
|
277
276
|
return;
|
|
278
|
-
|
|
279
|
-
const r = t.alignment.chars.join("").match(/\[(\w+)\]/);
|
|
280
|
-
if (r) {
|
|
281
|
-
const o = r[1].toLowerCase();
|
|
282
|
-
M.includes(o) && (this.events.emit("emotion", o), this.emotionEmittedForEventId = s);
|
|
283
|
-
}
|
|
284
|
-
}
|
|
277
|
+
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
285
278
|
let n = g(t.audio_base_64);
|
|
286
|
-
this.outputSampleRate !== l && (n =
|
|
279
|
+
this.outputSampleRate !== l && (n = h(n, this.outputSampleRate, l)), this.events.emit("audio", n);
|
|
287
280
|
}
|
|
288
281
|
handleUserTranscript(e) {
|
|
289
282
|
const t = e.user_transcription_event;
|
|
@@ -301,6 +294,21 @@ class S extends u {
|
|
|
301
294
|
isFinal: !0
|
|
302
295
|
}));
|
|
303
296
|
}
|
|
297
|
+
handleClientToolCall(e) {
|
|
298
|
+
const t = e.client_tool_call;
|
|
299
|
+
if (t) {
|
|
300
|
+
if (t.tool_name === "set_emotion") {
|
|
301
|
+
const s = t.parameters?.emotion?.toLowerCase();
|
|
302
|
+
s && M.includes(s) && this.events.emit("emotion", s);
|
|
303
|
+
}
|
|
304
|
+
this.ws && this.ws.readyState === WebSocket.OPEN && this.ws.send(JSON.stringify({
|
|
305
|
+
type: "client_tool_result",
|
|
306
|
+
tool_call_id: t.tool_call_id,
|
|
307
|
+
result: "ok",
|
|
308
|
+
is_error: !1
|
|
309
|
+
}));
|
|
310
|
+
}
|
|
311
|
+
}
|
|
304
312
|
handleInterruption(e) {
|
|
305
313
|
const t = e.interruption_event;
|
|
306
314
|
t?.event_id && (this.lastInterruptId = t.event_id), this.events.emit("interrupted", void 0), this.setState("listening");
|
|
@@ -309,7 +317,7 @@ class S extends u {
|
|
|
309
317
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN || !this.initialized)
|
|
310
318
|
return;
|
|
311
319
|
let t = e;
|
|
312
|
-
this.sourceInputSampleRate !== this.expectedInputSampleRate && (t =
|
|
320
|
+
this.sourceInputSampleRate !== this.expectedInputSampleRate && (t = h(e, this.sourceInputSampleRate, this.expectedInputSampleRate)), this.ws.send(JSON.stringify({
|
|
313
321
|
user_audio_chunk: m(t)
|
|
314
322
|
}));
|
|
315
323
|
}
|
|
@@ -343,7 +351,7 @@ class S extends u {
|
|
|
343
351
|
this.initialized = !1, this.lastInterruptId = 0, super.close();
|
|
344
352
|
}
|
|
345
353
|
}
|
|
346
|
-
const P = "wss://api.cartesia.ai/agents/stream",
|
|
354
|
+
const P = "wss://api.cartesia.ai/agents/stream", x = "2025-04-16";
|
|
347
355
|
class O extends u {
|
|
348
356
|
agentName = "Cartesia";
|
|
349
357
|
// Audio configuration
|
|
@@ -363,7 +371,7 @@ class O extends u {
|
|
|
363
371
|
if (!e.apiKey)
|
|
364
372
|
throw new Error("Cartesia API Key is required");
|
|
365
373
|
e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate), this.inputSampleRate === 16e3 ? this.cartesiaInputFormat = "pcm_16000" : this.inputSampleRate === 24e3 ? this.cartesiaInputFormat = "pcm_24000" : this.inputSampleRate === 44100 ? this.cartesiaInputFormat = "pcm_44100" : this.cartesiaInputFormat = "pcm_16000";
|
|
366
|
-
const t = `${P}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${
|
|
374
|
+
const t = `${P}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${x}`;
|
|
367
375
|
return new Promise((s, n) => {
|
|
368
376
|
this.ws = new WebSocket(t), this.ws.onopen = () => {
|
|
369
377
|
this.sendStartEvent(), this.startHeartbeat(), s();
|
|
@@ -426,7 +434,7 @@ class O extends u {
|
|
|
426
434
|
if (!e.media?.payload) return;
|
|
427
435
|
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
428
436
|
let t = g(e.media.payload);
|
|
429
|
-
this.cartesiaOutputRate !== l && (t =
|
|
437
|
+
this.cartesiaOutputRate !== l && (t = h(t, this.cartesiaOutputRate, l)), this.events.emit("audio", t);
|
|
430
438
|
}
|
|
431
439
|
handleClear() {
|
|
432
440
|
this.events.emit("interrupted", void 0), this.setState("listening");
|
|
@@ -436,7 +444,7 @@ class O extends u {
|
|
|
436
444
|
return;
|
|
437
445
|
let t = e;
|
|
438
446
|
const s = parseInt(this.cartesiaInputFormat.split("_")[1]);
|
|
439
|
-
this.inputSampleRate !== s && (t =
|
|
447
|
+
this.inputSampleRate !== s && (t = h(e, this.inputSampleRate, s)), this.ws.send(JSON.stringify({
|
|
440
448
|
event: "media_input",
|
|
441
449
|
stream_id: this.streamId,
|
|
442
450
|
media: {
|
|
@@ -475,7 +483,7 @@ class N extends u {
|
|
|
475
483
|
*/
|
|
476
484
|
handleBinaryAudio(e) {
|
|
477
485
|
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
478
|
-
const t = new Uint8Array(e), s = this.vapiSampleRate !== l ?
|
|
486
|
+
const t = new Uint8Array(e), s = this.vapiSampleRate !== l ? h(t, this.vapiSampleRate, l) : t;
|
|
479
487
|
this.events.emit("audio", s);
|
|
480
488
|
}
|
|
481
489
|
handleParsedMessage(e) {
|
|
@@ -515,7 +523,7 @@ class N extends u {
|
|
|
515
523
|
if (!this.ws || this.ws.readyState !== WebSocket.OPEN)
|
|
516
524
|
return;
|
|
517
525
|
let t = e;
|
|
518
|
-
this.inputSampleRate !== this.vapiSampleRate && (t =
|
|
526
|
+
this.inputSampleRate !== this.vapiSampleRate && (t = h(e, this.inputSampleRate, this.vapiSampleRate)), this.ws.send(t.buffer);
|
|
519
527
|
}
|
|
520
528
|
/**
|
|
521
529
|
* Send a control message to end the call.
|
|
@@ -538,7 +546,7 @@ function y(i) {
|
|
|
538
546
|
case "gemini":
|
|
539
547
|
return new R();
|
|
540
548
|
case "elevenlabs":
|
|
541
|
-
return new
|
|
549
|
+
return new _();
|
|
542
550
|
case "cartesia":
|
|
543
551
|
return new O();
|
|
544
552
|
case "vapi":
|
|
@@ -558,7 +566,7 @@ class D extends Error {
|
|
|
558
566
|
super(e.message), this.name = "ApiError", this.status = e.status, this.payload = e.payload, this.url = e.url;
|
|
559
567
|
}
|
|
560
568
|
}
|
|
561
|
-
const
|
|
569
|
+
const r = /* @__PURE__ */ new Set();
|
|
562
570
|
class K {
|
|
563
571
|
apiBaseUrl;
|
|
564
572
|
publishableKey;
|
|
@@ -574,7 +582,7 @@ class K {
|
|
|
574
582
|
stream = null;
|
|
575
583
|
abortController = null;
|
|
576
584
|
_status = "disconnected";
|
|
577
|
-
_agentState = "
|
|
585
|
+
_agentState = "listening";
|
|
578
586
|
_isMuted = !1;
|
|
579
587
|
mounted = !0;
|
|
580
588
|
constructor(e) {
|
|
@@ -603,31 +611,31 @@ class K {
|
|
|
603
611
|
}
|
|
604
612
|
/** Connect to the embed session */
|
|
605
613
|
async connect() {
|
|
606
|
-
if (
|
|
614
|
+
if (r.has(this.publishableKey)) {
|
|
607
615
|
console.log("[PersonaEmbed] Connection already in progress, skipping");
|
|
608
616
|
return;
|
|
609
617
|
}
|
|
610
|
-
|
|
618
|
+
r.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
|
|
611
619
|
try {
|
|
612
620
|
const e = await this.fetchSession(this.abortController.signal);
|
|
613
621
|
if (!this.mounted) {
|
|
614
|
-
|
|
622
|
+
r.delete(this.publishableKey);
|
|
615
623
|
return;
|
|
616
624
|
}
|
|
617
625
|
if (await this.initSession(e), await this.initMicrophone(), await this.connectAgent(e.voice_agent_details), !this.mounted) {
|
|
618
|
-
this.cleanup(),
|
|
626
|
+
this.cleanup(), r.delete(this.publishableKey);
|
|
619
627
|
return;
|
|
620
628
|
}
|
|
621
629
|
this.setStatus("connected");
|
|
622
630
|
} catch (e) {
|
|
623
|
-
if (
|
|
631
|
+
if (r.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
|
|
624
632
|
return;
|
|
625
633
|
console.error("[PersonaEmbed]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
|
|
626
634
|
}
|
|
627
635
|
}
|
|
628
636
|
/** Disconnect and cleanup */
|
|
629
637
|
disconnect() {
|
|
630
|
-
this.mounted = !1, this.abortController?.abort(), this.abortController = null,
|
|
638
|
+
this.mounted = !1, this.abortController?.abort(), this.abortController = null, r.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
|
|
631
639
|
}
|
|
632
640
|
/** Toggle microphone mute */
|
|
633
641
|
toggleMute() {
|
|
@@ -666,7 +674,7 @@ class K {
|
|
|
666
674
|
return t.json();
|
|
667
675
|
}
|
|
668
676
|
async initSession(e) {
|
|
669
|
-
this.session =
|
|
677
|
+
this.session = f({
|
|
670
678
|
serverUrl: e.session_details.server_url,
|
|
671
679
|
participantToken: e.session_details.participant_token,
|
|
672
680
|
agentIdentity: e.session_details.agent_identity,
|
|
@@ -680,6 +688,9 @@ class K {
|
|
|
680
688
|
onStateChange: (t) => {
|
|
681
689
|
this.mounted && t === "disconnected" && (this.setStatus("disconnected"), this.callbacks.onDisconnect?.());
|
|
682
690
|
},
|
|
691
|
+
onAgentStateChange: (t) => {
|
|
692
|
+
this.mounted && this.setAgentState(t);
|
|
693
|
+
},
|
|
683
694
|
onError: (t) => {
|
|
684
695
|
this.mounted && this.callbacks.onError?.(t);
|
|
685
696
|
},
|
|
@@ -688,9 +699,9 @@ class K {
|
|
|
688
699
|
}
|
|
689
700
|
}), this.agent = y(e.voice_agent_details.type), this.agent.on("audio", (t) => this.session?.sendAudio(t)), this.agent.on("turnEnd", () => this.session?.endAudioTurn()), this.agent.on("interrupted", () => {
|
|
690
701
|
this.session?.endAudioTurn(), this.session?.interrupt();
|
|
691
|
-
}), this.agent.on("
|
|
702
|
+
}), this.agent.on("closed", () => {
|
|
692
703
|
this.mounted && this.callbacks.onDisconnect?.();
|
|
693
|
-
}), this.agent instanceof
|
|
704
|
+
}), this.agent instanceof _ && this.agent.on("emotion", (t) => this.session?.setEmotion(t)), await this.session.connect();
|
|
694
705
|
}
|
|
695
706
|
async initMicrophone() {
|
|
696
707
|
this.stream = await navigator.mediaDevices.getUserMedia({
|
|
@@ -699,7 +710,7 @@ class K {
|
|
|
699
710
|
const e = this.audioContext.createMediaStreamSource(this.stream);
|
|
700
711
|
this.processor = this.audioContext.createScriptProcessor(4096, 1, 1), this.processor.onaudioprocess = (t) => {
|
|
701
712
|
if (!this._isMuted) {
|
|
702
|
-
const s =
|
|
713
|
+
const s = w(t.inputBuffer.getChannelData(0));
|
|
703
714
|
this.agent?.sendAudio(s);
|
|
704
715
|
}
|
|
705
716
|
}, e.connect(this.processor), this.processor.connect(this.audioContext.destination);
|
|
@@ -728,7 +739,7 @@ class K {
|
|
|
728
739
|
this.stream?.getTracks().forEach((e) => e.stop()), this.processor?.disconnect(), this.audioContext?.close(), this.agent?.close(), this.session?.close(), this.stream = null, this.processor = null, this.audioContext = null, this.agent = null, this.session = null;
|
|
729
740
|
}
|
|
730
741
|
}
|
|
731
|
-
const
|
|
742
|
+
const c = /* @__PURE__ */ new Set();
|
|
732
743
|
class B {
|
|
733
744
|
voiceAgentDetails;
|
|
734
745
|
sessionDetails;
|
|
@@ -744,7 +755,7 @@ class B {
|
|
|
744
755
|
processor = null;
|
|
745
756
|
stream = null;
|
|
746
757
|
_status = "disconnected";
|
|
747
|
-
_agentState = "
|
|
758
|
+
_agentState = "listening";
|
|
748
759
|
_isMuted = !1;
|
|
749
760
|
mounted = !0;
|
|
750
761
|
constructor(e) {
|
|
@@ -773,24 +784,24 @@ class B {
|
|
|
773
784
|
}
|
|
774
785
|
/** Connect to the session */
|
|
775
786
|
async connect() {
|
|
776
|
-
if (
|
|
787
|
+
if (c.has(this.connectionId)) {
|
|
777
788
|
console.log("[PersonaView] Connection already in progress, skipping");
|
|
778
789
|
return;
|
|
779
790
|
}
|
|
780
|
-
|
|
791
|
+
c.add(this.connectionId), this.mounted = !0, this.setStatus("connecting");
|
|
781
792
|
try {
|
|
782
793
|
if (await this.initSession(), await this.initMicrophone(), await this.connectAgent(), !this.mounted) {
|
|
783
|
-
this.cleanup(),
|
|
794
|
+
this.cleanup(), c.delete(this.connectionId);
|
|
784
795
|
return;
|
|
785
796
|
}
|
|
786
797
|
this.setStatus("connected");
|
|
787
798
|
} catch (e) {
|
|
788
|
-
|
|
799
|
+
c.delete(this.connectionId), console.error("[PersonaView]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
|
|
789
800
|
}
|
|
790
801
|
}
|
|
791
802
|
/** Disconnect and cleanup */
|
|
792
803
|
disconnect() {
|
|
793
|
-
this.mounted = !1,
|
|
804
|
+
this.mounted = !1, c.delete(this.connectionId), this.cleanup(), this.setStatus("disconnected");
|
|
794
805
|
}
|
|
795
806
|
/** Toggle microphone mute */
|
|
796
807
|
toggleMute() {
|
|
@@ -803,7 +814,7 @@ class B {
|
|
|
803
814
|
this._agentState !== e && (this._agentState = e, this.callbacks.onAgentStateChange?.(e));
|
|
804
815
|
}
|
|
805
816
|
async initSession() {
|
|
806
|
-
this.session =
|
|
817
|
+
this.session = f({
|
|
807
818
|
serverUrl: this.sessionDetails.server_url,
|
|
808
819
|
participantToken: this.sessionDetails.participant_token,
|
|
809
820
|
agentIdentity: this.sessionDetails.agent_identity,
|
|
@@ -817,6 +828,9 @@ class B {
|
|
|
817
828
|
onStateChange: (e) => {
|
|
818
829
|
this.mounted && e === "disconnected" && (this.setStatus("disconnected"), this.callbacks.onDisconnect?.());
|
|
819
830
|
},
|
|
831
|
+
onAgentStateChange: (e) => {
|
|
832
|
+
this.mounted && this.setAgentState(e);
|
|
833
|
+
},
|
|
820
834
|
onError: (e) => {
|
|
821
835
|
this.mounted && this.callbacks.onError?.(e);
|
|
822
836
|
},
|
|
@@ -825,9 +839,9 @@ class B {
|
|
|
825
839
|
}
|
|
826
840
|
}), this.agent = y(this.voiceAgentDetails.type), this.agent.on("audio", (e) => this.session?.sendAudio(e)), this.agent.on("turnEnd", () => this.session?.endAudioTurn()), this.agent.on("interrupted", () => {
|
|
827
841
|
this.session?.endAudioTurn(), this.session?.interrupt();
|
|
828
|
-
}), this.agent.on("
|
|
842
|
+
}), this.agent.on("closed", () => {
|
|
829
843
|
this.mounted && this.callbacks.onDisconnect?.();
|
|
830
|
-
}), this.agent instanceof
|
|
844
|
+
}), this.agent instanceof _ && this.agent.on("emotion", (e) => this.session?.setEmotion(e)), await this.session.connect();
|
|
831
845
|
}
|
|
832
846
|
async initMicrophone() {
|
|
833
847
|
this.stream = await navigator.mediaDevices.getUserMedia({
|
|
@@ -836,7 +850,7 @@ class B {
|
|
|
836
850
|
const e = this.audioContext.createMediaStreamSource(this.stream);
|
|
837
851
|
this.processor = this.audioContext.createScriptProcessor(4096, 1, 1), this.processor.onaudioprocess = (t) => {
|
|
838
852
|
if (!this._isMuted) {
|
|
839
|
-
const s =
|
|
853
|
+
const s = w(t.inputBuffer.getChannelData(0));
|
|
840
854
|
this.agent?.sendAudio(s);
|
|
841
855
|
}
|
|
842
856
|
}, e.connect(this.processor), this.processor.connect(this.audioContext.destination);
|
|
@@ -869,7 +883,7 @@ export {
|
|
|
869
883
|
U as AGENT_REGISTRY,
|
|
870
884
|
u as BaseAgent,
|
|
871
885
|
O as CartesiaAgent,
|
|
872
|
-
|
|
886
|
+
_ as ElevenLabsAgent,
|
|
873
887
|
R as GeminiLiveAgent,
|
|
874
888
|
D as KeyframeApiError,
|
|
875
889
|
K as PersonaEmbed,
|
|
@@ -879,7 +893,7 @@ export {
|
|
|
879
893
|
m as bytesToBase64,
|
|
880
894
|
y as createAgent,
|
|
881
895
|
E as createEventEmitter,
|
|
882
|
-
|
|
896
|
+
w as floatTo16BitPCM,
|
|
883
897
|
F as getAgentInfo,
|
|
884
|
-
|
|
898
|
+
h as resamplePcm
|
|
885
899
|
};
|
package/dist/types.d.ts
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import { AgentType
|
|
1
|
+
import { AgentType } from './agents';
|
|
2
|
+
import { AgentState } from '@keyframelabs/sdk';
|
|
2
3
|
export type EmbedStatus = 'connecting' | 'connected' | 'error' | 'disconnected';
|
|
3
4
|
export type VideoFit = 'cover' | 'contain';
|
|
4
5
|
export type VoiceAgentDetails = {
|
package/package.json
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
"publishConfig": {
|
|
5
5
|
"access": "public"
|
|
6
6
|
},
|
|
7
|
-
"version": "0.0
|
|
7
|
+
"version": "0.2.0",
|
|
8
8
|
"type": "module",
|
|
9
9
|
"main": "./dist/index.js",
|
|
10
10
|
"types": "./dist/index.d.ts",
|
|
@@ -19,7 +19,7 @@
|
|
|
19
19
|
],
|
|
20
20
|
"sideEffects": false,
|
|
21
21
|
"dependencies": {
|
|
22
|
-
"@keyframelabs/sdk": "0.1.
|
|
22
|
+
"@keyframelabs/sdk": "0.1.8"
|
|
23
23
|
},
|
|
24
24
|
"devDependencies": {
|
|
25
25
|
"@types/node": "^25.0.9",
|