@keyframelabs/elements 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -4
- package/dist/ApiError.d.ts +20 -0
- package/dist/PersonaEmbed.d.ts +3 -11
- package/dist/PersonaView.d.ts +67 -0
- package/dist/index.d.ts +6 -1
- package/dist/index.js +791 -1
- package/dist/types.d.ts +20 -0
- package/package.json +15 -8
- package/dist/index.mjs +0 -636
package/dist/types.d.ts
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { AgentType, AgentState } from './agents';
|
|
2
|
+
export type EmbedStatus = 'connecting' | 'connected' | 'error' | 'disconnected';
|
|
3
|
+
export type VideoFit = 'cover' | 'contain';
|
|
4
|
+
export type VoiceAgentDetails = {
|
|
5
|
+
type: AgentType;
|
|
6
|
+
token?: string;
|
|
7
|
+
agent_id?: string;
|
|
8
|
+
signed_url?: string;
|
|
9
|
+
};
|
|
10
|
+
export type SessionDetails = {
|
|
11
|
+
server_url: string;
|
|
12
|
+
participant_token: string;
|
|
13
|
+
agent_identity: string;
|
|
14
|
+
};
|
|
15
|
+
export interface BaseCallbacks {
|
|
16
|
+
onDisconnect?: () => void;
|
|
17
|
+
onError?: (err: Error) => void;
|
|
18
|
+
onStateChange?: (status: EmbedStatus) => void;
|
|
19
|
+
onAgentStateChange?: (state: AgentState) => void;
|
|
20
|
+
}
|
package/package.json
CHANGED
|
@@ -1,17 +1,24 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@keyframelabs/elements",
|
|
3
|
-
"version": "0.0.2",
|
|
4
|
-
"main": "dist/index.js",
|
|
5
|
-
"module": "dist/index.mjs",
|
|
6
|
-
"types": "dist/index.d.ts",
|
|
7
|
-
"files": [
|
|
8
|
-
"dist"
|
|
9
|
-
],
|
|
10
3
|
"publishConfig": {
|
|
11
4
|
"access": "public"
|
|
12
5
|
},
|
|
6
|
+
"version": "0.0.4",
|
|
7
|
+
"type": "module",
|
|
8
|
+
"main": "./dist/index.js",
|
|
9
|
+
"types": "./dist/index.d.ts",
|
|
10
|
+
"exports": {
|
|
11
|
+
".": {
|
|
12
|
+
"types": "./dist/index.d.ts",
|
|
13
|
+
"import": "./dist/index.js"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"dist"
|
|
18
|
+
],
|
|
19
|
+
"sideEffects": false,
|
|
13
20
|
"dependencies": {
|
|
14
|
-
"@keyframelabs/sdk": "0.1.
|
|
21
|
+
"@keyframelabs/sdk": "0.1.3"
|
|
15
22
|
},
|
|
16
23
|
"devDependencies": {
|
|
17
24
|
"@types/node": "^25.0.9",
|
package/dist/index.mjs
DELETED
|
@@ -1,636 +0,0 @@
|
|
|
1
|
-
import { createClient as v } from "@keyframelabs/sdk";
|
|
2
|
-
const h = 24e3;
|
|
3
|
-
function u(i) {
|
|
4
|
-
const e = atob(i), t = new Uint8Array(e.length);
|
|
5
|
-
for (let s = 0; s < e.length; s++)
|
|
6
|
-
t[s] = e.charCodeAt(s);
|
|
7
|
-
return t;
|
|
8
|
-
}
|
|
9
|
-
function p(i) {
|
|
10
|
-
let e = "";
|
|
11
|
-
for (let t = 0; t < i.length; t++)
|
|
12
|
-
e += String.fromCharCode(i[t]);
|
|
13
|
-
return btoa(e);
|
|
14
|
-
}
|
|
15
|
-
function d(i, e, t) {
|
|
16
|
-
if (e === t)
|
|
17
|
-
return i;
|
|
18
|
-
const s = new Int16Array(i.buffer, i.byteOffset, i.length / 2), n = e / t, a = Math.floor(s.length / n), o = new Int16Array(a);
|
|
19
|
-
for (let r = 0; r < a; r++) {
|
|
20
|
-
const g = r * n, c = Math.floor(g), S = Math.min(c + 1, s.length - 1), _ = g - c;
|
|
21
|
-
o[r] = Math.round(
|
|
22
|
-
s[c] * (1 - _) + s[S] * _
|
|
23
|
-
);
|
|
24
|
-
}
|
|
25
|
-
return new Uint8Array(o.buffer);
|
|
26
|
-
}
|
|
27
|
-
function f() {
|
|
28
|
-
const i = /* @__PURE__ */ new Map();
|
|
29
|
-
return {
|
|
30
|
-
on(e, t) {
|
|
31
|
-
i.has(e) || i.set(e, /* @__PURE__ */ new Set()), i.get(e).add(t);
|
|
32
|
-
},
|
|
33
|
-
off(e, t) {
|
|
34
|
-
i.get(e)?.delete(t);
|
|
35
|
-
},
|
|
36
|
-
emit(e, t) {
|
|
37
|
-
i.get(e)?.forEach((s) => s(t));
|
|
38
|
-
},
|
|
39
|
-
removeAllListeners() {
|
|
40
|
-
i.clear();
|
|
41
|
-
}
|
|
42
|
-
};
|
|
43
|
-
}
|
|
44
|
-
function w(i) {
|
|
45
|
-
const e = new Int16Array(i.length);
|
|
46
|
-
for (let t = 0; t < i.length; t++) {
|
|
47
|
-
const s = Math.max(-1, Math.min(1, i[t]));
|
|
48
|
-
e[t] = s < 0 ? s * 32768 : s * 32767;
|
|
49
|
-
}
|
|
50
|
-
return new Uint8Array(e.buffer);
|
|
51
|
-
}
|
|
52
|
-
const y = 16e3;
|
|
53
|
-
class m {
|
|
54
|
-
ws = null;
|
|
55
|
-
_state = "idle";
|
|
56
|
-
events = f();
|
|
57
|
-
inputSampleRate = y;
|
|
58
|
-
/** Current agent state */
|
|
59
|
-
get state() {
|
|
60
|
-
return this._state;
|
|
61
|
-
}
|
|
62
|
-
/**
|
|
63
|
-
* Update state and emit stateChange event.
|
|
64
|
-
*/
|
|
65
|
-
setState(e) {
|
|
66
|
-
this._state !== e && (this._state = e, this.events.emit("stateChange", e));
|
|
67
|
-
}
|
|
68
|
-
/**
|
|
69
|
-
* Handle WebSocket message (string or Blob).
|
|
70
|
-
* Converts to string and parses JSON before calling handleParsedMessage.
|
|
71
|
-
*/
|
|
72
|
-
handleMessage(e) {
|
|
73
|
-
if (e instanceof Blob) {
|
|
74
|
-
e.text().then((t) => this.parseAndHandle(t));
|
|
75
|
-
return;
|
|
76
|
-
}
|
|
77
|
-
this.parseAndHandle(e);
|
|
78
|
-
}
|
|
79
|
-
/**
|
|
80
|
-
* Parse JSON and call handleParsedMessage if valid.
|
|
81
|
-
*/
|
|
82
|
-
parseAndHandle(e) {
|
|
83
|
-
try {
|
|
84
|
-
const t = JSON.parse(e);
|
|
85
|
-
this.handleParsedMessage(t);
|
|
86
|
-
} catch {
|
|
87
|
-
console.warn(`[${this.agentName}] Failed to parse message:`, e.slice(0, 200));
|
|
88
|
-
}
|
|
89
|
-
}
|
|
90
|
-
/**
|
|
91
|
-
* Close the WebSocket connection and clean up resources.
|
|
92
|
-
* Subclasses can override to add custom cleanup, but should call super.close().
|
|
93
|
-
*/
|
|
94
|
-
close() {
|
|
95
|
-
this.ws && (this.ws.close(), this.ws = null), this.events.removeAllListeners(), this.setState("idle");
|
|
96
|
-
}
|
|
97
|
-
/**
|
|
98
|
-
* Register an event handler.
|
|
99
|
-
*/
|
|
100
|
-
on(e, t) {
|
|
101
|
-
this.events.on(e, t);
|
|
102
|
-
}
|
|
103
|
-
/**
|
|
104
|
-
* Remove an event handler.
|
|
105
|
-
*/
|
|
106
|
-
off(e, t) {
|
|
107
|
-
this.events.off(e, t);
|
|
108
|
-
}
|
|
109
|
-
/**
|
|
110
|
-
* Helper to emit the closed event with code and reason.
|
|
111
|
-
*/
|
|
112
|
-
emitClosed(e, t) {
|
|
113
|
-
this.events.emit("closed", { code: e, reason: t });
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
const b = "gemini-2.5-flash-native-audio-preview-12-2025", I = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent", E = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
|
|
117
|
-
class C extends m {
|
|
118
|
-
agentName = "GeminiLive";
|
|
119
|
-
async connect(e) {
|
|
120
|
-
if (this.ws)
|
|
121
|
-
throw new Error("Already connected");
|
|
122
|
-
if (!e.apiKey)
|
|
123
|
-
throw new Error("Gemini API key is required");
|
|
124
|
-
e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate);
|
|
125
|
-
const t = e.model ?? b, n = (e.authType ?? "api_key") === "ephemeral_token" ? `${E}?access_token=${encodeURIComponent(e.apiKey)}` : `${I}?key=${encodeURIComponent(e.apiKey)}`;
|
|
126
|
-
return new Promise((a, o) => {
|
|
127
|
-
this.ws = new WebSocket(n), this.ws.onopen = () => {
|
|
128
|
-
const r = {
|
|
129
|
-
setup: {
|
|
130
|
-
model: `models/${t}`,
|
|
131
|
-
generationConfig: {
|
|
132
|
-
responseModalities: ["AUDIO"]
|
|
133
|
-
},
|
|
134
|
-
systemInstruction: e.systemPrompt ? { parts: [{ text: e.systemPrompt }] } : void 0
|
|
135
|
-
}
|
|
136
|
-
};
|
|
137
|
-
this.ws.send(JSON.stringify(r)), this.setState("listening"), a();
|
|
138
|
-
}, this.ws.onerror = () => {
|
|
139
|
-
o(new Error("Failed to connect to Gemini Live"));
|
|
140
|
-
}, this.ws.onclose = (r) => {
|
|
141
|
-
this.ws = null, this.setState("idle"), this.emitClosed(r.code, r.reason);
|
|
142
|
-
}, this.ws.onmessage = (r) => {
|
|
143
|
-
this.handleMessage(r.data);
|
|
144
|
-
};
|
|
145
|
-
});
|
|
146
|
-
}
|
|
147
|
-
handleParsedMessage(e) {
|
|
148
|
-
const s = e.serverContent;
|
|
149
|
-
if (s) {
|
|
150
|
-
if (s.interrupted) {
|
|
151
|
-
this.events.emit("interrupted", void 0), this.setState("listening");
|
|
152
|
-
return;
|
|
153
|
-
}
|
|
154
|
-
if (s.turnComplete) {
|
|
155
|
-
this.events.emit("turnEnd", void 0), this.setState("listening");
|
|
156
|
-
return;
|
|
157
|
-
}
|
|
158
|
-
if (s.modelTurn?.parts) {
|
|
159
|
-
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
160
|
-
for (const n of s.modelTurn.parts) {
|
|
161
|
-
if (n.inlineData?.data) {
|
|
162
|
-
const a = u(n.inlineData.data);
|
|
163
|
-
this.events.emit("audio", a);
|
|
164
|
-
}
|
|
165
|
-
n.text && this.events.emit("transcript", {
|
|
166
|
-
role: "assistant",
|
|
167
|
-
text: n.text,
|
|
168
|
-
isFinal: !0
|
|
169
|
-
});
|
|
170
|
-
}
|
|
171
|
-
}
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
sendAudio(e) {
|
|
175
|
-
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
|
176
|
-
console.warn("[GeminiLive] Cannot send audio: not connected");
|
|
177
|
-
return;
|
|
178
|
-
}
|
|
179
|
-
const t = {
|
|
180
|
-
realtimeInput: {
|
|
181
|
-
mediaChunks: [
|
|
182
|
-
{
|
|
183
|
-
mimeType: `audio/pcm;rate=${this.inputSampleRate}`,
|
|
184
|
-
data: p(e)
|
|
185
|
-
}
|
|
186
|
-
]
|
|
187
|
-
}
|
|
188
|
-
};
|
|
189
|
-
this.ws.send(JSON.stringify(t));
|
|
190
|
-
}
|
|
191
|
-
}
|
|
192
|
-
const A = "wss://api.elevenlabs.io/v1/convai/conversation";
|
|
193
|
-
class k extends m {
|
|
194
|
-
agentName = "ElevenLabs";
|
|
195
|
-
outputSampleRate = 24e3;
|
|
196
|
-
// Default, updated from metadata
|
|
197
|
-
expectedInputSampleRate = 16e3;
|
|
198
|
-
// What ElevenLabs expects, updated from metadata
|
|
199
|
-
sourceInputSampleRate = 16e3;
|
|
200
|
-
// What caller sends via sendAudio, from config
|
|
201
|
-
initialized = !1;
|
|
202
|
-
// True after conversation_initiation_metadata received
|
|
203
|
-
lastInterruptId = 0;
|
|
204
|
-
// Track interruptions to filter stale audio
|
|
205
|
-
async connect(e) {
|
|
206
|
-
if (this.ws)
|
|
207
|
-
throw new Error("Already connected");
|
|
208
|
-
if (!e.agentId && !e.signedUrl)
|
|
209
|
-
throw new Error("ElevenLabs agent ID or signed URL is required");
|
|
210
|
-
e.inputSampleRate && (this.sourceInputSampleRate = e.inputSampleRate);
|
|
211
|
-
let t;
|
|
212
|
-
return e.signedUrl ? t = e.signedUrl : (t = `${A}?agent_id=${e.agentId}`, e.apiKey && (t += `&xi-api-key=${e.apiKey}`)), new Promise((s, n) => {
|
|
213
|
-
this.ws = new WebSocket(t), this.ws.onopen = () => {
|
|
214
|
-
this.setState("listening"), s();
|
|
215
|
-
}, this.ws.onerror = () => {
|
|
216
|
-
n(new Error("Failed to connect to ElevenLabs"));
|
|
217
|
-
}, this.ws.onclose = (a) => {
|
|
218
|
-
this.ws = null, this.setState("idle"), this.emitClosed(a.code, a.reason);
|
|
219
|
-
}, this.ws.onmessage = (a) => {
|
|
220
|
-
this.handleMessage(a.data);
|
|
221
|
-
};
|
|
222
|
-
});
|
|
223
|
-
}
|
|
224
|
-
handleParsedMessage(e) {
|
|
225
|
-
const t = e;
|
|
226
|
-
switch (t.type) {
|
|
227
|
-
case "conversation_initiation_metadata":
|
|
228
|
-
this.handleInitMetadata(t);
|
|
229
|
-
break;
|
|
230
|
-
case "ping":
|
|
231
|
-
this.handlePing(t);
|
|
232
|
-
break;
|
|
233
|
-
case "audio":
|
|
234
|
-
this.handleAudio(t);
|
|
235
|
-
break;
|
|
236
|
-
case "user_transcript":
|
|
237
|
-
this.handleUserTranscript(t);
|
|
238
|
-
break;
|
|
239
|
-
case "agent_response":
|
|
240
|
-
this.handleAgentResponse(t);
|
|
241
|
-
break;
|
|
242
|
-
case "interruption":
|
|
243
|
-
this.handleInterruption(t);
|
|
244
|
-
break;
|
|
245
|
-
case "agent_response_correction":
|
|
246
|
-
this.setState("listening");
|
|
247
|
-
break;
|
|
248
|
-
}
|
|
249
|
-
}
|
|
250
|
-
handleInitMetadata(e) {
|
|
251
|
-
const t = e.conversation_initiation_metadata_event;
|
|
252
|
-
if (t) {
|
|
253
|
-
if (t.agent_output_audio_format) {
|
|
254
|
-
const s = t.agent_output_audio_format.match(/pcm_(\d+)/);
|
|
255
|
-
s && (this.outputSampleRate = parseInt(s[1], 10));
|
|
256
|
-
}
|
|
257
|
-
if (t.user_input_audio_format) {
|
|
258
|
-
const s = t.user_input_audio_format.match(/pcm_(\d+)/);
|
|
259
|
-
s && (this.expectedInputSampleRate = parseInt(s[1], 10));
|
|
260
|
-
}
|
|
261
|
-
this.initialized = !0;
|
|
262
|
-
}
|
|
263
|
-
}
|
|
264
|
-
handlePing(e) {
|
|
265
|
-
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
|
|
266
|
-
const t = e.ping_event?.event_id;
|
|
267
|
-
this.ws.send(JSON.stringify({ type: "pong", event_id: t }));
|
|
268
|
-
}
|
|
269
|
-
}
|
|
270
|
-
handleAudio(e) {
|
|
271
|
-
const t = e.audio_event;
|
|
272
|
-
if (!t?.audio_base_64 || (t.event_id ?? 0) <= this.lastInterruptId)
|
|
273
|
-
return;
|
|
274
|
-
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
275
|
-
let n = u(t.audio_base_64);
|
|
276
|
-
this.outputSampleRate !== h && (n = d(n, this.outputSampleRate, h));
|
|
277
|
-
const a = 4800;
|
|
278
|
-
if (n.length <= a)
|
|
279
|
-
this.events.emit("audio", n);
|
|
280
|
-
else
|
|
281
|
-
for (let o = 0; o < n.length; o += a) {
|
|
282
|
-
const r = n.slice(o, Math.min(o + a, n.length));
|
|
283
|
-
this.events.emit("audio", r);
|
|
284
|
-
}
|
|
285
|
-
}
|
|
286
|
-
handleUserTranscript(e) {
|
|
287
|
-
const t = e.user_transcription_event;
|
|
288
|
-
t?.user_transcript && this.events.emit("transcript", {
|
|
289
|
-
role: "user",
|
|
290
|
-
text: t.user_transcript,
|
|
291
|
-
isFinal: !0
|
|
292
|
-
});
|
|
293
|
-
}
|
|
294
|
-
handleAgentResponse(e) {
|
|
295
|
-
const t = e.agent_response_event;
|
|
296
|
-
t?.agent_response && this.events.emit("transcript", {
|
|
297
|
-
role: "assistant",
|
|
298
|
-
text: t.agent_response,
|
|
299
|
-
isFinal: !0
|
|
300
|
-
});
|
|
301
|
-
}
|
|
302
|
-
handleInterruption(e) {
|
|
303
|
-
const t = e.interruption_event;
|
|
304
|
-
t?.event_id && (this.lastInterruptId = t.event_id), this.events.emit("interrupted", void 0), this.setState("listening");
|
|
305
|
-
}
|
|
306
|
-
sendAudio(e) {
|
|
307
|
-
if (!this.ws || this.ws.readyState !== WebSocket.OPEN || !this.initialized)
|
|
308
|
-
return;
|
|
309
|
-
let t = e;
|
|
310
|
-
this.sourceInputSampleRate !== this.expectedInputSampleRate && (t = d(e, this.sourceInputSampleRate, this.expectedInputSampleRate)), this.ws.send(JSON.stringify({
|
|
311
|
-
user_audio_chunk: p(t)
|
|
312
|
-
}));
|
|
313
|
-
}
|
|
314
|
-
/**
|
|
315
|
-
* Send a text message as if the user spoke it.
|
|
316
|
-
*/
|
|
317
|
-
sendText(e) {
|
|
318
|
-
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
|
319
|
-
console.warn("[ElevenLabs] Cannot send text: not connected");
|
|
320
|
-
return;
|
|
321
|
-
}
|
|
322
|
-
this.ws.send(JSON.stringify({
|
|
323
|
-
type: "user_message",
|
|
324
|
-
text: e
|
|
325
|
-
}));
|
|
326
|
-
}
|
|
327
|
-
/**
|
|
328
|
-
* Send contextual information to the agent without interrupting.
|
|
329
|
-
*/
|
|
330
|
-
sendContext(e) {
|
|
331
|
-
if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
|
|
332
|
-
console.warn("[ElevenLabs] Cannot send context: not connected");
|
|
333
|
-
return;
|
|
334
|
-
}
|
|
335
|
-
this.ws.send(JSON.stringify({
|
|
336
|
-
type: "contextual_update",
|
|
337
|
-
text: e
|
|
338
|
-
}));
|
|
339
|
-
}
|
|
340
|
-
close() {
|
|
341
|
-
this.initialized = !1, this.lastInterruptId = 0, super.close();
|
|
342
|
-
}
|
|
343
|
-
}
|
|
344
|
-
const R = "wss://api.cartesia.ai/agents/stream", M = "2025-04-16";
|
|
345
|
-
class P extends m {
|
|
346
|
-
agentName = "Cartesia";
|
|
347
|
-
// Audio configuration
|
|
348
|
-
cartesiaInputFormat = "pcm_16000";
|
|
349
|
-
// Format we tell Cartesia we are sending
|
|
350
|
-
cartesiaOutputRate = 16e3;
|
|
351
|
-
// Cartesia defaults to 16kHz for web
|
|
352
|
-
// Connection state
|
|
353
|
-
streamId = null;
|
|
354
|
-
isReady = !1;
|
|
355
|
-
pingInterval = null;
|
|
356
|
-
async connect(e) {
|
|
357
|
-
if (this.ws)
|
|
358
|
-
throw new Error("Already connected");
|
|
359
|
-
if (!e.agentId)
|
|
360
|
-
throw new Error("Cartesia Agent ID is required");
|
|
361
|
-
if (!e.apiKey)
|
|
362
|
-
throw new Error("Cartesia API Key is required");
|
|
363
|
-
e.inputSampleRate && (this.inputSampleRate = e.inputSampleRate), this.inputSampleRate === 16e3 ? this.cartesiaInputFormat = "pcm_16000" : this.inputSampleRate === 24e3 ? this.cartesiaInputFormat = "pcm_24000" : this.inputSampleRate === 44100 ? this.cartesiaInputFormat = "pcm_44100" : this.cartesiaInputFormat = "pcm_16000";
|
|
364
|
-
const t = `${R}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${M}`;
|
|
365
|
-
return new Promise((s, n) => {
|
|
366
|
-
this.ws = new WebSocket(t), this.ws.onopen = () => {
|
|
367
|
-
this.sendStartEvent(), this.startHeartbeat(), s();
|
|
368
|
-
}, this.ws.onerror = () => {
|
|
369
|
-
n(new Error("Failed to connect to Cartesia"));
|
|
370
|
-
}, this.ws.onclose = (a) => {
|
|
371
|
-
this.stopHeartbeat(), this.ws = null, this.isReady = !1, this.streamId = null, this.setState("idle"), this.emitClosed(a.code, a.reason);
|
|
372
|
-
}, this.ws.onmessage = (a) => {
|
|
373
|
-
this.handleMessage(a.data);
|
|
374
|
-
};
|
|
375
|
-
});
|
|
376
|
-
}
|
|
377
|
-
sendStartEvent() {
|
|
378
|
-
if (!this.ws) return;
|
|
379
|
-
const e = {
|
|
380
|
-
event: "start",
|
|
381
|
-
config: {
|
|
382
|
-
input_format: this.cartesiaInputFormat
|
|
383
|
-
}
|
|
384
|
-
};
|
|
385
|
-
this.ws.send(JSON.stringify(e));
|
|
386
|
-
}
|
|
387
|
-
/**
|
|
388
|
-
* Keep connection alive with periodic custom events.
|
|
389
|
-
* Cartesia requires activity every 30s.
|
|
390
|
-
*/
|
|
391
|
-
startHeartbeat() {
|
|
392
|
-
this.pingInterval = window.setInterval(() => {
|
|
393
|
-
this.ws?.readyState === WebSocket.OPEN && this.streamId && this.ws.send(JSON.stringify({
|
|
394
|
-
event: "custom",
|
|
395
|
-
stream_id: this.streamId,
|
|
396
|
-
metadata: { keepalive: !0 }
|
|
397
|
-
}));
|
|
398
|
-
}, 2e4);
|
|
399
|
-
}
|
|
400
|
-
stopHeartbeat() {
|
|
401
|
-
this.pingInterval && (clearInterval(this.pingInterval), this.pingInterval = null);
|
|
402
|
-
}
|
|
403
|
-
handleParsedMessage(e) {
|
|
404
|
-
const t = e;
|
|
405
|
-
switch (t.event) {
|
|
406
|
-
case "ack":
|
|
407
|
-
this.handleAck(t);
|
|
408
|
-
break;
|
|
409
|
-
case "media_output":
|
|
410
|
-
this.handleMediaOutput(t);
|
|
411
|
-
break;
|
|
412
|
-
case "clear":
|
|
413
|
-
this.handleClear();
|
|
414
|
-
break;
|
|
415
|
-
case "error":
|
|
416
|
-
console.error("[Cartesia] Server error:", t);
|
|
417
|
-
break;
|
|
418
|
-
}
|
|
419
|
-
}
|
|
420
|
-
handleAck(e) {
|
|
421
|
-
this.streamId = e.stream_id || null, this.isReady = !0, this.setState("listening");
|
|
422
|
-
}
|
|
423
|
-
handleMediaOutput(e) {
|
|
424
|
-
if (!e.media?.payload) return;
|
|
425
|
-
this._state !== "speaking" && (this.events.emit("turnStart", void 0), this.setState("speaking"));
|
|
426
|
-
let t = u(e.media.payload);
|
|
427
|
-
this.cartesiaOutputRate !== h && (t = d(t, this.cartesiaOutputRate, h)), this.events.emit("audio", t);
|
|
428
|
-
}
|
|
429
|
-
handleClear() {
|
|
430
|
-
this.events.emit("interrupted", void 0), this.setState("listening");
|
|
431
|
-
}
|
|
432
|
-
sendAudio(e) {
|
|
433
|
-
if (!this.ws || this.ws.readyState !== WebSocket.OPEN || !this.isReady || !this.streamId)
|
|
434
|
-
return;
|
|
435
|
-
let t = e;
|
|
436
|
-
const s = parseInt(this.cartesiaInputFormat.split("_")[1]);
|
|
437
|
-
this.inputSampleRate !== s && (t = d(e, this.inputSampleRate, s)), this.ws.send(JSON.stringify({
|
|
438
|
-
event: "media_input",
|
|
439
|
-
stream_id: this.streamId,
|
|
440
|
-
media: {
|
|
441
|
-
payload: p(t)
|
|
442
|
-
}
|
|
443
|
-
}));
|
|
444
|
-
}
|
|
445
|
-
close() {
|
|
446
|
-
this.stopHeartbeat(), this.isReady = !1, this.streamId = null, super.close();
|
|
447
|
-
}
|
|
448
|
-
}
|
|
449
|
-
const x = [
|
|
450
|
-
{ id: "gemini", name: "Gemini Live", description: "Google Gemini Live API" },
|
|
451
|
-
{ id: "elevenlabs", name: "ElevenLabs", description: "ElevenLabs Conversational AI" },
|
|
452
|
-
{ id: "cartesia", name: "Cartesia", description: "Cartesia Agents API" }
|
|
453
|
-
];
|
|
454
|
-
function N(i) {
|
|
455
|
-
switch (i) {
|
|
456
|
-
case "gemini":
|
|
457
|
-
return new C();
|
|
458
|
-
case "elevenlabs":
|
|
459
|
-
return new k();
|
|
460
|
-
case "cartesia":
|
|
461
|
-
return new P();
|
|
462
|
-
default:
|
|
463
|
-
throw new Error(`Unknown agent type: ${i}`);
|
|
464
|
-
}
|
|
465
|
-
}
|
|
466
|
-
function T(i) {
|
|
467
|
-
return x.find((e) => e.id === i);
|
|
468
|
-
}
|
|
469
|
-
const l = /* @__PURE__ */ new Set();
|
|
470
|
-
class L {
|
|
471
|
-
apiBaseUrl;
|
|
472
|
-
publishableKey;
|
|
473
|
-
callbacks;
|
|
474
|
-
// DOM
|
|
475
|
-
_video;
|
|
476
|
-
_audio;
|
|
477
|
-
// Session
|
|
478
|
-
session = null;
|
|
479
|
-
agent = null;
|
|
480
|
-
audioContext = null;
|
|
481
|
-
processor = null;
|
|
482
|
-
stream = null;
|
|
483
|
-
abortController = null;
|
|
484
|
-
_status = "disconnected";
|
|
485
|
-
_agentState = "idle";
|
|
486
|
-
_isMuted = !1;
|
|
487
|
-
mounted = !0;
|
|
488
|
-
constructor(e) {
|
|
489
|
-
this.apiBaseUrl = e.apiBaseUrl ?? "https://api.keyframelabs.com", this.publishableKey = e.publishableKey, this.callbacks = {
|
|
490
|
-
onDisconnect: e.onDisconnect,
|
|
491
|
-
onError: e.onError,
|
|
492
|
-
onStateChange: e.onStateChange,
|
|
493
|
-
onAgentStateChange: e.onAgentStateChange
|
|
494
|
-
}, this._video = document.createElement("video"), this._video.style.position = "absolute", this._video.style.inset = "0", this._video.style.width = "100%", this._video.style.height = "100%", this._video.style.objectFit = e.videoFit ?? "cover", this._video.autoplay = !0, this._video.playsInline = !0, this._video.muted = !0, e.container.style.position = "relative", e.container.style.backgroundColor = "#000", this._audio = document.createElement("audio"), this._audio.autoplay = !0, e.container.appendChild(this._video), e.container.appendChild(this._audio);
|
|
495
|
-
}
|
|
496
|
-
// Read-only state
|
|
497
|
-
get status() {
|
|
498
|
-
return this._status;
|
|
499
|
-
}
|
|
500
|
-
get agentState() {
|
|
501
|
-
return this._agentState;
|
|
502
|
-
}
|
|
503
|
-
get isMuted() {
|
|
504
|
-
return this._isMuted;
|
|
505
|
-
}
|
|
506
|
-
get videoElement() {
|
|
507
|
-
return this._video;
|
|
508
|
-
}
|
|
509
|
-
get audioElement() {
|
|
510
|
-
return this._audio;
|
|
511
|
-
}
|
|
512
|
-
/** Connect to the embed session */
|
|
513
|
-
async connect() {
|
|
514
|
-
if (l.has(this.publishableKey)) {
|
|
515
|
-
console.log("[PersonaEmbed] Connection already in progress, skipping");
|
|
516
|
-
return;
|
|
517
|
-
}
|
|
518
|
-
l.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
|
|
519
|
-
try {
|
|
520
|
-
const e = await this.fetchSession(this.abortController.signal);
|
|
521
|
-
if (!this.mounted) {
|
|
522
|
-
l.delete(this.publishableKey);
|
|
523
|
-
return;
|
|
524
|
-
}
|
|
525
|
-
if (await this.initSession(e), await this.initMicrophone(), await this.connectAgent(e.voice_agent_details), !this.mounted) {
|
|
526
|
-
this.cleanup(), l.delete(this.publishableKey);
|
|
527
|
-
return;
|
|
528
|
-
}
|
|
529
|
-
this.setStatus("connected");
|
|
530
|
-
} catch (e) {
|
|
531
|
-
if (l.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
|
|
532
|
-
return;
|
|
533
|
-
console.error("[PersonaEmbed]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
|
|
534
|
-
}
|
|
535
|
-
}
|
|
536
|
-
/** Disconnect and cleanup */
|
|
537
|
-
disconnect() {
|
|
538
|
-
this.mounted = !1, this.abortController?.abort(), this.abortController = null, l.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
|
|
539
|
-
}
|
|
540
|
-
/** Toggle microphone mute */
|
|
541
|
-
toggleMute() {
|
|
542
|
-
this._isMuted = !this._isMuted;
|
|
543
|
-
}
|
|
544
|
-
setStatus(e) {
|
|
545
|
-
this._status !== e && (this._status = e, this.callbacks.onStateChange?.(e));
|
|
546
|
-
}
|
|
547
|
-
setAgentState(e) {
|
|
548
|
-
this._agentState !== e && (this._agentState = e, this.callbacks.onAgentStateChange?.(e));
|
|
549
|
-
}
|
|
550
|
-
async fetchSession(e) {
|
|
551
|
-
const t = await fetch(`${this.apiBaseUrl}/v1/embed/create_session`, {
|
|
552
|
-
method: "POST",
|
|
553
|
-
headers: { "Content-Type": "application/json" },
|
|
554
|
-
body: JSON.stringify({ publishable_key: this.publishableKey }),
|
|
555
|
-
signal: e
|
|
556
|
-
});
|
|
557
|
-
if (!t.ok) {
|
|
558
|
-
const s = await t.json().catch(() => null);
|
|
559
|
-
throw new Error(`create_session failed: ${t.status} ${JSON.stringify(s)}`);
|
|
560
|
-
}
|
|
561
|
-
return t.json();
|
|
562
|
-
}
|
|
563
|
-
async initSession(e) {
|
|
564
|
-
this.session = v({
|
|
565
|
-
serverUrl: e.session_details.server_url,
|
|
566
|
-
participantToken: e.session_details.participant_token,
|
|
567
|
-
agentIdentity: e.session_details.agent_identity,
|
|
568
|
-
onVideoTrack: (t) => {
|
|
569
|
-
console.log("[PersonaEmbed] Setting video track", t.readyState, t.enabled), this._video.srcObject = new MediaStream([t]), this._video.play().catch((s) => console.warn("[PersonaEmbed] Video play failed:", s));
|
|
570
|
-
},
|
|
571
|
-
onAudioTrack: (t) => {
|
|
572
|
-
this._audio.srcObject = new MediaStream([t]), this._audio.play().catch(() => {
|
|
573
|
-
});
|
|
574
|
-
},
|
|
575
|
-
onStateChange: (t) => {
|
|
576
|
-
this.mounted && t === "disconnected" && (this.setStatus("disconnected"), this.callbacks.onDisconnect?.());
|
|
577
|
-
},
|
|
578
|
-
onError: (t) => {
|
|
579
|
-
this.mounted && this.callbacks.onError?.(t);
|
|
580
|
-
},
|
|
581
|
-
onClose: () => {
|
|
582
|
-
this.mounted && this.callbacks.onDisconnect?.();
|
|
583
|
-
}
|
|
584
|
-
}), this.agent = N(e.voice_agent_details.type), this.agent.on("audio", (t) => this.session?.sendAudio(t)), this.agent.on("interrupted", () => this.session?.interrupt()), this.agent.on("stateChange", (t) => this.setAgentState(t)), this.agent.on("closed", () => {
|
|
585
|
-
this.mounted && this.callbacks.onDisconnect?.();
|
|
586
|
-
}), await this.session.connect();
|
|
587
|
-
}
|
|
588
|
-
async initMicrophone() {
|
|
589
|
-
this.stream = await navigator.mediaDevices.getUserMedia({
|
|
590
|
-
audio: { sampleRate: 16e3, echoCancellation: !0, noiseSuppression: !0 }
|
|
591
|
-
}), this.audioContext = new AudioContext({ sampleRate: 16e3 });
|
|
592
|
-
const e = this.audioContext.createMediaStreamSource(this.stream);
|
|
593
|
-
this.processor = this.audioContext.createScriptProcessor(4096, 1, 1), this.processor.onaudioprocess = (t) => {
|
|
594
|
-
if (!this._isMuted) {
|
|
595
|
-
const s = w(t.inputBuffer.getChannelData(0));
|
|
596
|
-
this.agent?.sendAudio(s);
|
|
597
|
-
}
|
|
598
|
-
}, e.connect(this.processor), this.processor.connect(this.audioContext.destination);
|
|
599
|
-
}
|
|
600
|
-
async connectAgent(e) {
|
|
601
|
-
if (!this.agent) return;
|
|
602
|
-
const t = { inputSampleRate: 16e3 };
|
|
603
|
-
e.type === "gemini" ? await this.agent.connect({
|
|
604
|
-
...t,
|
|
605
|
-
apiKey: e.token,
|
|
606
|
-
authType: "ephemeral_token"
|
|
607
|
-
}) : e.type === "elevenlabs" ? await this.agent.connect({
|
|
608
|
-
...t,
|
|
609
|
-
agentId: e.agent_id,
|
|
610
|
-
signedUrl: e.signed_url
|
|
611
|
-
}) : e.type === "cartesia" && await this.agent.connect({
|
|
612
|
-
...t,
|
|
613
|
-
agentId: e.agent_id,
|
|
614
|
-
apiKey: e.token
|
|
615
|
-
});
|
|
616
|
-
}
|
|
617
|
-
cleanup() {
|
|
618
|
-
this.stream?.getTracks().forEach((e) => e.stop()), this.processor?.disconnect(), this.audioContext?.close(), this.agent?.close(), this.session?.close(), this.stream = null, this.processor = null, this.audioContext = null, this.agent = null, this.session = null;
|
|
619
|
-
}
|
|
620
|
-
}
|
|
621
|
-
export {
|
|
622
|
-
x as AGENT_REGISTRY,
|
|
623
|
-
m as BaseAgent,
|
|
624
|
-
P as CartesiaAgent,
|
|
625
|
-
k as ElevenLabsAgent,
|
|
626
|
-
C as GeminiLiveAgent,
|
|
627
|
-
L as PersonaEmbed,
|
|
628
|
-
h as SAMPLE_RATE,
|
|
629
|
-
u as base64ToBytes,
|
|
630
|
-
p as bytesToBase64,
|
|
631
|
-
N as createAgent,
|
|
632
|
-
f as createEventEmitter,
|
|
633
|
-
w as floatTo16BitPCM,
|
|
634
|
-
T as getAgentInfo,
|
|
635
|
-
d as resamplePcm
|
|
636
|
-
};
|