@keyframelabs/elements 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +83 -4
- package/dist/ApiError.d.ts +20 -0
- package/dist/PersonaEmbed.d.ts +3 -11
- package/dist/PersonaView.d.ts +67 -0
- package/dist/index.d.ts +6 -1
- package/dist/index.js +791 -1
- package/dist/types.d.ts +20 -0
- package/package.json +15 -8
- package/dist/index.mjs +0 -636
package/dist/index.js
CHANGED
|
@@ -1 +1,791 @@
|
|
|
1
|
-
"use strict";Object.defineProperty(exports,Symbol.toStringTag,{value:"Module"});const A=require("@keyframelabs/sdk"),h=24e3;function c(i){const e=atob(i),t=new Uint8Array(e.length);for(let s=0;s<e.length;s++)t[s]=e.charCodeAt(s);return t}function u(i){let e="";for(let t=0;t<i.length;t++)e+=String.fromCharCode(i[t]);return btoa(e)}function d(i,e,t){if(e===t)return i;const s=new Int16Array(i.buffer,i.byteOffset,i.length/2),n=e/t,a=Math.floor(s.length/n),o=new Int16Array(a);for(let r=0;r<a;r++){const g=r*n,m=Math.floor(g),E=Math.min(m+1,s.length-1),_=g-m;o[r]=Math.round(s[m]*(1-_)+s[E]*_)}return new Uint8Array(o.buffer)}function S(){const i=new Map;return{on(e,t){i.has(e)||i.set(e,new Set),i.get(e).add(t)},off(e,t){i.get(e)?.delete(t)},emit(e,t){i.get(e)?.forEach(s=>s(t))},removeAllListeners(){i.clear()}}}function v(i){const e=new Int16Array(i.length);for(let t=0;t<i.length;t++){const s=Math.max(-1,Math.min(1,i[t]));e[t]=s<0?s*32768:s*32767}return new Uint8Array(e.buffer)}const C=16e3;class p{ws=null;_state="idle";events=S();inputSampleRate=C;get state(){return this._state}setState(e){this._state!==e&&(this._state=e,this.events.emit("stateChange",e))}handleMessage(e){if(e instanceof Blob){e.text().then(t=>this.parseAndHandle(t));return}this.parseAndHandle(e)}parseAndHandle(e){try{const t=JSON.parse(e);this.handleParsedMessage(t)}catch{console.warn(`[${this.agentName}] Failed to parse message:`,e.slice(0,200))}}close(){this.ws&&(this.ws.close(),this.ws=null),this.events.removeAllListeners(),this.setState("idle")}on(e,t){this.events.on(e,t)}off(e,t){this.events.off(e,t)}emitClosed(e,t){this.events.emit("closed",{code:e,reason:t})}}const k="gemini-2.5-flash-native-audio-preview-12-2025",R="wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent",M="wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";class f extends 
p{agentName="GeminiLive";async connect(e){if(this.ws)throw new Error("Already connected");if(!e.apiKey)throw new Error("Gemini API key is required");e.inputSampleRate&&(this.inputSampleRate=e.inputSampleRate);const t=e.model??k,n=(e.authType??"api_key")==="ephemeral_token"?`${M}?access_token=${encodeURIComponent(e.apiKey)}`:`${R}?key=${encodeURIComponent(e.apiKey)}`;return new Promise((a,o)=>{this.ws=new WebSocket(n),this.ws.onopen=()=>{const r={setup:{model:`models/${t}`,generationConfig:{responseModalities:["AUDIO"]},systemInstruction:e.systemPrompt?{parts:[{text:e.systemPrompt}]}:void 0}};this.ws.send(JSON.stringify(r)),this.setState("listening"),a()},this.ws.onerror=()=>{o(new Error("Failed to connect to Gemini Live"))},this.ws.onclose=r=>{this.ws=null,this.setState("idle"),this.emitClosed(r.code,r.reason)},this.ws.onmessage=r=>{this.handleMessage(r.data)}})}handleParsedMessage(e){const s=e.serverContent;if(s){if(s.interrupted){this.events.emit("interrupted",void 0),this.setState("listening");return}if(s.turnComplete){this.events.emit("turnEnd",void 0),this.setState("listening");return}if(s.modelTurn?.parts){this._state!=="speaking"&&(this.events.emit("turnStart",void 0),this.setState("speaking"));for(const n of s.modelTurn.parts){if(n.inlineData?.data){const a=c(n.inlineData.data);this.events.emit("audio",a)}n.text&&this.events.emit("transcript",{role:"assistant",text:n.text,isFinal:!0})}}}}sendAudio(e){if(!this.ws||this.ws.readyState!==WebSocket.OPEN){console.warn("[GeminiLive] Cannot send audio: not connected");return}const t={realtimeInput:{mediaChunks:[{mimeType:`audio/pcm;rate=${this.inputSampleRate}`,data:u(e)}]}};this.ws.send(JSON.stringify(t))}}const P="wss://api.elevenlabs.io/v1/convai/conversation";class w extends p{agentName="ElevenLabs";outputSampleRate=24e3;expectedInputSampleRate=16e3;sourceInputSampleRate=16e3;initialized=!1;lastInterruptId=0;async connect(e){if(this.ws)throw new Error("Already connected");if(!e.agentId&&!e.signedUrl)throw new 
Error("ElevenLabs agent ID or signed URL is required");e.inputSampleRate&&(this.sourceInputSampleRate=e.inputSampleRate);let t;return e.signedUrl?t=e.signedUrl:(t=`${P}?agent_id=${e.agentId}`,e.apiKey&&(t+=`&xi-api-key=${e.apiKey}`)),new Promise((s,n)=>{this.ws=new WebSocket(t),this.ws.onopen=()=>{this.setState("listening"),s()},this.ws.onerror=()=>{n(new Error("Failed to connect to ElevenLabs"))},this.ws.onclose=a=>{this.ws=null,this.setState("idle"),this.emitClosed(a.code,a.reason)},this.ws.onmessage=a=>{this.handleMessage(a.data)}})}handleParsedMessage(e){const t=e;switch(t.type){case"conversation_initiation_metadata":this.handleInitMetadata(t);break;case"ping":this.handlePing(t);break;case"audio":this.handleAudio(t);break;case"user_transcript":this.handleUserTranscript(t);break;case"agent_response":this.handleAgentResponse(t);break;case"interruption":this.handleInterruption(t);break;case"agent_response_correction":this.setState("listening");break}}handleInitMetadata(e){const t=e.conversation_initiation_metadata_event;if(t){if(t.agent_output_audio_format){const s=t.agent_output_audio_format.match(/pcm_(\d+)/);s&&(this.outputSampleRate=parseInt(s[1],10))}if(t.user_input_audio_format){const s=t.user_input_audio_format.match(/pcm_(\d+)/);s&&(this.expectedInputSampleRate=parseInt(s[1],10))}this.initialized=!0}}handlePing(e){if(this.ws&&this.ws.readyState===WebSocket.OPEN){const t=e.ping_event?.event_id;this.ws.send(JSON.stringify({type:"pong",event_id:t}))}}handleAudio(e){const t=e.audio_event;if(!t?.audio_base_64||(t.event_id??0)<=this.lastInterruptId)return;this._state!=="speaking"&&(this.events.emit("turnStart",void 0),this.setState("speaking"));let n=c(t.audio_base_64);this.outputSampleRate!==h&&(n=d(n,this.outputSampleRate,h));const a=4800;if(n.length<=a)this.events.emit("audio",n);else for(let o=0;o<n.length;o+=a){const r=n.slice(o,Math.min(o+a,n.length));this.events.emit("audio",r)}}handleUserTranscript(e){const 
t=e.user_transcription_event;t?.user_transcript&&this.events.emit("transcript",{role:"user",text:t.user_transcript,isFinal:!0})}handleAgentResponse(e){const t=e.agent_response_event;t?.agent_response&&this.events.emit("transcript",{role:"assistant",text:t.agent_response,isFinal:!0})}handleInterruption(e){const t=e.interruption_event;t?.event_id&&(this.lastInterruptId=t.event_id),this.events.emit("interrupted",void 0),this.setState("listening")}sendAudio(e){if(!this.ws||this.ws.readyState!==WebSocket.OPEN||!this.initialized)return;let t=e;this.sourceInputSampleRate!==this.expectedInputSampleRate&&(t=d(e,this.sourceInputSampleRate,this.expectedInputSampleRate)),this.ws.send(JSON.stringify({user_audio_chunk:u(t)}))}sendText(e){if(!this.ws||this.ws.readyState!==WebSocket.OPEN){console.warn("[ElevenLabs] Cannot send text: not connected");return}this.ws.send(JSON.stringify({type:"user_message",text:e}))}sendContext(e){if(!this.ws||this.ws.readyState!==WebSocket.OPEN){console.warn("[ElevenLabs] Cannot send context: not connected");return}this.ws.send(JSON.stringify({type:"contextual_update",text:e}))}close(){this.initialized=!1,this.lastInterruptId=0,super.close()}}const T="wss://api.cartesia.ai/agents/stream",N="2025-04-16";class y extends p{agentName="Cartesia";cartesiaInputFormat="pcm_16000";cartesiaOutputRate=16e3;streamId=null;isReady=!1;pingInterval=null;async connect(e){if(this.ws)throw new Error("Already connected");if(!e.agentId)throw new Error("Cartesia Agent ID is required");if(!e.apiKey)throw new Error("Cartesia API Key is required");e.inputSampleRate&&(this.inputSampleRate=e.inputSampleRate),this.inputSampleRate===16e3?this.cartesiaInputFormat="pcm_16000":this.inputSampleRate===24e3?this.cartesiaInputFormat="pcm_24000":this.inputSampleRate===44100?this.cartesiaInputFormat="pcm_44100":this.cartesiaInputFormat="pcm_16000";const t=`${T}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${N}`;return new Promise((s,n)=>{this.ws=new 
WebSocket(t),this.ws.onopen=()=>{this.sendStartEvent(),this.startHeartbeat(),s()},this.ws.onerror=()=>{n(new Error("Failed to connect to Cartesia"))},this.ws.onclose=a=>{this.stopHeartbeat(),this.ws=null,this.isReady=!1,this.streamId=null,this.setState("idle"),this.emitClosed(a.code,a.reason)},this.ws.onmessage=a=>{this.handleMessage(a.data)}})}sendStartEvent(){if(!this.ws)return;const e={event:"start",config:{input_format:this.cartesiaInputFormat}};this.ws.send(JSON.stringify(e))}startHeartbeat(){this.pingInterval=window.setInterval(()=>{this.ws?.readyState===WebSocket.OPEN&&this.streamId&&this.ws.send(JSON.stringify({event:"custom",stream_id:this.streamId,metadata:{keepalive:!0}}))},2e4)}stopHeartbeat(){this.pingInterval&&(clearInterval(this.pingInterval),this.pingInterval=null)}handleParsedMessage(e){const t=e;switch(t.event){case"ack":this.handleAck(t);break;case"media_output":this.handleMediaOutput(t);break;case"clear":this.handleClear();break;case"error":console.error("[Cartesia] Server error:",t);break}}handleAck(e){this.streamId=e.stream_id||null,this.isReady=!0,this.setState("listening")}handleMediaOutput(e){if(!e.media?.payload)return;this._state!=="speaking"&&(this.events.emit("turnStart",void 0),this.setState("speaking"));let t=c(e.media.payload);this.cartesiaOutputRate!==h&&(t=d(t,this.cartesiaOutputRate,h)),this.events.emit("audio",t)}handleClear(){this.events.emit("interrupted",void 0),this.setState("listening")}sendAudio(e){if(!this.ws||this.ws.readyState!==WebSocket.OPEN||!this.isReady||!this.streamId)return;let t=e;const s=parseInt(this.cartesiaInputFormat.split("_")[1]);this.inputSampleRate!==s&&(t=d(e,this.inputSampleRate,s)),this.ws.send(JSON.stringify({event:"media_input",stream_id:this.streamId,media:{payload:u(t)}}))}close(){this.stopHeartbeat(),this.isReady=!1,this.streamId=null,super.close()}}const b=[{id:"gemini",name:"Gemini Live",description:"Google Gemini Live API"},{id:"elevenlabs",name:"ElevenLabs",description:"ElevenLabs 
Conversational AI"},{id:"cartesia",name:"Cartesia",description:"Cartesia Agents API"}];function I(i){switch(i){case"gemini":return new f;case"elevenlabs":return new w;case"cartesia":return new y;default:throw new Error(`Unknown agent type: ${i}`)}}function O(i){return b.find(e=>e.id===i)}const l=new Set;class L{apiBaseUrl;publishableKey;callbacks;_video;_audio;session=null;agent=null;audioContext=null;processor=null;stream=null;abortController=null;_status="disconnected";_agentState="idle";_isMuted=!1;mounted=!0;constructor(e){this.apiBaseUrl=e.apiBaseUrl??"https://api.keyframelabs.com",this.publishableKey=e.publishableKey,this.callbacks={onDisconnect:e.onDisconnect,onError:e.onError,onStateChange:e.onStateChange,onAgentStateChange:e.onAgentStateChange},this._video=document.createElement("video"),this._video.style.position="absolute",this._video.style.inset="0",this._video.style.width="100%",this._video.style.height="100%",this._video.style.objectFit=e.videoFit??"cover",this._video.autoplay=!0,this._video.playsInline=!0,this._video.muted=!0,e.container.style.position="relative",e.container.style.backgroundColor="#000",this._audio=document.createElement("audio"),this._audio.autoplay=!0,e.container.appendChild(this._video),e.container.appendChild(this._audio)}get status(){return this._status}get agentState(){return this._agentState}get isMuted(){return this._isMuted}get videoElement(){return this._video}get audioElement(){return this._audio}async connect(){if(l.has(this.publishableKey)){console.log("[PersonaEmbed] Connection already in progress, skipping");return}l.add(this.publishableKey),this.mounted=!0,this.abortController=new AbortController,this.setStatus("connecting");try{const e=await this.fetchSession(this.abortController.signal);if(!this.mounted){l.delete(this.publishableKey);return}if(await this.initSession(e),await this.initMicrophone(),await 
this.connectAgent(e.voice_agent_details),!this.mounted){this.cleanup(),l.delete(this.publishableKey);return}this.setStatus("connected")}catch(e){if(l.delete(this.publishableKey),e instanceof Error&&e.name==="AbortError")return;console.error("[PersonaEmbed]",e),this.mounted&&(this.setStatus("error"),this.callbacks.onError?.(e))}}disconnect(){this.mounted=!1,this.abortController?.abort(),this.abortController=null,l.delete(this.publishableKey),this.cleanup(),this.setStatus("disconnected")}toggleMute(){this._isMuted=!this._isMuted}setStatus(e){this._status!==e&&(this._status=e,this.callbacks.onStateChange?.(e))}setAgentState(e){this._agentState!==e&&(this._agentState=e,this.callbacks.onAgentStateChange?.(e))}async fetchSession(e){const t=await fetch(`${this.apiBaseUrl}/v1/embed/create_session`,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({publishable_key:this.publishableKey}),signal:e});if(!t.ok){const s=await t.json().catch(()=>null);throw new Error(`create_session failed: ${t.status} ${JSON.stringify(s)}`)}return t.json()}async initSession(e){this.session=A.createClient({serverUrl:e.session_details.server_url,participantToken:e.session_details.participant_token,agentIdentity:e.session_details.agent_identity,onVideoTrack:t=>{console.log("[PersonaEmbed] Setting video track",t.readyState,t.enabled),this._video.srcObject=new MediaStream([t]),this._video.play().catch(s=>console.warn("[PersonaEmbed] Video play failed:",s))},onAudioTrack:t=>{this._audio.srcObject=new 
MediaStream([t]),this._audio.play().catch(()=>{})},onStateChange:t=>{this.mounted&&t==="disconnected"&&(this.setStatus("disconnected"),this.callbacks.onDisconnect?.())},onError:t=>{this.mounted&&this.callbacks.onError?.(t)},onClose:()=>{this.mounted&&this.callbacks.onDisconnect?.()}}),this.agent=I(e.voice_agent_details.type),this.agent.on("audio",t=>this.session?.sendAudio(t)),this.agent.on("interrupted",()=>this.session?.interrupt()),this.agent.on("stateChange",t=>this.setAgentState(t)),this.agent.on("closed",()=>{this.mounted&&this.callbacks.onDisconnect?.()}),await this.session.connect()}async initMicrophone(){this.stream=await navigator.mediaDevices.getUserMedia({audio:{sampleRate:16e3,echoCancellation:!0,noiseSuppression:!0}}),this.audioContext=new AudioContext({sampleRate:16e3});const e=this.audioContext.createMediaStreamSource(this.stream);this.processor=this.audioContext.createScriptProcessor(4096,1,1),this.processor.onaudioprocess=t=>{if(!this._isMuted){const s=v(t.inputBuffer.getChannelData(0));this.agent?.sendAudio(s)}},e.connect(this.processor),this.processor.connect(this.audioContext.destination)}async connectAgent(e){if(!this.agent)return;const t={inputSampleRate:16e3};e.type==="gemini"?await this.agent.connect({...t,apiKey:e.token,authType:"ephemeral_token"}):e.type==="elevenlabs"?await this.agent.connect({...t,agentId:e.agent_id,signedUrl:e.signed_url}):e.type==="cartesia"&&await 
this.agent.connect({...t,agentId:e.agent_id,apiKey:e.token})}cleanup(){this.stream?.getTracks().forEach(e=>e.stop()),this.processor?.disconnect(),this.audioContext?.close(),this.agent?.close(),this.session?.close(),this.stream=null,this.processor=null,this.audioContext=null,this.agent=null,this.session=null}}exports.AGENT_REGISTRY=b;exports.BaseAgent=p;exports.CartesiaAgent=y;exports.ElevenLabsAgent=w;exports.GeminiLiveAgent=f;exports.PersonaEmbed=L;exports.SAMPLE_RATE=h;exports.base64ToBytes=c;exports.bytesToBase64=u;exports.createAgent=I;exports.createEventEmitter=S;exports.floatTo16BitPCM=v;exports.getAgentInfo=O;exports.resamplePcm=d;
|
|
1
|
+
import { createClient as v } from "@keyframelabs/sdk";
|
|
2
|
+
const c = 24e3;
|
|
3
|
+
/**
 * Decode a base64 string into raw bytes.
 * @param {string} i - Base64-encoded data.
 * @returns {Uint8Array} Decoded byte buffer.
 */
function p(i) {
  const decoded = atob(i);
  const bytes = new Uint8Array(decoded.length);
  let idx = 0;
  // atob yields only code units <= 0xFF, so per-character iteration is safe.
  for (const ch of decoded) {
    bytes[idx++] = ch.charCodeAt(0);
  }
  return bytes;
}
|
|
9
|
+
/**
 * Encode raw bytes as a base64 string.
 * @param {Uint8Array} i - Byte buffer to encode.
 * @returns {string} Base64-encoded text.
 */
function g(i) {
  const chars = [];
  for (const byte of i) {
    chars.push(String.fromCharCode(byte));
  }
  return btoa(chars.join(""));
}
|
|
15
|
+
/**
 * Resample 16-bit PCM audio bytes from one sample rate to another
 * using linear interpolation between neighbouring samples.
 * Returns the input unchanged (same object) when the rates already match.
 * @param {Uint8Array} i - Source bytes viewed as little-endian Int16 samples.
 * @param {number} e - Source sample rate in Hz.
 * @param {number} t - Target sample rate in Hz.
 * @returns {Uint8Array} Resampled 16-bit PCM bytes.
 */
function d(i, e, t) {
  if (e === t) {
    return i;
  }
  const samples = new Int16Array(i.buffer, i.byteOffset, i.length / 2);
  const ratio = e / t;
  const outLength = Math.floor(samples.length / ratio);
  const resampled = new Int16Array(outLength);
  for (let idx = 0; idx < outLength; idx++) {
    const pos = idx * ratio;
    const lo = Math.floor(pos);
    const hi = Math.min(lo + 1, samples.length - 1);
    const frac = pos - lo;
    // Weighted blend of the two nearest source samples.
    resampled[idx] = Math.round(samples[lo] * (1 - frac) + samples[hi] * frac);
  }
  return new Uint8Array(resampled.buffer);
}
|
|
27
|
+
/**
 * Create a minimal typed event emitter backed by a Map of listener Sets.
 * @returns {{on: Function, off: Function, emit: Function, removeAllListeners: Function}}
 */
function b() {
  const listeners = /* @__PURE__ */ new Map();
  return {
    /** Register a handler for an event name. */
    on(e, t) {
      let set = listeners.get(e);
      if (!set) {
        set = /* @__PURE__ */ new Set();
        listeners.set(e, set);
      }
      set.add(t);
    },
    /** Remove a previously registered handler. */
    off(e, t) {
      listeners.get(e)?.delete(t);
    },
    /** Invoke every handler registered for the event with the payload. */
    emit(e, t) {
      listeners.get(e)?.forEach((handler) => handler(t));
    },
    /** Drop every handler for every event. */
    removeAllListeners() {
      listeners.clear();
    }
  };
}
|
|
44
|
+
/**
 * Convert float audio samples in [-1, 1] to 16-bit signed PCM bytes.
 * Out-of-range values are clamped; negatives scale by 32768 and
 * positives by 32767 so both endpoints map to the Int16 extremes.
 * @param {Float32Array|number[]} i - Float audio samples.
 * @returns {Uint8Array} Byte view over the Int16 sample buffer (host endianness).
 */
function y(i) {
  const pcm = new Int16Array(i.length);
  for (let idx = 0; idx < i.length; idx++) {
    const clamped = Math.min(1, Math.max(-1, i[idx]));
    pcm[idx] = clamped < 0 ? clamped * 32768 : clamped * 32767;
  }
  return new Uint8Array(pcm.buffer);
}
|
|
52
|
+
// Default input (microphone) sample rate in Hz assumed by agents.
const I = 16e3;
/**
 * Shared base class for voice-agent WebSocket clients.
 * Owns the socket handle, the agent state machine, and the event emitter;
 * subclasses implement the provider-specific connect/handleParsedMessage.
 */
class m {
  ws = null;
  _state = "idle";
  events = b();
  inputSampleRate = I;
  /** Current agent state */
  get state() {
    return this._state;
  }
  /**
   * Update state and emit stateChange event (no-op when unchanged).
   */
  setState(e) {
    if (this._state === e) return;
    this._state = e;
    this.events.emit("stateChange", e);
  }
  /**
   * Handle WebSocket message (string or Blob).
   * Converts to string and parses JSON before calling handleParsedMessage.
   */
  handleMessage(e) {
    if (e instanceof Blob) {
      e.text().then((t) => this.parseAndHandle(t));
    } else {
      this.parseAndHandle(e);
    }
  }
  /**
   * Parse JSON and call handleParsedMessage if valid.
   * Parse (or dispatch) failures are logged, never thrown.
   */
  parseAndHandle(e) {
    try {
      this.handleParsedMessage(JSON.parse(e));
    } catch {
      console.warn(`[${this.agentName}] Failed to parse message:`, e.slice(0, 200));
    }
  }
  /**
   * Close the WebSocket connection and clean up resources.
   * Subclasses can override to add custom cleanup, but should call super.close().
   */
  close() {
    if (this.ws) {
      this.ws.close();
      this.ws = null;
    }
    this.events.removeAllListeners();
    this.setState("idle");
  }
  /**
   * Register an event handler.
   */
  on(e, t) {
    this.events.on(e, t);
  }
  /**
   * Remove an event handler.
   */
  off(e, t) {
    this.events.off(e, t);
  }
  /**
   * Helper to emit the closed event with code and reason.
   */
  emitClosed(e, t) {
    this.events.emit("closed", { code: e, reason: t });
  }
}
|
|
116
|
+
// Default Gemini Live model plus the two Bidi WebSocket endpoints:
// the standard API-key endpoint and the constrained ephemeral-token endpoint.
const C = "gemini-2.5-flash-native-audio-preview-12-2025";
const E = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1beta.GenerativeService.BidiGenerateContent";
const A = "wss://generativelanguage.googleapis.com/ws/google.ai.generativelanguage.v1alpha.GenerativeService.BidiGenerateContentConstrained";
/**
 * Voice agent backed by the Gemini Live bidirectional WebSocket API.
 */
class k extends m {
  agentName = "GeminiLive";
  /**
   * Open the WebSocket and send the session setup frame.
   * Resolves once the socket is open; rejects on connection failure.
   * @throws {Error} When already connected or the API key is missing.
   */
  async connect(e) {
    if (this.ws) throw new Error("Already connected");
    if (!e.apiKey) throw new Error("Gemini API key is required");
    if (e.inputSampleRate) this.inputSampleRate = e.inputSampleRate;
    const model = e.model ?? C;
    const useEphemeral = (e.authType ?? "api_key") === "ephemeral_token";
    const url = useEphemeral
      ? `${A}?access_token=${encodeURIComponent(e.apiKey)}`
      : `${E}?key=${encodeURIComponent(e.apiKey)}`;
    return new Promise((resolve, reject) => {
      const socket = new WebSocket(url);
      this.ws = socket;
      socket.onopen = () => {
        // First frame must be the session setup describing model and modalities.
        const setupFrame = {
          setup: {
            model: `models/${model}`,
            generationConfig: {
              responseModalities: ["AUDIO"]
            },
            systemInstruction: e.systemPrompt ? { parts: [{ text: e.systemPrompt }] } : void 0
          }
        };
        socket.send(JSON.stringify(setupFrame));
        this.setState("listening");
        resolve();
      };
      socket.onerror = () => {
        reject(new Error("Failed to connect to Gemini Live"));
      };
      socket.onclose = (ev) => {
        this.ws = null;
        this.setState("idle");
        this.emitClosed(ev.code, ev.reason);
      };
      socket.onmessage = (ev) => {
        this.handleMessage(ev.data);
      };
    });
  }
  /** Route a parsed Gemini server frame to the matching events. */
  handleParsedMessage(e) {
    const content = e.serverContent;
    if (!content) return;
    if (content.interrupted) {
      this.events.emit("interrupted", void 0);
      this.setState("listening");
      return;
    }
    if (content.turnComplete) {
      this.events.emit("turnEnd", void 0);
      this.setState("listening");
      return;
    }
    if (!content.modelTurn?.parts) return;
    if (this._state !== "speaking") {
      this.events.emit("turnStart", void 0);
      this.setState("speaking");
    }
    for (const part of content.modelTurn.parts) {
      if (part.inlineData?.data) {
        this.events.emit("audio", p(part.inlineData.data));
      }
      if (part.text) {
        this.events.emit("transcript", {
          role: "assistant",
          text: part.text,
          isFinal: true
        });
      }
    }
  }
  /** Forward one PCM chunk to Gemini as a base64 realtime media chunk. */
  sendAudio(e) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      console.warn("[GeminiLive] Cannot send audio: not connected");
      return;
    }
    this.ws.send(JSON.stringify({
      realtimeInput: {
        mediaChunks: [
          {
            mimeType: `audio/pcm;rate=${this.inputSampleRate}`,
            data: g(e)
          }
        ]
      }
    }));
  }
}
|
|
192
|
+
// ElevenLabs Conversational AI WebSocket endpoint.
const R = "wss://api.elevenlabs.io/v1/convai/conversation";
/**
 * Voice agent backed by the ElevenLabs Conversational AI WebSocket API.
 */
class M extends m {
  agentName = "ElevenLabs";
  outputSampleRate = 24e3;        // Default, updated from metadata
  expectedInputSampleRate = 16e3; // What ElevenLabs expects, updated from metadata
  sourceInputSampleRate = 16e3;   // What caller sends via sendAudio, from config
  initialized = false;            // True after conversation_initiation_metadata received
  lastInterruptId = 0;            // Track interruptions to filter stale audio
  /**
   * Open the conversation socket via signed URL or agent id (+ optional key).
   * @throws {Error} When already connected or neither agentId nor signedUrl is set.
   */
  async connect(e) {
    if (this.ws) throw new Error("Already connected");
    if (!e.agentId && !e.signedUrl) throw new Error("ElevenLabs agent ID or signed URL is required");
    if (e.inputSampleRate) this.sourceInputSampleRate = e.inputSampleRate;
    let url;
    if (e.signedUrl) {
      url = e.signedUrl;
    } else {
      url = `${R}?agent_id=${e.agentId}`;
      if (e.apiKey) url += `&xi-api-key=${e.apiKey}`;
    }
    return new Promise((resolve, reject) => {
      const socket = new WebSocket(url);
      this.ws = socket;
      socket.onopen = () => {
        this.setState("listening");
        resolve();
      };
      socket.onerror = () => {
        reject(new Error("Failed to connect to ElevenLabs"));
      };
      socket.onclose = (ev) => {
        this.ws = null;
        this.setState("idle");
        this.emitClosed(ev.code, ev.reason);
      };
      socket.onmessage = (ev) => {
        this.handleMessage(ev.data);
      };
    });
  }
  /** Dispatch a parsed server message by its "type" field. */
  handleParsedMessage(e) {
    const msg = e;
    if (msg.type === "conversation_initiation_metadata") {
      this.handleInitMetadata(msg);
    } else if (msg.type === "ping") {
      this.handlePing(msg);
    } else if (msg.type === "audio") {
      this.handleAudio(msg);
    } else if (msg.type === "user_transcript") {
      this.handleUserTranscript(msg);
    } else if (msg.type === "agent_response") {
      this.handleAgentResponse(msg);
    } else if (msg.type === "interruption") {
      this.handleInterruption(msg);
    } else if (msg.type === "agent_response_correction") {
      this.setState("listening");
    }
  }
  /** Record negotiated audio formats from the initiation metadata frame. */
  handleInitMetadata(e) {
    const meta = e.conversation_initiation_metadata_event;
    if (!meta) return;
    if (meta.agent_output_audio_format) {
      const match = meta.agent_output_audio_format.match(/pcm_(\d+)/);
      if (match) this.outputSampleRate = parseInt(match[1], 10);
    }
    if (meta.user_input_audio_format) {
      const match = meta.user_input_audio_format.match(/pcm_(\d+)/);
      if (match) this.expectedInputSampleRate = parseInt(match[1], 10);
    }
    this.initialized = true;
  }
  /** Answer a server ping with a pong echoing the event id. */
  handlePing(e) {
    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify({ type: "pong", event_id: e.ping_event?.event_id }));
    }
  }
  /** Decode, resample, and chunk agent audio; drop frames older than the last interruption. */
  handleAudio(e) {
    const audioEvent = e.audio_event;
    if (!audioEvent?.audio_base_64) return;
    if ((audioEvent.event_id ?? 0) <= this.lastInterruptId) return;
    if (this._state !== "speaking") {
      this.events.emit("turnStart", void 0);
      this.setState("speaking");
    }
    let pcm = p(audioEvent.audio_base_64);
    if (this.outputSampleRate !== c) pcm = d(pcm, this.outputSampleRate, c);
    const chunkSize = 4800;
    if (pcm.length <= chunkSize) {
      this.events.emit("audio", pcm);
      return;
    }
    for (let offset = 0; offset < pcm.length; offset += chunkSize) {
      this.events.emit("audio", pcm.slice(offset, Math.min(offset + chunkSize, pcm.length)));
    }
  }
  /** Emit a final user transcript when present. */
  handleUserTranscript(e) {
    const ev = e.user_transcription_event;
    if (ev?.user_transcript) {
      this.events.emit("transcript", {
        role: "user",
        text: ev.user_transcript,
        isFinal: true
      });
    }
  }
  /** Emit a final assistant transcript when present. */
  handleAgentResponse(e) {
    const ev = e.agent_response_event;
    if (ev?.agent_response) {
      this.events.emit("transcript", {
        role: "assistant",
        text: ev.agent_response,
        isFinal: true
      });
    }
  }
  /** Remember the interruption id so stale audio frames can be filtered. */
  handleInterruption(e) {
    const ev = e.interruption_event;
    if (ev?.event_id) this.lastInterruptId = ev.event_id;
    this.events.emit("interrupted", void 0);
    this.setState("listening");
  }
  /** Resample (if needed) and send one base64 PCM chunk of user audio. */
  sendAudio(e) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN || !this.initialized) return;
    const needsResample = this.sourceInputSampleRate !== this.expectedInputSampleRate;
    const payload = needsResample ? d(e, this.sourceInputSampleRate, this.expectedInputSampleRate) : e;
    this.ws.send(JSON.stringify({
      user_audio_chunk: g(payload)
    }));
  }
  /**
   * Send a text message as if the user spoke it.
   */
  sendText(e) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      console.warn("[ElevenLabs] Cannot send text: not connected");
      return;
    }
    this.ws.send(JSON.stringify({
      type: "user_message",
      text: e
    }));
  }
  /**
   * Send contextual information to the agent without interrupting.
   */
  sendContext(e) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN) {
      console.warn("[ElevenLabs] Cannot send context: not connected");
      return;
    }
    this.ws.send(JSON.stringify({
      type: "contextual_update",
      text: e
    }));
  }
  /** Reset negotiation state before the base-class teardown. */
  close() {
    this.initialized = false;
    this.lastInterruptId = 0;
    super.close();
  }
}
|
|
344
|
+
// Cartesia Agents streaming endpoint and the pinned API version value.
const x = "wss://api.cartesia.ai/agents/stream";
const P = "2025-04-16";
/**
 * Voice agent backed by the Cartesia Agents streaming WebSocket API.
 */
class T extends m {
  agentName = "Cartesia";
  // Audio configuration
  cartesiaInputFormat = "pcm_16000"; // Format we tell Cartesia we are sending
  cartesiaOutputRate = 16e3;         // Cartesia defaults to 16kHz for web
  // Connection state
  streamId = null;
  isReady = false;
  pingInterval = null;
  /**
   * Open the stream socket, send the start event, and begin the heartbeat.
   * @throws {Error} When already connected or agentId/apiKey are missing.
   */
  async connect(e) {
    if (this.ws) throw new Error("Already connected");
    if (!e.agentId) throw new Error("Cartesia Agent ID is required");
    if (!e.apiKey) throw new Error("Cartesia API Key is required");
    if (e.inputSampleRate) this.inputSampleRate = e.inputSampleRate;
    // Only rates Cartesia supports map directly; anything else falls back to 16 kHz.
    if (this.inputSampleRate === 16e3) this.cartesiaInputFormat = "pcm_16000";
    else if (this.inputSampleRate === 24e3) this.cartesiaInputFormat = "pcm_24000";
    else if (this.inputSampleRate === 44100) this.cartesiaInputFormat = "pcm_44100";
    else this.cartesiaInputFormat = "pcm_16000";
    const url = `${x}/${e.agentId}?api_key=${e.apiKey}&cartesia_version=${P}`;
    return new Promise((resolve, reject) => {
      this.ws = new WebSocket(url);
      this.ws.onopen = () => {
        this.sendStartEvent();
        this.startHeartbeat();
        resolve();
      };
      this.ws.onerror = () => {
        reject(new Error("Failed to connect to Cartesia"));
      };
      this.ws.onclose = (ev) => {
        this.stopHeartbeat();
        this.ws = null;
        this.isReady = false;
        this.streamId = null;
        this.setState("idle");
        this.emitClosed(ev.code, ev.reason);
      };
      this.ws.onmessage = (ev) => {
        this.handleMessage(ev.data);
      };
    });
  }
  /** Announce the input audio format before streaming media. */
  sendStartEvent() {
    if (!this.ws) return;
    this.ws.send(JSON.stringify({
      event: "start",
      config: {
        input_format: this.cartesiaInputFormat
      }
    }));
  }
  /**
   * Keep connection alive with periodic custom events.
   * Cartesia requires activity every 30s.
   */
  startHeartbeat() {
    this.pingInterval = window.setInterval(() => {
      if (this.ws?.readyState === WebSocket.OPEN && this.streamId) {
        this.ws.send(JSON.stringify({
          event: "custom",
          stream_id: this.streamId,
          metadata: { keepalive: true }
        }));
      }
    }, 2e4);
  }
  /** Stop the keepalive timer if it is running. */
  stopHeartbeat() {
    if (this.pingInterval) {
      clearInterval(this.pingInterval);
      this.pingInterval = null;
    }
  }
  /** Dispatch a parsed server message by its "event" field. */
  handleParsedMessage(e) {
    const msg = e;
    switch (msg.event) {
      case "ack":
        this.handleAck(msg);
        break;
      case "media_output":
        this.handleMediaOutput(msg);
        break;
      case "clear":
        this.handleClear();
        break;
      case "error":
        console.error("[Cartesia] Server error:", msg);
        break;
    }
  }
  /** Record the stream id and mark the connection ready for media. */
  handleAck(e) {
    this.streamId = e.stream_id || null;
    this.isReady = true;
    this.setState("listening");
  }
  /** Decode agent audio, resample to the playback rate, and emit it. */
  handleMediaOutput(e) {
    if (!e.media?.payload) return;
    if (this._state !== "speaking") {
      this.events.emit("turnStart", void 0);
      this.setState("speaking");
    }
    let pcm = p(e.media.payload);
    if (this.cartesiaOutputRate !== c) pcm = d(pcm, this.cartesiaOutputRate, c);
    this.events.emit("audio", pcm);
  }
  /** Server cleared pending audio: treat as an interruption. */
  handleClear() {
    this.events.emit("interrupted", void 0);
    this.setState("listening");
  }
  /** Resample (if needed) and send one base64 PCM chunk of user audio. */
  sendAudio(e) {
    if (!this.ws || this.ws.readyState !== WebSocket.OPEN || !this.isReady || !this.streamId) return;
    // Fix: explicit radix 10 — the original parseInt omitted it.
    const targetRate = parseInt(this.cartesiaInputFormat.split("_")[1], 10);
    const payload = this.inputSampleRate !== targetRate ? d(e, this.inputSampleRate, targetRate) : e;
    this.ws.send(JSON.stringify({
      event: "media_input",
      stream_id: this.streamId,
      media: {
        payload: g(payload)
      }
    }));
  }
  /** Stop the heartbeat and reset stream state before the base-class teardown. */
  close() {
    this.stopHeartbeat();
    this.isReady = false;
    this.streamId = null;
    super.close();
  }
}
|
|
449
|
+
const O = [
|
|
450
|
+
{ id: "gemini", name: "Gemini Live", description: "Google Gemini Live API" },
|
|
451
|
+
{ id: "elevenlabs", name: "ElevenLabs", description: "ElevenLabs Conversational AI" },
|
|
452
|
+
{ id: "cartesia", name: "Cartesia", description: "Cartesia Agents API" }
|
|
453
|
+
];
|
|
454
|
+
function f(i) {
|
|
455
|
+
switch (i) {
|
|
456
|
+
case "gemini":
|
|
457
|
+
return new k();
|
|
458
|
+
case "elevenlabs":
|
|
459
|
+
return new M();
|
|
460
|
+
case "cartesia":
|
|
461
|
+
return new T();
|
|
462
|
+
default:
|
|
463
|
+
throw new Error(`Unknown agent type: ${i}`);
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
function L(i) {
|
|
467
|
+
return O.find((e) => e.id === i);
|
|
468
|
+
}
|
|
469
|
+
class D extends Error {
|
|
470
|
+
status;
|
|
471
|
+
payload;
|
|
472
|
+
url;
|
|
473
|
+
constructor(e) {
|
|
474
|
+
super(e.message), this.name = "ApiError", this.status = e.status, this.payload = e.payload, this.url = e.url;
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
const l = /* @__PURE__ */ new Set();
|
|
478
|
+
class U {
|
|
479
|
+
apiBaseUrl;
|
|
480
|
+
publishableKey;
|
|
481
|
+
callbacks;
|
|
482
|
+
// DOM
|
|
483
|
+
_video;
|
|
484
|
+
_audio;
|
|
485
|
+
// Session
|
|
486
|
+
session = null;
|
|
487
|
+
agent = null;
|
|
488
|
+
audioContext = null;
|
|
489
|
+
processor = null;
|
|
490
|
+
stream = null;
|
|
491
|
+
abortController = null;
|
|
492
|
+
_status = "disconnected";
|
|
493
|
+
_agentState = "idle";
|
|
494
|
+
_isMuted = !1;
|
|
495
|
+
mounted = !0;
|
|
496
|
+
constructor(e) {
|
|
497
|
+
this.apiBaseUrl = e.apiBaseUrl ?? "https://api.keyframelabs.com", this.publishableKey = e.publishableKey, this.callbacks = {
|
|
498
|
+
onDisconnect: e.onDisconnect,
|
|
499
|
+
onError: e.onError,
|
|
500
|
+
onStateChange: e.onStateChange,
|
|
501
|
+
onAgentStateChange: e.onAgentStateChange
|
|
502
|
+
}, this._video = document.createElement("video"), this._video.style.position = "absolute", this._video.style.inset = "0", this._video.style.width = "100%", this._video.style.height = "100%", this._video.style.objectFit = e.videoFit ?? "cover", this._video.autoplay = !0, this._video.playsInline = !0, this._video.muted = !0, e.container.style.position = "relative", e.container.style.backgroundColor = "#000", this._audio = document.createElement("audio"), this._audio.autoplay = !0, e.container.appendChild(this._video), e.container.appendChild(this._audio);
|
|
503
|
+
}
|
|
504
|
+
// Read-only state
|
|
505
|
+
get status() {
|
|
506
|
+
return this._status;
|
|
507
|
+
}
|
|
508
|
+
get agentState() {
|
|
509
|
+
return this._agentState;
|
|
510
|
+
}
|
|
511
|
+
get isMuted() {
|
|
512
|
+
return this._isMuted;
|
|
513
|
+
}
|
|
514
|
+
get videoElement() {
|
|
515
|
+
return this._video;
|
|
516
|
+
}
|
|
517
|
+
get audioElement() {
|
|
518
|
+
return this._audio;
|
|
519
|
+
}
|
|
520
|
+
/** Connect to the embed session */
|
|
521
|
+
async connect() {
|
|
522
|
+
if (l.has(this.publishableKey)) {
|
|
523
|
+
console.log("[PersonaEmbed] Connection already in progress, skipping");
|
|
524
|
+
return;
|
|
525
|
+
}
|
|
526
|
+
l.add(this.publishableKey), this.mounted = !0, this.abortController = new AbortController(), this.setStatus("connecting");
|
|
527
|
+
try {
|
|
528
|
+
const e = await this.fetchSession(this.abortController.signal);
|
|
529
|
+
if (!this.mounted) {
|
|
530
|
+
l.delete(this.publishableKey);
|
|
531
|
+
return;
|
|
532
|
+
}
|
|
533
|
+
if (await this.initSession(e), await this.initMicrophone(), await this.connectAgent(e.voice_agent_details), !this.mounted) {
|
|
534
|
+
this.cleanup(), l.delete(this.publishableKey);
|
|
535
|
+
return;
|
|
536
|
+
}
|
|
537
|
+
this.setStatus("connected");
|
|
538
|
+
} catch (e) {
|
|
539
|
+
if (l.delete(this.publishableKey), e instanceof Error && e.name === "AbortError")
|
|
540
|
+
return;
|
|
541
|
+
console.error("[PersonaEmbed]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
|
|
542
|
+
}
|
|
543
|
+
}
|
|
544
|
+
/** Disconnect and cleanup */
|
|
545
|
+
disconnect() {
|
|
546
|
+
this.mounted = !1, this.abortController?.abort(), this.abortController = null, l.delete(this.publishableKey), this.cleanup(), this.setStatus("disconnected");
|
|
547
|
+
}
|
|
548
|
+
/** Toggle microphone mute */
|
|
549
|
+
toggleMute() {
|
|
550
|
+
this._isMuted = !this._isMuted;
|
|
551
|
+
}
|
|
552
|
+
setStatus(e) {
|
|
553
|
+
this._status !== e && (this._status = e, this.callbacks.onStateChange?.(e));
|
|
554
|
+
}
|
|
555
|
+
setAgentState(e) {
|
|
556
|
+
this._agentState !== e && (this._agentState = e, this.callbacks.onAgentStateChange?.(e));
|
|
557
|
+
}
|
|
558
|
+
async fetchSession(e) {
|
|
559
|
+
const t = await fetch(`${this.apiBaseUrl}/v1/embed/create_session`, {
|
|
560
|
+
method: "POST",
|
|
561
|
+
headers: { "Content-Type": "application/json" },
|
|
562
|
+
body: JSON.stringify({ publishable_key: this.publishableKey }),
|
|
563
|
+
signal: e
|
|
564
|
+
});
|
|
565
|
+
if (!t.ok) {
|
|
566
|
+
let s;
|
|
567
|
+
try {
|
|
568
|
+
s = await t.json();
|
|
569
|
+
} catch {
|
|
570
|
+
}
|
|
571
|
+
throw new D({
|
|
572
|
+
message: s?.message ?? "create_session failed",
|
|
573
|
+
status: t.status,
|
|
574
|
+
payload: s,
|
|
575
|
+
url: t.url
|
|
576
|
+
});
|
|
577
|
+
}
|
|
578
|
+
if (!t.ok) {
|
|
579
|
+
const s = await t.json().catch(() => null);
|
|
580
|
+
throw new Error(`create_session failed: ${t.status} ${JSON.stringify(s)}`);
|
|
581
|
+
}
|
|
582
|
+
return t.json();
|
|
583
|
+
}
|
|
584
|
+
async initSession(e) {
|
|
585
|
+
this.session = v({
|
|
586
|
+
serverUrl: e.session_details.server_url,
|
|
587
|
+
participantToken: e.session_details.participant_token,
|
|
588
|
+
agentIdentity: e.session_details.agent_identity,
|
|
589
|
+
onVideoTrack: (t) => {
|
|
590
|
+
console.log("[PersonaEmbed] Setting video track", t.readyState, t.enabled), this._video.srcObject = new MediaStream([t]), this._video.play().catch((s) => console.warn("[PersonaEmbed] Video play failed:", s));
|
|
591
|
+
},
|
|
592
|
+
onAudioTrack: (t) => {
|
|
593
|
+
this._audio.srcObject = new MediaStream([t]), this._audio.play().catch(() => {
|
|
594
|
+
});
|
|
595
|
+
},
|
|
596
|
+
onStateChange: (t) => {
|
|
597
|
+
this.mounted && t === "disconnected" && (this.setStatus("disconnected"), this.callbacks.onDisconnect?.());
|
|
598
|
+
},
|
|
599
|
+
onError: (t) => {
|
|
600
|
+
this.mounted && this.callbacks.onError?.(t);
|
|
601
|
+
},
|
|
602
|
+
onClose: () => {
|
|
603
|
+
this.mounted && this.callbacks.onDisconnect?.();
|
|
604
|
+
}
|
|
605
|
+
}), this.agent = f(e.voice_agent_details.type), this.agent.on("audio", (t) => this.session?.sendAudio(t)), this.agent.on("interrupted", () => this.session?.interrupt()), this.agent.on("stateChange", (t) => this.setAgentState(t)), this.agent.on("closed", () => {
|
|
606
|
+
this.mounted && this.callbacks.onDisconnect?.();
|
|
607
|
+
}), await this.session.connect();
|
|
608
|
+
}
|
|
609
|
+
async initMicrophone() {
|
|
610
|
+
this.stream = await navigator.mediaDevices.getUserMedia({
|
|
611
|
+
audio: { sampleRate: 16e3, echoCancellation: !0, noiseSuppression: !0 }
|
|
612
|
+
}), this.audioContext = new AudioContext({ sampleRate: 16e3 });
|
|
613
|
+
const e = this.audioContext.createMediaStreamSource(this.stream);
|
|
614
|
+
this.processor = this.audioContext.createScriptProcessor(4096, 1, 1), this.processor.onaudioprocess = (t) => {
|
|
615
|
+
if (!this._isMuted) {
|
|
616
|
+
const s = y(t.inputBuffer.getChannelData(0));
|
|
617
|
+
this.agent?.sendAudio(s);
|
|
618
|
+
}
|
|
619
|
+
}, e.connect(this.processor), this.processor.connect(this.audioContext.destination);
|
|
620
|
+
}
|
|
621
|
+
async connectAgent(e) {
|
|
622
|
+
if (!this.agent) return;
|
|
623
|
+
const t = { inputSampleRate: 16e3 };
|
|
624
|
+
e.type === "gemini" ? await this.agent.connect({
|
|
625
|
+
...t,
|
|
626
|
+
apiKey: e.token,
|
|
627
|
+
authType: "ephemeral_token"
|
|
628
|
+
}) : e.type === "elevenlabs" ? await this.agent.connect({
|
|
629
|
+
...t,
|
|
630
|
+
agentId: e.agent_id,
|
|
631
|
+
signedUrl: e.signed_url
|
|
632
|
+
}) : e.type === "cartesia" && await this.agent.connect({
|
|
633
|
+
...t,
|
|
634
|
+
agentId: e.agent_id,
|
|
635
|
+
apiKey: e.token
|
|
636
|
+
});
|
|
637
|
+
}
|
|
638
|
+
cleanup() {
|
|
639
|
+
this.stream?.getTracks().forEach((e) => e.stop()), this.processor?.disconnect(), this.audioContext?.close(), this.agent?.close(), this.session?.close(), this.stream = null, this.processor = null, this.audioContext = null, this.agent = null, this.session = null;
|
|
640
|
+
}
|
|
641
|
+
}
|
|
642
|
+
const h = /* @__PURE__ */ new Set();
|
|
643
|
+
class K {
|
|
644
|
+
voiceAgentDetails;
|
|
645
|
+
sessionDetails;
|
|
646
|
+
callbacks;
|
|
647
|
+
connectionId;
|
|
648
|
+
// DOM
|
|
649
|
+
_video;
|
|
650
|
+
_audio;
|
|
651
|
+
// Session
|
|
652
|
+
session = null;
|
|
653
|
+
agent = null;
|
|
654
|
+
audioContext = null;
|
|
655
|
+
processor = null;
|
|
656
|
+
stream = null;
|
|
657
|
+
_status = "disconnected";
|
|
658
|
+
_agentState = "idle";
|
|
659
|
+
_isMuted = !1;
|
|
660
|
+
mounted = !0;
|
|
661
|
+
constructor(e) {
|
|
662
|
+
this.voiceAgentDetails = e.voiceAgentDetails, this.sessionDetails = e.sessionDetails, this.callbacks = {
|
|
663
|
+
onDisconnect: e.onDisconnect,
|
|
664
|
+
onError: e.onError,
|
|
665
|
+
onStateChange: e.onStateChange,
|
|
666
|
+
onAgentStateChange: e.onAgentStateChange
|
|
667
|
+
}, this.connectionId = e.sessionDetails.participant_token, this._video = document.createElement("video"), this._video.style.position = "absolute", this._video.style.inset = "0", this._video.style.width = "100%", this._video.style.height = "100%", this._video.style.objectFit = e.videoFit ?? "cover", this._video.autoplay = !0, this._video.playsInline = !0, this._video.muted = !0, e.container.style.position = "relative", e.container.style.backgroundColor = "#000", this._audio = document.createElement("audio"), this._audio.autoplay = !0, e.container.appendChild(this._video), e.container.appendChild(this._audio);
|
|
668
|
+
}
|
|
669
|
+
// Read-only state
|
|
670
|
+
get status() {
|
|
671
|
+
return this._status;
|
|
672
|
+
}
|
|
673
|
+
get agentState() {
|
|
674
|
+
return this._agentState;
|
|
675
|
+
}
|
|
676
|
+
get isMuted() {
|
|
677
|
+
return this._isMuted;
|
|
678
|
+
}
|
|
679
|
+
get videoElement() {
|
|
680
|
+
return this._video;
|
|
681
|
+
}
|
|
682
|
+
get audioElement() {
|
|
683
|
+
return this._audio;
|
|
684
|
+
}
|
|
685
|
+
/** Connect to the session */
|
|
686
|
+
async connect() {
|
|
687
|
+
if (h.has(this.connectionId)) {
|
|
688
|
+
console.log("[PersonaView] Connection already in progress, skipping");
|
|
689
|
+
return;
|
|
690
|
+
}
|
|
691
|
+
h.add(this.connectionId), this.mounted = !0, this.setStatus("connecting");
|
|
692
|
+
try {
|
|
693
|
+
if (await this.initSession(), await this.initMicrophone(), await this.connectAgent(), !this.mounted) {
|
|
694
|
+
this.cleanup(), h.delete(this.connectionId);
|
|
695
|
+
return;
|
|
696
|
+
}
|
|
697
|
+
this.setStatus("connected");
|
|
698
|
+
} catch (e) {
|
|
699
|
+
h.delete(this.connectionId), console.error("[PersonaView]", e), this.mounted && (this.setStatus("error"), this.callbacks.onError?.(e));
|
|
700
|
+
}
|
|
701
|
+
}
|
|
702
|
+
/** Disconnect and cleanup */
|
|
703
|
+
disconnect() {
|
|
704
|
+
this.mounted = !1, h.delete(this.connectionId), this.cleanup(), this.setStatus("disconnected");
|
|
705
|
+
}
|
|
706
|
+
/** Toggle microphone mute */
|
|
707
|
+
toggleMute() {
|
|
708
|
+
this._isMuted = !this._isMuted;
|
|
709
|
+
}
|
|
710
|
+
setStatus(e) {
|
|
711
|
+
this._status !== e && (this._status = e, this.callbacks.onStateChange?.(e));
|
|
712
|
+
}
|
|
713
|
+
setAgentState(e) {
|
|
714
|
+
this._agentState !== e && (this._agentState = e, this.callbacks.onAgentStateChange?.(e));
|
|
715
|
+
}
|
|
716
|
+
async initSession() {
|
|
717
|
+
this.session = v({
|
|
718
|
+
serverUrl: this.sessionDetails.server_url,
|
|
719
|
+
participantToken: this.sessionDetails.participant_token,
|
|
720
|
+
agentIdentity: this.sessionDetails.agent_identity,
|
|
721
|
+
onVideoTrack: (e) => {
|
|
722
|
+
console.log("[PersonaView] Setting video track", e.readyState, e.enabled), this._video.srcObject = new MediaStream([e]), this._video.play().catch((t) => console.warn("[PersonaView] Video play failed:", t));
|
|
723
|
+
},
|
|
724
|
+
onAudioTrack: (e) => {
|
|
725
|
+
this._audio.srcObject = new MediaStream([e]), this._audio.play().catch(() => {
|
|
726
|
+
});
|
|
727
|
+
},
|
|
728
|
+
onStateChange: (e) => {
|
|
729
|
+
this.mounted && e === "disconnected" && (this.setStatus("disconnected"), this.callbacks.onDisconnect?.());
|
|
730
|
+
},
|
|
731
|
+
onError: (e) => {
|
|
732
|
+
this.mounted && this.callbacks.onError?.(e);
|
|
733
|
+
},
|
|
734
|
+
onClose: () => {
|
|
735
|
+
this.mounted && this.callbacks.onDisconnect?.();
|
|
736
|
+
}
|
|
737
|
+
}), this.agent = f(this.voiceAgentDetails.type), this.agent.on("audio", (e) => this.session?.sendAudio(e)), this.agent.on("interrupted", () => this.session?.interrupt()), this.agent.on("stateChange", (e) => this.setAgentState(e)), this.agent.on("closed", () => {
|
|
738
|
+
this.mounted && this.callbacks.onDisconnect?.();
|
|
739
|
+
}), await this.session.connect();
|
|
740
|
+
}
|
|
741
|
+
async initMicrophone() {
|
|
742
|
+
this.stream = await navigator.mediaDevices.getUserMedia({
|
|
743
|
+
audio: { sampleRate: 16e3, echoCancellation: !0, noiseSuppression: !0 }
|
|
744
|
+
}), this.audioContext = new AudioContext({ sampleRate: 16e3 });
|
|
745
|
+
const e = this.audioContext.createMediaStreamSource(this.stream);
|
|
746
|
+
this.processor = this.audioContext.createScriptProcessor(4096, 1, 1), this.processor.onaudioprocess = (t) => {
|
|
747
|
+
if (!this._isMuted) {
|
|
748
|
+
const s = y(t.inputBuffer.getChannelData(0));
|
|
749
|
+
this.agent?.sendAudio(s);
|
|
750
|
+
}
|
|
751
|
+
}, e.connect(this.processor), this.processor.connect(this.audioContext.destination);
|
|
752
|
+
}
|
|
753
|
+
async connectAgent() {
|
|
754
|
+
if (!this.agent) return;
|
|
755
|
+
const e = this.voiceAgentDetails, t = { inputSampleRate: 16e3 };
|
|
756
|
+
e.type === "gemini" ? await this.agent.connect({
|
|
757
|
+
...t,
|
|
758
|
+
apiKey: e.token,
|
|
759
|
+
authType: "ephemeral_token"
|
|
760
|
+
}) : e.type === "elevenlabs" ? await this.agent.connect({
|
|
761
|
+
...t,
|
|
762
|
+
agentId: e.agent_id,
|
|
763
|
+
signedUrl: e.signed_url
|
|
764
|
+
}) : e.type === "cartesia" && await this.agent.connect({
|
|
765
|
+
...t,
|
|
766
|
+
agentId: e.agent_id,
|
|
767
|
+
apiKey: e.token
|
|
768
|
+
});
|
|
769
|
+
}
|
|
770
|
+
cleanup() {
|
|
771
|
+
this.stream?.getTracks().forEach((e) => e.stop()), this.processor?.disconnect(), this.audioContext?.close(), this.agent?.close(), this.session?.close(), this.stream = null, this.processor = null, this.audioContext = null, this.agent = null, this.session = null;
|
|
772
|
+
}
|
|
773
|
+
}
|
|
774
|
+
export {
|
|
775
|
+
O as AGENT_REGISTRY,
|
|
776
|
+
m as BaseAgent,
|
|
777
|
+
T as CartesiaAgent,
|
|
778
|
+
M as ElevenLabsAgent,
|
|
779
|
+
k as GeminiLiveAgent,
|
|
780
|
+
D as KeyframeApiError,
|
|
781
|
+
U as PersonaEmbed,
|
|
782
|
+
K as PersonaView,
|
|
783
|
+
c as SAMPLE_RATE,
|
|
784
|
+
p as base64ToBytes,
|
|
785
|
+
g as bytesToBase64,
|
|
786
|
+
f as createAgent,
|
|
787
|
+
b as createEventEmitter,
|
|
788
|
+
y as floatTo16BitPCM,
|
|
789
|
+
L as getAgentInfo,
|
|
790
|
+
d as resamplePcm
|
|
791
|
+
};
|