@glydeunity/voice-sdk 1.3.4 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +36 -7
- package/dist/voice-sdk.es.js +77 -65
- package/dist/voice-sdk.umd.js +2 -2
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
@@ -47,22 +47,48 @@ export declare interface DeepgramAgentConfig {
  * Function call request from Deepgram Voice Agent
  * @see https://developers.deepgram.com/docs/voice-agent-function-call-request
  */
+/**
+ * Individual function call within a FunctionCallRequest
+ * @see https://developers.deepgram.com/docs/voice-agent-function-call-request
+ */
+export declare interface FunctionCall {
+    /** Unique identifier for the function call */
+    id: string;
+    /** Name of the function to execute */
+    name: string;
+    /** JSON string containing function arguments */
+    arguments: string;
+    /** If true, client must execute and respond; if false, server handles it */
+    client_side?: boolean;
+}
+
+/**
+ * Function call request message from Deepgram Voice Agent
+ * Contains an array of functions to be executed
+ * @see https://developers.deepgram.com/docs/voice-agent-function-call-request
+ */
 export declare interface FunctionCallRequest {
     type: 'FunctionCallRequest';
-    function_name: string;
-    function_call_id: string;
-    input: Record<string, unknown>;
-    client_side?: boolean;
+    /** Array of function calls to execute */
+    functions: FunctionCall[];
 }

 /**
  * Function call response to send back to Deepgram
  * @see https://developers.deepgram.com/docs/voice-agent-function-call-response
  */
+/**
+ * Function call response to send back to Deepgram Voice Agent
+ * @see https://developers.deepgram.com/docs/voice-agent-function-call-response
+ */
 export declare interface FunctionCallResponse {
     type: 'FunctionCallResponse';
-    function_call_id: string;
-    output: string;
+    /** Unique identifier of the original function call */
+    id: string;
+    /** Name of the function that was executed */
+    name: string;
+    /** Text summary or JSON result of the function's output */
+    content: string;
 }

 /**
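
The shape change above is the breaking part of this release: FunctionCallRequest now carries a functions array of FunctionCall objects (each with a JSON-string arguments field) instead of a single flat call, and FunctionCallResponse replaces function_call_id/output with id/name/content. A minimal sketch of the new shape, with a single call pulled from the array and answered; the id, name, and argument values are illustrative, not from the SDK:

import type { FunctionCallRequest, FunctionCallResponse } from '@glydeunity/voice-sdk';

// Illustrative request as Deepgram now sends it: an array of calls,
// each with a JSON-string `arguments` field.
const request: FunctionCallRequest = {
  type: 'FunctionCallRequest',
  functions: [
    { id: 'fc_123', name: 'lookup_job', arguments: '{"job_id":42}', client_side: true },
  ],
};

const call = request.functions[0];
const response: FunctionCallResponse = {
  type: 'FunctionCallResponse',
  id: call.id,        // echoes FunctionCall.id (was function_call_id)
  name: call.name,
  content: JSON.stringify({ ok: true }), // string result (was `output`)
};
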
@@ -195,7 +221,10 @@ export declare class GlydeVoice {
      * Handle a function call request from Deepgram Voice Agent
      * Routes function execution through the Unity voice function endpoint for proper authentication
      *
-     *
+     * Deepgram sends an array of functions in each request, so we process each one
+     * and send individual responses back.
+     *
+     * @param request - The function call request from Deepgram containing functions array
      * @see https://developers.deepgram.com/docs/voice-agents-function-calling
      */
     private handleFunctionCallRequest;
package/dist/voice-sdk.es.js
CHANGED
@@ -176,15 +176,15 @@ class y {
    * @returns Voice configuration including system prompt, tools, and Deepgram settings
    */
   async fetchConfig() {
-    const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e,
+    const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, s = await fetch(t, {
       method: "GET",
       headers: this.getAuthHeaders()
     });
-    if (!
-    const
-    throw new Error(
+    if (!s.ok) {
+      const o = await s.json();
+      throw new Error(o.error?.message || o.message || "Failed to fetch voice config");
     }
-    const { data: i } = await
+    const { data: i } = await s.json();
     return i;
   }
   /**
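
De-minified, the fetchConfig change reads roughly as below. The variable names are mine; the endpoint path and the error fallback chain (error?.message, then message, then a generic string) come straight from the diff:

// Readable sketch of the minified fetchConfig above (names illustrative).
async function fetchVoiceConfig(
  unityUrl: string,
  contextType: string,
  contextId?: string,
  headers: Record<string, string> = {},
) {
  const base = `${unityUrl}/api/unity/voice/config/${contextType}`;
  const url = contextId ? `${base}/${contextId}` : base;
  const res = await fetch(url, { method: 'GET', headers });
  if (!res.ok) {
    const body = await res.json();
    throw new Error(body.error?.message || body.message || 'Failed to fetch voice config');
  }
  const { data } = await res.json();
  return data;
}
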
@@ -206,26 +206,26 @@ class y {
       body: JSON.stringify(e)
     });
     if (!t.ok) {
-      const
-      throw new Error(
+      const a = await t.json();
+      throw new Error(a.error?.message || a.message || "Failed to authenticate voice session");
     }
-    const { data:
+    const { data: s } = await t.json(), { token: i, agent_config: o, deepgram_config: n } = s;
     this.setSessionContext({
-      clientUuid:
+      clientUuid: o?.client_uuid,
       contextId: this.config.contextId,
       contextType: this.config.contextType,
-      currentJobUuid:
+      currentJobUuid: o?.job_uuid
     });
-    const c = this.config.systemPrompt ||
+    const c = this.config.systemPrompt || o.instructions || this.serverConfig?.system_prompt || "You are a helpful AI assistant.";
     await this.initializeAudio();
     let l = "wss://agent.deepgram.com/v1/agent/converse";
     const r = this.config.deepgramConfig || n || this.serverConfig?.deepgram_config;
     if (r?.tags && r.tags.length > 0) {
-      const
-      r.tags.forEach((h) =>
+      const a = new URLSearchParams();
+      r.tags.forEach((h) => a.append("tag", h)), l += `?${a.toString()}`;
     }
     this.ws = new WebSocket(l, ["bearer", i]), this.ws.onopen = () => {
-      const
+      const a = r || {
         think: { provider: { type: "open_ai", model: "gpt-4.1-nano" } },
         speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
         listen: { provider: { type: "deepgram", version: "v2", model: "flux-general-en" } }
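
Two details in this hunk are easy to miss: optional Deepgram tags are appended as repeated tag query parameters on the agent URL, and the session token is passed in the WebSocket subprotocol list, since browsers cannot set an Authorization header on a WebSocket. A sketch with assumed inputs:

// Sketch of the connection wiring above; `token` and `tags` are assumed inputs.
function openAgentSocket(token: string, tags?: string[]): WebSocket {
  let url = 'wss://agent.deepgram.com/v1/agent/converse';
  if (tags && tags.length > 0) {
    const params = new URLSearchParams();
    tags.forEach((t) => params.append('tag', t)); // repeated ?tag=...&tag=... entries
    url += `?${params.toString()}`;
  }
  // The bearer token rides in the subprotocol list, as in the diff.
  return new WebSocket(url, ['bearer', token]);
}
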
@@ -244,39 +244,39 @@ class y {
       },
       agent: {
         language: "en",
-        speak:
+        speak: a.speak || {
           provider: { type: "deepgram", model: "aura-2-thalia-en" }
         },
-        listen:
+        listen: a.listen || {
           provider: { type: "deepgram", version: "v2", model: "flux-general-en" }
         },
         think: {
-          provider:
+          provider: a.think?.provider || { type: "open_ai", model: "gpt-4.1-nano" },
           // Functions come from server config - no client-side defaults
-          ...
+          ...a.think?.functions && { functions: a.think.functions }
         },
         greeting: "Hi! I'm excited you chose to speak with me. Are you ready to start?"
       }
     };
-
+    a.tags && a.tags.length > 0 && (h.tags = a.tags), this.ws.send(JSON.stringify(h)), this.emit({ type: "open", payload: { config: o, serverConfig: this.serverConfig } });
   };
-  const
-  this.ws.onmessage = (
-  if (typeof
+  const u = c;
+  this.ws.onmessage = (a) => {
+    if (typeof a.data == "string") {
       try {
-        if (JSON.parse(
-        const
+        if (JSON.parse(a.data).type === "SettingsApplied") {
+          const d = {
             type: "UpdatePrompt",
-            prompt:
+            prompt: u
           };
-          this.ws.send(JSON.stringify(
+          this.ws.send(JSON.stringify(d)), this.startMicrophone();
         }
       } catch {
       }
-      this.handleTextMessage(
-    } else
-  }, this.ws.onerror = (
-    console.error("[GlydeVoice] WebSocket error:",
+      this.handleTextMessage(a.data);
+    } else a.data instanceof Blob ? this.handleAudioData(a.data) : a.data instanceof ArrayBuffer && this.handleAudioBuffer(a.data);
+  }, this.ws.onerror = (a) => {
+    console.error("[GlydeVoice] WebSocket error:", a), this.emit({ type: "error", payload: a });
   }, this.ws.onclose = () => {
     this.cleanup(), this.emit({ type: "close" });
   }, this.renderUI();
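
The Settings message now falls back per field rather than wholesale: speak, listen, and think.provider each prefer the resolved Deepgram config, and think functions are spread in only when the server supplied them. Expanded from the minified hunk above; `cfg` stands for the resolved config (`a` in the minified code) and is typed loosely here:

// `cfg` is the resolved Deepgram config from the auth response or client options.
declare const cfg: any;

const settings = {
  type: 'Settings',
  audio: {
    input: { encoding: 'linear16', sample_rate: 48000 },                      // inputSampleRate
    output: { encoding: 'linear16', sample_rate: 24000, container: 'none' },  // outputSampleRate
  },
  agent: {
    language: 'en',
    speak: cfg.speak || { provider: { type: 'deepgram', model: 'aura-2-thalia-en' } },
    listen: cfg.listen || { provider: { type: 'deepgram', version: 'v2', model: 'flux-general-en' } },
    think: {
      provider: cfg.think?.provider || { type: 'open_ai', model: 'gpt-4.1-nano' },
      // Functions come from server config - no client-side defaults
      ...(cfg.think?.functions && { functions: cfg.think.functions }),
    },
    greeting: "Hi! I'm excited you chose to speak with me. Are you ready to start?",
  },
};
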
@@ -310,8 +310,8 @@ class y {
     } finally {
       URL.revokeObjectURL(e), URL.revokeObjectURL(t);
     }
-    this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (
-    const { type: i } =
+    this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (s) => {
+      const { type: i } = s.data;
       (i === "cleared" || i === "bufferEmpty") && (this.isAgentSpeaking = !1, this.agentAudioDoneReceived = !1, this.emit({ type: "agent_speaking", payload: !1 }));
     };
   }
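
The playback worklet's message port doubles as a small control channel: the main thread posts { type: 'audio' } chunks and { type: 'clear' } on interruption, and the processor answers with 'cleared' / 'bufferEmpty', which is what resets the agent-speaking flags above. A sketch of both directions, with the processor internals omitted:

// Port protocol visible in the diff; the processor name matches the registered worklet.
declare const audioContext: AudioContext;
const node = new AudioWorkletNode(audioContext, 'audio-playback-processor');
node.connect(audioContext.destination);

// Worklet -> main thread: buffer-state notifications end the agent_speaking state.
node.port.onmessage = (ev: MessageEvent) => {
  const { type } = ev.data;
  if (type === 'cleared' || type === 'bufferEmpty') {
    // agent audio finished or was interrupted
  }
};

// Main thread -> worklet: enqueue samples (transferring the buffer), or flush on barge-in.
const samples = new Float32Array(480);
node.port.postMessage({ type: 'audio', data: samples }, [samples.buffer]);
node.port.postMessage({ type: 'clear' });
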
@@ -335,8 +335,8 @@ class y {
       break;
     case "ConversationText":
       if (t.content && t.content.trim()) {
-        const
-        this.config.onTranscript && this.config.onTranscript(t.content,
+        const s = t.role === "assistant" ? "agent" : "user";
+        this.config.onTranscript && this.config.onTranscript(t.content, s), this.emit({ type: "transcript", payload: { text: t.content, role: s } }), this.saveTranscript(t.content, t.role);
       }
       break;
     case "AgentStartedSpeaking":
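
ConversationText now fans out to three sinks: the onTranscript callback, a transcript event, and saveTranscript persistence, with Deepgram's assistant role mapped to agent for consumers. From the consumer side; the contextType value and key below are placeholders:

import { GlydeVoice } from '@glydeunity/voice-sdk';

const voice = new GlydeVoice({
  contextType: 'candidate',        // placeholder context type
  publishableKey: 'pk_live_...',   // placeholder key
  onTranscript: (text, role) => {
    // role is 'agent' (mapped from Deepgram's 'assistant') or 'user'
    console.log(`[${role}] ${text}`);
  },
  onEvent: (event) => {
    if (event.type === 'transcript') console.log(event.payload);
  },
});
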
@@ -372,11 +372,11 @@ class y {
     this.audioContext.state === "suspended" && this.audioContext.resume();
     const t = e.byteLength;
     if (t === 0) return;
-    const
-    if (
-    const i =
-    for (let r = 0; r <
-    n[r] =
+    const s = t - t % 2;
+    if (s === 0) return;
+    const i = s === t ? e : e.slice(0, s), o = new Int16Array(i), n = new Float32Array(o.length);
+    for (let r = 0; r < o.length; r++)
+      n[r] = o[r] / 32768;
     const c = this.resample24kTo48k(n);
     !this.isAgentSpeaking && !this.agentAudioDoneReceived && (this.isAgentSpeaking = !0, this.emit({ type: "agent_speaking", payload: !0 }));
     const l = new Float32Array(c);
@@ -389,13 +389,13 @@ class y {
    * Resample audio from 24kHz to 48kHz using linear interpolation
    */
   resample24kTo48k(e) {
-    const t = e.length * 2,
-    for (let
-    const n = e[
-    o
+    const t = e.length * 2, s = new Float32Array(t);
+    for (let o = 0; o < e.length - 1; o++) {
+      const n = e[o], c = e[o + 1];
+      s[o * 2] = n, s[o * 2 + 1] = (n + c) / 2;
     }
     const i = e.length - 1;
-    return
+    return s[i * 2] = e[i], s[i * 2 + 1] = e[i], s;
   }
   /**
    * Clear the playback buffer (for interruption handling)
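
Agent audio arrives as 16-bit linear PCM at 24 kHz; the fixed playback path trims any odd trailing byte before taking the Int16 view, normalizes by 1/32768, then doubles the sample rate by inserting the midpoint between neighboring samples and duplicating the final one. A readable equivalent of the two hunks above; names are mine:

// Readable sketch of handleAudioBuffer + resample24kTo48k.
function pcm16At24kToFloat48k(buf: ArrayBuffer): Float32Array | null {
  const even = buf.byteLength - (buf.byteLength % 2); // Int16Array needs whole 2-byte samples
  if (even === 0) return null;
  const pcm = new Int16Array(even === buf.byteLength ? buf : buf.slice(0, even));
  const mono = new Float32Array(pcm.length);
  for (let i = 0; i < pcm.length; i++) mono[i] = pcm[i] / 32768; // normalize to [-1, 1)
  const out = new Float32Array(mono.length * 2);
  for (let i = 0; i < mono.length - 1; i++) {
    out[i * 2] = mono[i];
    out[i * 2 + 1] = (mono[i] + mono[i + 1]) / 2; // linear-interpolation midpoint
  }
  const last = mono.length - 1;
  out[last * 2] = mono[last];
  out[last * 2 + 1] = mono[last]; // no successor; repeat the final sample
  return out;
}
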
@@ -506,26 +506,38 @@ class y {
    * Handle a function call request from Deepgram Voice Agent
    * Routes function execution through the Unity voice function endpoint for proper authentication
    *
-   *
+   * Deepgram sends an array of functions in each request, so we process each one
+   * and send individual responses back.
+   *
+   * @param request - The function call request from Deepgram containing functions array
    * @see https://developers.deepgram.com/docs/voice-agents-function-calling
    */
   async handleFunctionCallRequest(e) {
-
-
-
-
-
-
-
-
-
+    for (const t of e.functions) {
+      console.log("[GlydeVoice] Function call request:", t.name, t.arguments);
+      let s = {};
+      try {
+        s = t.arguments ? JSON.parse(t.arguments) : {};
+      } catch (n) {
+        console.warn("[GlydeVoice] Failed to parse function arguments:", n);
+      }
+      let i;
+      try {
+        t.name === "end_conversation" ? i = await this.handleEndConversation(s) : i = await this.executeVoiceFunction(t.name, t.id, s);
+      } catch (n) {
+        console.error("[GlydeVoice] Function call error:", n), i = JSON.stringify({
+          error: "Function execution failed",
+          details: n instanceof Error ? n.message : String(n)
+        });
+      }
+      const o = {
+        type: "FunctionCallResponse",
+        id: t.id,
+        name: t.name,
+        content: i
+      };
+      this.ws && this.ws.readyState === WebSocket.OPEN ? (this.ws.send(JSON.stringify(o)), console.log("[GlydeVoice] Function response sent:", t.name)) : console.error("[GlydeVoice] Cannot send function response - WebSocket not open");
     }
-    const o = {
-      type: "FunctionCallResponse",
-      function_call_id: e.function_call_id,
-      output: t
-    };
-    this.ws && this.ws.readyState === WebSocket.OPEN ? (this.ws.send(JSON.stringify(o)), console.log("[GlydeVoice] Function response sent:", e.function_name)) : console.error("[GlydeVoice] Cannot send function response - WebSocket not open");
   }
   /**
    * Execute a voice function through the Unity API with proper authentication
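
De-minified, the rewritten handler loops over request.functions, parses each call's JSON arguments defensively, routes end_conversation locally and everything else through the Unity endpoint, and answers every call individually with the new id/name/content shape. A readable sketch; the declared helpers stand in for the class members visible in the diff:

import type { FunctionCallRequest, FunctionCallResponse } from '@glydeunity/voice-sdk';

// Class members seen in the diff, declared here so the sketch stands alone.
declare const ws: WebSocket | null;
declare function handleEndConversation(input: Record<string, unknown>): Promise<string>;
declare function executeVoiceFunction(name: string, callId: string, input: Record<string, unknown>): Promise<string>;

async function handleFunctionCallRequest(request: FunctionCallRequest): Promise<void> {
  for (const call of request.functions) {
    let input: Record<string, unknown> = {};
    try {
      input = call.arguments ? JSON.parse(call.arguments) : {};
    } catch (err) {
      console.warn('[GlydeVoice] Failed to parse function arguments:', err);
    }

    let content: string;
    try {
      content = call.name === 'end_conversation'
        ? await handleEndConversation(input)
        : await executeVoiceFunction(call.name, call.id, input);
    } catch (err) {
      // Errors become a structured payload so the agent can recover verbally.
      content = JSON.stringify({
        error: 'Function execution failed',
        details: err instanceof Error ? err.message : String(err),
      });
    }

    const response: FunctionCallResponse = {
      type: 'FunctionCallResponse',
      id: call.id,     // echo the call id for correlation
      name: call.name,
      content,
    };
    if (ws && ws.readyState === WebSocket.OPEN) ws.send(JSON.stringify(response));
  }
}
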
@@ -537,7 +549,7 @@ class y {
    * @param input - Function input parameters
    * @returns JSON string with function result
    */
-  async executeVoiceFunction(e, t,
+  async executeVoiceFunction(e, t, s) {
     console.log("[GlydeVoice] Executing voice function via Unity API:", e);
     try {
       const i = await fetch(`${this.unityUrl}/api/unity/voice/function`, {
@@ -546,7 +558,7 @@ class y {
         body: JSON.stringify({
           function_name: e,
           function_call_id: t,
-          input:
+          input: s,
           context: {
             context_id: this.sessionContext.contextId,
             context_type: this.sessionContext.contextType,
@@ -558,9 +570,9 @@ class y {
         const n = await i.json().catch(() => ({}));
         throw new Error(n.error?.message || `Function call failed: ${i.status}`);
       }
-      const
-      if (
-      return typeof
+      const o = await i.json();
+      if (o.success && o.data?.output)
+        return typeof o.data.output == "string" ? o.data.output : JSON.stringify(o.data.output);
       throw new Error("Invalid response from voice function endpoint");
     } catch (i) {
       return console.error("[GlydeVoice] Voice function error:", i), JSON.stringify({
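
For reference, executeVoiceFunction POSTs the body below to ${unityUrl}/api/unity/voice/function and unwraps { success, data: { output } }, stringifying non-string outputs. The request fields are taken from the diff; the response envelope is inferred from the success-path checks and is an assumption:

// Request body fields taken from the diff.
interface VoiceFunctionRequest {
  function_name: string;
  function_call_id: string;
  input: Record<string, unknown>;
  context: {
    context_id?: string;
    context_type?: string;
    current_job_uuid?: string;
  };
}

// Envelope inferred from `o.success && o.data?.output` and `n.error?.message`.
interface VoiceFunctionResponse {
  success: boolean;
  data?: { output?: string | Record<string, unknown> };
  error?: { message?: string };
}

function unwrapOutput(res: VoiceFunctionResponse): string {
  if (res.success && res.data?.output) {
    return typeof res.data.output === 'string'
      ? res.data.output
      : JSON.stringify(res.data.output);
  }
  throw new Error('Invalid response from voice function endpoint');
}
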
package/dist/voice-sdk.umd.js
CHANGED
@@ -130,11 +130,11 @@ class AudioPlaybackProcessor extends AudioWorkletProcessor {
 }

 registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
-`;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;sessionContext={};constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,
+`;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;sessionContext={};constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,s=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!s.ok){const o=await s.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:i}=await s.json();return i}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e={context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"};this.config.systemPrompt&&(e.system_prompt=this.config.systemPrompt),this.config.deepgramConfig&&(e.deepgram_config=this.config.deepgramConfig);const t=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify(e)});if(!t.ok){const a=await t.json();throw new Error(a.error?.message||a.message||"Failed to authenticate voice session")}const{data:s}=await t.json(),{token:i,agent_config:o,deepgram_config:n}=s;this.setSessionContext({clientUuid:o?.client_uuid,contextId:this.config.contextId,contextType:this.config.contextType,currentJobUuid:o?.job_uuid});const d=this.config.systemPrompt||o.instructions||this.serverConfig?.system_prompt||"You are a helpful AI assistant.";await this.initializeAudio();let u="wss://agent.deepgram.com/v1/agent/converse";const r=this.config.deepgramConfig||n||this.serverConfig?.deepgram_config;if(r?.tags&&r.tags.length>0){const a=new URLSearchParams;r.tags.forEach(h=>a.append("tag",h)),u+=`?${a.toString()}`}this.ws=new WebSocket(u,["bearer",i]),this.ws.onopen=()=>{const a=r||{think:{provider:{type:"open_ai",model:"gpt-4.1-nano"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}}},h={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:a.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:a.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:a.think?.provider||{type:"open_ai",model:"gpt-4.1-nano"},...a.think?.functions&&{functions:a.think.functions}},greeting:"Hi! I'm excited you chose to speak with me. Are you ready to start?"}};a.tags&&a.tags.length>0&&(h.tags=a.tags),this.ws.send(JSON.stringify(h)),this.emit({type:"open",payload:{config:o,serverConfig:this.serverConfig}})};const g=d;this.ws.onmessage=a=>{if(typeof a.data=="string"){try{if(JSON.parse(a.data).type==="SettingsApplied"){const y={type:"UpdatePrompt",prompt:g};this.ws.send(JSON.stringify(y)),this.startMicrophone()}}catch{}this.handleTextMessage(a.data)}else a.data instanceof Blob?this.handleAudioData(a.data):a.data instanceof ArrayBuffer&&this.handleAudioBuffer(a.data)},this.ws.onerror=a=>{console.error("[GlydeVoice] WebSocket error:",a),this.emit({type:"error",payload:a})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(l),t=this.createWorkletBlobUrl(p);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=s=>{const{type:i}=s.data;(i==="cleared"||i==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const s=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,s),this.emit({type:"transcript",payload:{text:t.content,role:s}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break;case"FunctionCallRequest":this.handleFunctionCallRequest(t);break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const s=t-t%2;if(s===0)return;const i=s===t?e:e.slice(0,s),o=new Int16Array(i),n=new Float32Array(o.length);for(let r=0;r<o.length;r++)n[r]=o[r]/32768;const d=this.resample24kTo48k(n);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const u=new Float32Array(d);this.playbackWorkletNode.port.postMessage({type:"audio",data:u},[u.buffer])}resample24kTo48k(e){const t=e.length*2,s=new Float32Array(t);for(let o=0;o<e.length-1;o++){const n=e[o],d=e[o+1];s[o*2]=n,s[o*2+1]=(n+d)/2}const i=e.length-1;return s[i*2]=e[i],s[i*2+1]=e[i],s}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
   <div style="padding: 20px; border: 1px solid #ccc; border-radius: 8px; background: #fff;">
     <h3>Glyde Voice Agent</h3>
     <p>Status: Active</p>
     <p>Context: ${this.config.contextType}</p>
     <button onclick="this.closest('div').remove()">Close</button>
   </div>
-`)}async handleFunctionCallRequest(e){console.log("[GlydeVoice] Function call request:",
+`)}async handleFunctionCallRequest(e){for(const t of e.functions){console.log("[GlydeVoice] Function call request:",t.name,t.arguments);let s={};try{s=t.arguments?JSON.parse(t.arguments):{}}catch(n){console.warn("[GlydeVoice] Failed to parse function arguments:",n)}let i;try{t.name==="end_conversation"?i=await this.handleEndConversation(s):i=await this.executeVoiceFunction(t.name,t.id,s)}catch(n){console.error("[GlydeVoice] Function call error:",n),i=JSON.stringify({error:"Function execution failed",details:n instanceof Error?n.message:String(n)})}const o={type:"FunctionCallResponse",id:t.id,name:t.name,content:i};this.ws&&this.ws.readyState===WebSocket.OPEN?(this.ws.send(JSON.stringify(o)),console.log("[GlydeVoice] Function response sent:",t.name)):console.error("[GlydeVoice] Cannot send function response - WebSocket not open")}}async executeVoiceFunction(e,t,s){console.log("[GlydeVoice] Executing voice function via Unity API:",e);try{const i=await fetch(`${this.unityUrl}/api/unity/voice/function`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({function_name:e,function_call_id:t,input:s,context:{context_id:this.sessionContext.contextId,context_type:this.sessionContext.contextType,current_job_uuid:this.sessionContext.currentJobUuid}})});if(!i.ok){const n=await i.json().catch(()=>({}));throw new Error(n.error?.message||`Function call failed: ${i.status}`)}const o=await i.json();if(o.success&&o.data?.output)return typeof o.data.output=="string"?o.data.output:JSON.stringify(o.data.output);throw new Error("Invalid response from voice function endpoint")}catch(i){return console.error("[GlydeVoice] Voice function error:",i),JSON.stringify({success:!1,error:i instanceof Error?i.message:"Function execution failed",fallback_message:"I apologize, but I'm having trouble with that request right now. Is there something else I can help you with?"})}}async handleEndConversation(e){const t=e.item||"user request";return console.log(`[GlydeVoice] End conversation triggered by: ${t}`),setTimeout(()=>{this.stop()},2e3),JSON.stringify({success:!0,message:"Conversation ending. Say goodbye to the user.",trigger_phrase:t})}setSessionContext(e){this.sessionContext={...this.sessionContext,...e},console.log("[GlydeVoice] Session context updated:",{hasContextId:!!e.contextId,contextType:e.contextType,hasJobUuid:!!e.currentJobUuid})}}c.GlydeVoice=f,Object.defineProperty(c,Symbol.toStringTag,{value:"Module"})}));