@glydeunity/voice-sdk 1.3.3 → 1.3.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -47,12 +47,30 @@ export declare interface DeepgramAgentConfig {
  * Function call request from Deepgram Voice Agent
  * @see https://developers.deepgram.com/docs/voice-agent-function-call-request
  */
+ /**
+ * Individual function call within a FunctionCallRequest
+ * @see https://developers.deepgram.com/docs/voice-agent-function-call-request
+ */
+ export declare interface FunctionCall {
+ /** Unique identifier for the function call */
+ id: string;
+ /** Name of the function to execute */
+ name: string;
+ /** JSON string containing function arguments */
+ arguments: string;
+ /** If true, client must execute and respond; if false, server handles it */
+ client_side?: boolean;
+ }
+
+ /**
+ * Function call request message from Deepgram Voice Agent
+ * Contains an array of functions to be executed
+ * @see https://developers.deepgram.com/docs/voice-agent-function-call-request
+ */
  export declare interface FunctionCallRequest {
  type: 'FunctionCallRequest';
- function_name: string;
- function_call_id: string;
- input: Record<string, unknown>;
- client_side?: boolean;
+ /** Array of function calls to execute */
+ functions: FunctionCall[];
  }

  /**
@@ -195,7 +213,10 @@ export declare class GlydeVoice {
  * Handle a function call request from Deepgram Voice Agent
  * Routes function execution through the Unity voice function endpoint for proper authentication
  *
- * @param request - The function call request from Deepgram
+ * Deepgram sends an array of functions in each request, so we process each one
+ * and send individual responses back.
+ *
+ * @param request - The function call request from Deepgram containing functions array
  * @see https://developers.deepgram.com/docs/voice-agents-function-calling
  */
  private handleFunctionCallRequest;
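This is a breaking change to the declared message shape: the flat `function_name` / `function_call_id` / `input` fields are replaced by a `functions` array whose `arguments` field arrives as a JSON string rather than a parsed object. A minimal TypeScript sketch of how a consumer inspecting these messages might adapt (the `runTool` executor is a hypothetical placeholder, not part of the SDK):

```ts
import type { FunctionCallRequest } from '@glydeunity/voice-sdk';

// Hypothetical application-side executor; stands in for real tool logic.
async function runTool(name: string, input: Record<string, unknown>): Promise<string> {
  return JSON.stringify({ ok: true, name, input });
}

// 1.3.3 exposed request.function_name and a pre-parsed request.input;
// 1.3.5 delivers an array of calls, each with arguments as a JSON string.
async function onFunctionCallRequest(request: FunctionCallRequest): Promise<string[]> {
  const outputs: string[] = [];
  for (const call of request.functions) {
    let input: Record<string, unknown> = {};
    try {
      input = call.arguments ? JSON.parse(call.arguments) : {};
    } catch {
      console.warn(`Unparseable arguments for ${call.name}`);
    }
    outputs.push(await runTool(call.name, input));
  }
  return outputs;
}
```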
@@ -176,15 +176,15 @@ class y {
  * @returns Voice configuration including system prompt, tools, and Deepgram settings
  */
  async fetchConfig() {
- const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, o = await fetch(t, {
+ const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, s = await fetch(t, {
  method: "GET",
  headers: this.getAuthHeaders()
  });
- if (!o.ok) {
- const a = await o.json();
- throw new Error(a.error?.message || a.message || "Failed to fetch voice config");
+ if (!s.ok) {
+ const o = await s.json();
+ throw new Error(o.error?.message || o.message || "Failed to fetch voice config");
  }
- const { data: i } = await o.json();
+ const { data: i } = await s.json();
  return i;
  }
  /**
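This hunk, like most of the bundle hunks below, changes only minifier-assigned identifiers (`o` becomes `s`, and so on); behavior is unchanged. For reference, a de-minified reading of `fetchConfig` as shipped, with descriptive names substituted for the single-letter ones:

```ts
// De-minified reading of fetchConfig (identical behavior in 1.3.3 and 1.3.5).
// GET /api/unity/voice/config/:contextType[/:contextId] with auth headers.
async function fetchConfig(this: {
  unityUrl: string;
  config: { contextType: string; contextId?: string };
  getAuthHeaders(): Record<string, string>;
}): Promise<unknown> {
  const base = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`;
  const url = this.config.contextId ? `${base}/${this.config.contextId}` : base;
  const response = await fetch(url, { method: "GET", headers: this.getAuthHeaders() });
  if (!response.ok) {
    const body = await response.json();
    throw new Error(body.error?.message || body.message || "Failed to fetch voice config");
  }
  const { data } = await response.json();
  return data;
}
```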
@@ -206,27 +206,27 @@ class y {
  body: JSON.stringify(e)
  });
  if (!t.ok) {
- const s = await t.json();
- throw new Error(s.error?.message || s.message || "Failed to authenticate voice session");
+ const a = await t.json();
+ throw new Error(a.error?.message || a.message || "Failed to authenticate voice session");
  }
- const { data: o } = await t.json(), { token: i, agent_config: a, deepgram_config: n } = o;
+ const { data: s } = await t.json(), { token: i, agent_config: o, deepgram_config: n } = s;
  this.setSessionContext({
- clientUuid: a?.client_uuid,
+ clientUuid: o?.client_uuid,
  contextId: this.config.contextId,
  contextType: this.config.contextType,
- currentJobUuid: a?.job_uuid
+ currentJobUuid: o?.job_uuid
  });
- const c = this.config.systemPrompt || a.instructions || this.serverConfig?.system_prompt || "You are a helpful AI assistant.";
+ const c = this.config.systemPrompt || o.instructions || this.serverConfig?.system_prompt || "You are a helpful AI assistant.";
  await this.initializeAudio();
  let l = "wss://agent.deepgram.com/v1/agent/converse";
  const r = this.config.deepgramConfig || n || this.serverConfig?.deepgram_config;
  if (r?.tags && r.tags.length > 0) {
- const s = new URLSearchParams();
- r.tags.forEach((h) => s.append("tag", h)), l += `?${s.toString()}`;
+ const a = new URLSearchParams();
+ r.tags.forEach((h) => a.append("tag", h)), l += `?${a.toString()}`;
  }
  this.ws = new WebSocket(l, ["bearer", i]), this.ws.onopen = () => {
- const s = r || {
- think: { provider: { type: "open_ai", model: "gpt-5-nano" } },
+ const a = r || {
+ think: { provider: { type: "open_ai", model: "gpt-4.1-nano" } },
  speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
  listen: { provider: { type: "deepgram", version: "v2", model: "flux-general-en" } }
  }, h = {
@@ -244,39 +244,39 @@ class y {
  },
  agent: {
  language: "en",
- speak: s.speak || {
+ speak: a.speak || {
  provider: { type: "deepgram", model: "aura-2-thalia-en" }
  },
- listen: s.listen || {
+ listen: a.listen || {
  provider: { type: "deepgram", version: "v2", model: "flux-general-en" }
  },
  think: {
- provider: s.think?.provider || { type: "open_ai", model: "gpt-5-nano" },
+ provider: a.think?.provider || { type: "open_ai", model: "gpt-4.1-nano" },
  // Functions come from server config - no client-side defaults
- ...s.think?.functions && { functions: s.think.functions }
+ ...a.think?.functions && { functions: a.think.functions }
  },
  greeting: "Hi! I'm excited you chose to speak with me. Are you ready to start?"
  }
  };
- s.tags && s.tags.length > 0 && (h.tags = s.tags), this.ws.send(JSON.stringify(h)), this.emit({ type: "open", payload: { config: a, serverConfig: this.serverConfig } });
+ a.tags && a.tags.length > 0 && (h.tags = a.tags), this.ws.send(JSON.stringify(h)), this.emit({ type: "open", payload: { config: o, serverConfig: this.serverConfig } });
  };
- const d = c;
- this.ws.onmessage = (s) => {
- if (typeof s.data == "string") {
+ const u = c;
+ this.ws.onmessage = (a) => {
+ if (typeof a.data == "string") {
  try {
- if (JSON.parse(s.data).type === "SettingsApplied") {
- const u = {
+ if (JSON.parse(a.data).type === "SettingsApplied") {
+ const d = {
  type: "UpdatePrompt",
- prompt: d
+ prompt: u
  };
- this.ws.send(JSON.stringify(u)), this.startMicrophone();
+ this.ws.send(JSON.stringify(d)), this.startMicrophone();
  }
  } catch {
  }
- this.handleTextMessage(s.data);
- } else s.data instanceof Blob ? this.handleAudioData(s.data) : s.data instanceof ArrayBuffer && this.handleAudioBuffer(s.data);
- }, this.ws.onerror = (s) => {
- console.error("[GlydeVoice] WebSocket error:", s), this.emit({ type: "error", payload: s });
+ this.handleTextMessage(a.data);
+ } else a.data instanceof Blob ? this.handleAudioData(a.data) : a.data instanceof ArrayBuffer && this.handleAudioBuffer(a.data);
+ }, this.ws.onerror = (a) => {
+ console.error("[GlydeVoice] WebSocket error:", a), this.emit({ type: "error", payload: a });
  }, this.ws.onclose = () => {
  this.cleanup(), this.emit({ type: "close" });
  }, this.renderUI();
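The one substantive change in these two hunks is the default think model, which moves from `gpt-5-nano` to `gpt-4.1-nano`. The precedence chain `this.config.deepgramConfig || n || this.serverConfig?.deepgram_config` means a client-supplied config wins over the server's, so a consumer who wants to pin providers need not depend on the SDK default. A sketch under that assumption (field shapes are inferred from this bundle, not from published typings):

```ts
import { GlydeVoice } from '@glydeunity/voice-sdk';

// Sketch: pinning providers client-side so the SDK's built-in defaults
// (including the think model) never apply. Shapes inferred from the bundle.
const voice = new GlydeVoice({
  publishableKey: 'pk_example',   // placeholder credential
  contextType: 'support',         // hypothetical context type
  deepgramConfig: {
    think: { provider: { type: 'open_ai', model: 'gpt-4.1-nano' } },
    speak: { provider: { type: 'deepgram', model: 'aura-2-thalia-en' } },
    listen: { provider: { type: 'deepgram', version: 'v2', model: 'flux-general-en' } },
    tags: ['widget'],             // appended to the agent URL as ?tag=widget
  },
});
await voice.start();
```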
@@ -310,8 +310,8 @@ class y {
  } finally {
  URL.revokeObjectURL(e), URL.revokeObjectURL(t);
  }
- this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (o) => {
- const { type: i } = o.data;
+ this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (s) => {
+ const { type: i } = s.data;
  (i === "cleared" || i === "bufferEmpty") && (this.isAgentSpeaking = !1, this.agentAudioDoneReceived = !1, this.emit({ type: "agent_speaking", payload: !1 }));
  };
  }
@@ -335,8 +335,8 @@ class y {
  break;
  case "ConversationText":
  if (t.content && t.content.trim()) {
- const o = t.role === "assistant" ? "agent" : "user";
- this.config.onTranscript && this.config.onTranscript(t.content, o), this.emit({ type: "transcript", payload: { text: t.content, role: o } }), this.saveTranscript(t.content, t.role);
+ const s = t.role === "assistant" ? "agent" : "user";
+ this.config.onTranscript && this.config.onTranscript(t.content, s), this.emit({ type: "transcript", payload: { text: t.content, role: s } }), this.saveTranscript(t.content, t.role);
  }
  break;
  case "AgentStartedSpeaking":
@@ -372,11 +372,11 @@ class y {
  this.audioContext.state === "suspended" && this.audioContext.resume();
  const t = e.byteLength;
  if (t === 0) return;
- const o = t - t % 2;
- if (o === 0) return;
- const i = o === t ? e : e.slice(0, o), a = new Int16Array(i), n = new Float32Array(a.length);
- for (let r = 0; r < a.length; r++)
- n[r] = a[r] / 32768;
+ const s = t - t % 2;
+ if (s === 0) return;
+ const i = s === t ? e : e.slice(0, s), o = new Int16Array(i), n = new Float32Array(o.length);
+ for (let r = 0; r < o.length; r++)
+ n[r] = o[r] / 32768;
  const c = this.resample24kTo48k(n);
  !this.isAgentSpeaking && !this.agentAudioDoneReceived && (this.isAgentSpeaking = !0, this.emit({ type: "agent_speaking", payload: !0 }));
  const l = new Float32Array(c);
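Again only the minifier's names change here. What the code does, de-minified: trim the incoming buffer to a whole number of 16-bit samples, reinterpret it as signed PCM, and normalize each sample by dividing by 32768:

```ts
// De-minified reading of the PCM conversion in handleAudioBuffer
// (behavior identical in both versions).
function int16BufferToFloat32(buffer: ArrayBuffer): Float32Array | null {
  const byteLength = buffer.byteLength;
  if (byteLength === 0) return null;
  const evenBytes = byteLength - (byteLength % 2); // Int16Array needs whole samples
  if (evenBytes === 0) return null;
  const trimmed = evenBytes === byteLength ? buffer : buffer.slice(0, evenBytes);
  const samples = new Int16Array(trimmed);
  const floats = new Float32Array(samples.length);
  for (let i = 0; i < samples.length; i++) {
    floats[i] = samples[i] / 32768; // map [-32768, 32767] into [-1, 1)
  }
  return floats;
}
```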
@@ -389,13 +389,13 @@ class y {
  * Resample audio from 24kHz to 48kHz using linear interpolation
  */
  resample24kTo48k(e) {
- const t = e.length * 2, o = new Float32Array(t);
- for (let a = 0; a < e.length - 1; a++) {
- const n = e[a], c = e[a + 1];
- o[a * 2] = n, o[a * 2 + 1] = (n + c) / 2;
+ const t = e.length * 2, s = new Float32Array(t);
+ for (let o = 0; o < e.length - 1; o++) {
+ const n = e[o], c = e[o + 1];
+ s[o * 2] = n, s[o * 2 + 1] = (n + c) / 2;
  }
  const i = e.length - 1;
- return o[i * 2] = e[i], o[i * 2 + 1] = e[i], o;
+ return s[i * 2] = e[i], s[i * 2 + 1] = e[i], s;
  }
  /**
  * Clear the playback buffer (for interruption handling)
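The resampler is likewise untouched apart from renames. Since 48kHz is exactly twice 24kHz, the upsampler emits each input sample followed by the midpoint of that sample and its successor, duplicating the final sample where no successor exists. De-minified:

```ts
// De-minified reading of resample24kTo48k: 2x upsampling via
// linear interpolation (inserted samples are neighbor midpoints).
function resample24kTo48k(input: Float32Array): Float32Array {
  const output = new Float32Array(input.length * 2);
  for (let i = 0; i < input.length - 1; i++) {
    const current = input[i];
    const next = input[i + 1];
    output[i * 2] = current;                   // keep the original sample
    output[i * 2 + 1] = (current + next) / 2;  // interpolate between neighbors
  }
  const last = input.length - 1;
  output[last * 2] = input[last];              // no successor: duplicate the last sample
  output[last * 2 + 1] = input[last];
  return output;
}
```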
@@ -506,26 +506,37 @@ class y {
  * Handle a function call request from Deepgram Voice Agent
  * Routes function execution through the Unity voice function endpoint for proper authentication
  *
- * @param request - The function call request from Deepgram
+ * Deepgram sends an array of functions in each request, so we process each one
+ * and send individual responses back.
+ *
+ * @param request - The function call request from Deepgram containing functions array
  * @see https://developers.deepgram.com/docs/voice-agents-function-calling
  */
  async handleFunctionCallRequest(e) {
- console.log("[GlydeVoice] Function call request:", e.function_name, e.input);
- let t;
- try {
- e.function_name === "end_conversation" ? t = await this.handleEndConversation(e.input) : t = await this.executeVoiceFunction(e.function_name, e.function_call_id, e.input);
- } catch (i) {
- console.error("[GlydeVoice] Function call error:", i), t = JSON.stringify({
- error: "Function execution failed",
- details: i instanceof Error ? i.message : String(i)
- });
+ for (const t of e.functions) {
+ console.log("[GlydeVoice] Function call request:", t.name, t.arguments);
+ let s = {};
+ try {
+ s = t.arguments ? JSON.parse(t.arguments) : {};
+ } catch (n) {
+ console.warn("[GlydeVoice] Failed to parse function arguments:", n);
+ }
+ let i;
+ try {
+ t.name === "end_conversation" ? i = await this.handleEndConversation(s) : i = await this.executeVoiceFunction(t.name, t.id, s);
+ } catch (n) {
+ console.error("[GlydeVoice] Function call error:", n), i = JSON.stringify({
+ error: "Function execution failed",
+ details: n instanceof Error ? n.message : String(n)
+ });
+ }
+ const o = {
+ type: "FunctionCallResponse",
+ function_call_id: t.id,
+ output: i
+ };
+ this.ws && this.ws.readyState === WebSocket.OPEN ? (this.ws.send(JSON.stringify(o)), console.log("[GlydeVoice] Function response sent:", t.name)) : console.error("[GlydeVoice] Cannot send function response - WebSocket not open");
  }
- const o = {
- type: "FunctionCallResponse",
- function_call_id: e.function_call_id,
- output: t
- };
- this.ws && this.ws.readyState === WebSocket.OPEN ? (this.ws.send(JSON.stringify(o)), console.log("[GlydeVoice] Function response sent:", e.function_name)) : console.error("[GlydeVoice] Cannot send function response - WebSocket not open");
  }
  /**
  * Execute a voice function through the Unity API with proper authentication
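Since the loop above is this release's main behavioral change and the bundle is minified, here is a de-minified reading of the new handler: each `FunctionCall` gets its arguments parsed from JSON, is dispatched (with `end_conversation` handled locally), and receives its own `FunctionCallResponse` keyed by the call's `id`. The `Internals` interface is a hypothetical stand-in for the private class state:

```ts
import type { FunctionCallRequest } from '@glydeunity/voice-sdk';

// Hypothetical view of the private state this method touches.
interface Internals {
  ws: WebSocket | null;
  handleEndConversation(input: Record<string, unknown>): Promise<string>;
  executeVoiceFunction(name: string, callId: string, input: Record<string, unknown>): Promise<string>;
}

// De-minified reading of the 1.3.5 handleFunctionCallRequest.
async function handleFunctionCallRequest(this: Internals, request: FunctionCallRequest): Promise<void> {
  for (const call of request.functions) {
    // Arguments arrive as a JSON string; fall back to {} if parsing fails.
    let input: Record<string, unknown> = {};
    try {
      input = call.arguments ? JSON.parse(call.arguments) : {};
    } catch (err) {
      console.warn('[GlydeVoice] Failed to parse function arguments:', err);
    }
    let output: string;
    try {
      output = call.name === 'end_conversation'
        ? await this.handleEndConversation(input)
        : await this.executeVoiceFunction(call.name, call.id, input);
    } catch (err) {
      output = JSON.stringify({
        error: 'Function execution failed',
        details: err instanceof Error ? err.message : String(err),
      });
    }
    // One response per call, correlated by the call's id.
    const response = { type: 'FunctionCallResponse', function_call_id: call.id, output };
    if (this.ws && this.ws.readyState === WebSocket.OPEN) {
      this.ws.send(JSON.stringify(response));
    }
  }
}
```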
@@ -537,7 +548,7 @@ class y {
  * @param input - Function input parameters
  * @returns JSON string with function result
  */
- async executeVoiceFunction(e, t, o) {
+ async executeVoiceFunction(e, t, s) {
  console.log("[GlydeVoice] Executing voice function via Unity API:", e);
  try {
  const i = await fetch(`${this.unityUrl}/api/unity/voice/function`, {
@@ -546,7 +557,7 @@ class y {
  body: JSON.stringify({
  function_name: e,
  function_call_id: t,
- input: o,
+ input: s,
  context: {
  context_id: this.sessionContext.contextId,
  context_type: this.sessionContext.contextType,
@@ -558,9 +569,9 @@ class y {
  const n = await i.json().catch(() => ({}));
  throw new Error(n.error?.message || `Function call failed: ${i.status}`);
  }
- const a = await i.json();
- if (a.success && a.data?.output)
- return typeof a.data.output == "string" ? a.data.output : JSON.stringify(a.data.output);
+ const o = await i.json();
+ if (o.success && o.data?.output)
+ return typeof o.data.output == "string" ? o.data.output : JSON.stringify(o.data.output);
  throw new Error("Invalid response from voice function endpoint");
  } catch (i) {
  return console.error("[GlydeVoice] Voice function error:", i), JSON.stringify({
@@ -130,11 +130,11 @@ class AudioPlaybackProcessor extends AudioWorkletProcessor {
  }

  registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
- `;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;sessionContext={};constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,o=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!o.ok){const a=await o.json();throw new Error(a.error?.message||a.message||"Failed to fetch voice config")}const{data:i}=await o.json();return i}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e={context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"};this.config.systemPrompt&&(e.system_prompt=this.config.systemPrompt),this.config.deepgramConfig&&(e.deepgram_config=this.config.deepgramConfig);const t=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify(e)});if(!t.ok){const s=await t.json();throw new Error(s.error?.message||s.message||"Failed to authenticate voice session")}const{data:o}=await t.json(),{token:i,agent_config:a,deepgram_config:n}=o;this.setSessionContext({clientUuid:a?.client_uuid,contextId:this.config.contextId,contextType:this.config.contextType,currentJobUuid:a?.job_uuid});const d=this.config.systemPrompt||a.instructions||this.serverConfig?.system_prompt||"You are a helpful AI assistant.";await this.initializeAudio();let h="wss://agent.deepgram.com/v1/agent/converse";const r=this.config.deepgramConfig||n||this.serverConfig?.deepgram_config;if(r?.tags&&r.tags.length>0){const s=new URLSearchParams;r.tags.forEach(u=>s.append("tag",u)),h+=`?${s.toString()}`}this.ws=new WebSocket(h,["bearer",i]),this.ws.onopen=()=>{const s=r||{think:{provider:{type:"open_ai",model:"gpt-5-nano"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}}},u={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:s.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:s.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:s.think?.provider||{type:"open_ai",model:"gpt-5-nano"},...s.think?.functions&&{functions:s.think.functions}},greeting:"Hi! I'm excited you chose to speak with me. Are you ready to start?"}};s.tags&&s.tags.length>0&&(u.tags=s.tags),this.ws.send(JSON.stringify(u)),this.emit({type:"open",payload:{config:a,serverConfig:this.serverConfig}})};const g=d;this.ws.onmessage=s=>{if(typeof s.data=="string"){try{if(JSON.parse(s.data).type==="SettingsApplied"){const y={type:"UpdatePrompt",prompt:g};this.ws.send(JSON.stringify(y)),this.startMicrophone()}}catch{}this.handleTextMessage(s.data)}else s.data instanceof Blob?this.handleAudioData(s.data):s.data instanceof ArrayBuffer&&this.handleAudioBuffer(s.data)},this.ws.onerror=s=>{console.error("[GlydeVoice] WebSocket error:",s),this.emit({type:"error",payload:s})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(l),t=this.createWorkletBlobUrl(p);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=o=>{const{type:i}=o.data;(i==="cleared"||i==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const o=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,o),this.emit({type:"transcript",payload:{text:t.content,role:o}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break;case"FunctionCallRequest":this.handleFunctionCallRequest(t);break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const o=t-t%2;if(o===0)return;const i=o===t?e:e.slice(0,o),a=new Int16Array(i),n=new Float32Array(a.length);for(let r=0;r<a.length;r++)n[r]=a[r]/32768;const d=this.resample24kTo48k(n);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const h=new Float32Array(d);this.playbackWorkletNode.port.postMessage({type:"audio",data:h},[h.buffer])}resample24kTo48k(e){const t=e.length*2,o=new Float32Array(t);for(let a=0;a<e.length-1;a++){const n=e[a],d=e[a+1];o[a*2]=n,o[a*2+1]=(n+d)/2}const i=e.length-1;return o[i*2]=e[i],o[i*2+1]=e[i],o}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
+ `;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;sessionContext={};constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,s=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!s.ok){const o=await s.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:i}=await s.json();return i}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e={context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"};this.config.systemPrompt&&(e.system_prompt=this.config.systemPrompt),this.config.deepgramConfig&&(e.deepgram_config=this.config.deepgramConfig);const t=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify(e)});if(!t.ok){const a=await t.json();throw new Error(a.error?.message||a.message||"Failed to authenticate voice session")}const{data:s}=await t.json(),{token:i,agent_config:o,deepgram_config:n}=s;this.setSessionContext({clientUuid:o?.client_uuid,contextId:this.config.contextId,contextType:this.config.contextType,currentJobUuid:o?.job_uuid});const d=this.config.systemPrompt||o.instructions||this.serverConfig?.system_prompt||"You are a helpful AI assistant.";await this.initializeAudio();let u="wss://agent.deepgram.com/v1/agent/converse";const r=this.config.deepgramConfig||n||this.serverConfig?.deepgram_config;if(r?.tags&&r.tags.length>0){const a=new URLSearchParams;r.tags.forEach(h=>a.append("tag",h)),u+=`?${a.toString()}`}this.ws=new WebSocket(u,["bearer",i]),this.ws.onopen=()=>{const a=r||{think:{provider:{type:"open_ai",model:"gpt-4.1-nano"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}}},h={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:a.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:a.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:a.think?.provider||{type:"open_ai",model:"gpt-4.1-nano"},...a.think?.functions&&{functions:a.think.functions}},greeting:"Hi! I'm excited you chose to speak with me. Are you ready to start?"}};a.tags&&a.tags.length>0&&(h.tags=a.tags),this.ws.send(JSON.stringify(h)),this.emit({type:"open",payload:{config:o,serverConfig:this.serverConfig}})};const g=d;this.ws.onmessage=a=>{if(typeof a.data=="string"){try{if(JSON.parse(a.data).type==="SettingsApplied"){const y={type:"UpdatePrompt",prompt:g};this.ws.send(JSON.stringify(y)),this.startMicrophone()}}catch{}this.handleTextMessage(a.data)}else a.data instanceof Blob?this.handleAudioData(a.data):a.data instanceof ArrayBuffer&&this.handleAudioBuffer(a.data)},this.ws.onerror=a=>{console.error("[GlydeVoice] WebSocket error:",a),this.emit({type:"error",payload:a})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(l),t=this.createWorkletBlobUrl(p);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=s=>{const{type:i}=s.data;(i==="cleared"||i==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const s=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,s),this.emit({type:"transcript",payload:{text:t.content,role:s}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break;case"FunctionCallRequest":this.handleFunctionCallRequest(t);break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const s=t-t%2;if(s===0)return;const i=s===t?e:e.slice(0,s),o=new Int16Array(i),n=new Float32Array(o.length);for(let r=0;r<o.length;r++)n[r]=o[r]/32768;const d=this.resample24kTo48k(n);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const u=new Float32Array(d);this.playbackWorkletNode.port.postMessage({type:"audio",data:u},[u.buffer])}resample24kTo48k(e){const t=e.length*2,s=new Float32Array(t);for(let o=0;o<e.length-1;o++){const n=e[o],d=e[o+1];s[o*2]=n,s[o*2+1]=(n+d)/2}const i=e.length-1;return s[i*2]=e[i],s[i*2+1]=e[i],s}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
  <div style="padding: 20px; border: 1px solid #ccc; border-radius: 8px; background: #fff;">
  <h3>Glyde Voice Agent</h3>
  <p>Status: Active</p>
  <p>Context: ${this.config.contextType}</p>
  <button onclick="this.closest('div').remove()">Close</button>
  </div>
- `)}async handleFunctionCallRequest(e){console.log("[GlydeVoice] Function call request:",e.function_name,e.input);let t;try{e.function_name==="end_conversation"?t=await this.handleEndConversation(e.input):t=await this.executeVoiceFunction(e.function_name,e.function_call_id,e.input)}catch(i){console.error("[GlydeVoice] Function call error:",i),t=JSON.stringify({error:"Function execution failed",details:i instanceof Error?i.message:String(i)})}const o={type:"FunctionCallResponse",function_call_id:e.function_call_id,output:t};this.ws&&this.ws.readyState===WebSocket.OPEN?(this.ws.send(JSON.stringify(o)),console.log("[GlydeVoice] Function response sent:",e.function_name)):console.error("[GlydeVoice] Cannot send function response - WebSocket not open")}async executeVoiceFunction(e,t,o){console.log("[GlydeVoice] Executing voice function via Unity API:",e);try{const i=await fetch(`${this.unityUrl}/api/unity/voice/function`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({function_name:e,function_call_id:t,input:o,context:{context_id:this.sessionContext.contextId,context_type:this.sessionContext.contextType,current_job_uuid:this.sessionContext.currentJobUuid}})});if(!i.ok){const n=await i.json().catch(()=>({}));throw new Error(n.error?.message||`Function call failed: ${i.status}`)}const a=await i.json();if(a.success&&a.data?.output)return typeof a.data.output=="string"?a.data.output:JSON.stringify(a.data.output);throw new Error("Invalid response from voice function endpoint")}catch(i){return console.error("[GlydeVoice] Voice function error:",i),JSON.stringify({success:!1,error:i instanceof Error?i.message:"Function execution failed",fallback_message:"I apologize, but I'm having trouble with that request right now. Is there something else I can help you with?"})}}async handleEndConversation(e){const t=e.item||"user request";return console.log(`[GlydeVoice] End conversation triggered by: ${t}`),setTimeout(()=>{this.stop()},2e3),JSON.stringify({success:!0,message:"Conversation ending. Say goodbye to the user.",trigger_phrase:t})}setSessionContext(e){this.sessionContext={...this.sessionContext,...e},console.log("[GlydeVoice] Session context updated:",{hasContextId:!!e.contextId,contextType:e.contextType,hasJobUuid:!!e.currentJobUuid})}}c.GlydeVoice=f,Object.defineProperty(c,Symbol.toStringTag,{value:"Module"})}));
+ `)}async handleFunctionCallRequest(e){for(const t of e.functions){console.log("[GlydeVoice] Function call request:",t.name,t.arguments);let s={};try{s=t.arguments?JSON.parse(t.arguments):{}}catch(n){console.warn("[GlydeVoice] Failed to parse function arguments:",n)}let i;try{t.name==="end_conversation"?i=await this.handleEndConversation(s):i=await this.executeVoiceFunction(t.name,t.id,s)}catch(n){console.error("[GlydeVoice] Function call error:",n),i=JSON.stringify({error:"Function execution failed",details:n instanceof Error?n.message:String(n)})}const o={type:"FunctionCallResponse",function_call_id:t.id,output:i};this.ws&&this.ws.readyState===WebSocket.OPEN?(this.ws.send(JSON.stringify(o)),console.log("[GlydeVoice] Function response sent:",t.name)):console.error("[GlydeVoice] Cannot send function response - WebSocket not open")}}async executeVoiceFunction(e,t,s){console.log("[GlydeVoice] Executing voice function via Unity API:",e);try{const i=await fetch(`${this.unityUrl}/api/unity/voice/function`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({function_name:e,function_call_id:t,input:s,context:{context_id:this.sessionContext.contextId,context_type:this.sessionContext.contextType,current_job_uuid:this.sessionContext.currentJobUuid}})});if(!i.ok){const n=await i.json().catch(()=>({}));throw new Error(n.error?.message||`Function call failed: ${i.status}`)}const o=await i.json();if(o.success&&o.data?.output)return typeof o.data.output=="string"?o.data.output:JSON.stringify(o.data.output);throw new Error("Invalid response from voice function endpoint")}catch(i){return console.error("[GlydeVoice] Voice function error:",i),JSON.stringify({success:!1,error:i instanceof Error?i.message:"Function execution failed",fallback_message:"I apologize, but I'm having trouble with that request right now. Is there something else I can help you with?"})}}async handleEndConversation(e){const t=e.item||"user request";return console.log(`[GlydeVoice] End conversation triggered by: ${t}`),setTimeout(()=>{this.stop()},2e3),JSON.stringify({success:!0,message:"Conversation ending. Say goodbye to the user.",trigger_phrase:t})}setSessionContext(e){this.sessionContext={...this.sessionContext,...e},console.log("[GlydeVoice] Session context updated:",{hasContextId:!!e.contextId,contextType:e.contextType,hasJobUuid:!!e.currentJobUuid})}}c.GlydeVoice=f,Object.defineProperty(c,Symbol.toStringTag,{value:"Module"})}));
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@glydeunity/voice-sdk",
- "version": "1.3.3",
+ "version": "1.3.5",
  "description": "GLYDE Voice Agent SDK - AI-powered voice interactions for web applications",
  "type": "module",
  "main": "./dist/voice-sdk.umd.js",