@glydeunity/voice-sdk 1.2.1 → 1.2.2

@@ -174,15 +174,15 @@ class g {
  * @returns Voice configuration including system prompt, tools, and Deepgram settings
  */
  async fetchConfig() {
- const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, i = await fetch(t, {
+ const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, s = await fetch(t, {
  method: "GET",
  headers: this.getAuthHeaders()
  });
- if (!i.ok) {
- const a = await i.json();
+ if (!s.ok) {
+ const a = await s.json();
  throw new Error(a.error?.message || a.message || "Failed to fetch voice config");
  }
- const { data: o } = await i.json();
+ const { data: o } = await s.json();
  return o;
  }
  /**
@@ -204,17 +204,48 @@ class g {
  body: JSON.stringify(e)
  });
  if (!t.ok) {
- const s = await t.json();
- throw new Error(s.error?.message || s.message || "Failed to authenticate voice session");
+ const i = await t.json();
+ throw new Error(i.error?.message || i.message || "Failed to authenticate voice session");
  }
- const { data: i } = await t.json(), { token: o, agent_config: a, deepgram_config: r } = i, l = this.config.systemPrompt || a.instructions || this.serverConfig?.system_prompt || "You are a helpful AI assistant.";
+ const { data: s } = await t.json(), { token: o, agent_config: a, deepgram_config: r } = s, l = this.config.systemPrompt || a.instructions || this.serverConfig?.system_prompt || "You are a helpful AI assistant.";
  await this.initializeAudio();
  const c = "wss://agent.deepgram.com/v1/agent/converse";
  this.ws = new WebSocket(c, ["bearer", o]), this.ws.onopen = () => {
- const s = this.config.deepgramConfig || r || this.serverConfig?.deepgram_config || {
- think: { provider: { type: "open_ai", model: "gpt-4o-mini" } },
+ const i = this.config.deepgramConfig || r || this.serverConfig?.deepgram_config || {
+ think: {
+ provider: { type: "open_ai", model: "gpt-4.1-mini" },
+ functions: [
+ {
+ name: "end_conversation",
+ description: `You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
+
+ Here is a list of phrases to listen for but not restricted to:
+ -stop
+ -shut up
+ -go away
+ -turn off
+ -stop listening
+
+ Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
+
+ When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
+
+ End the conversation immediately if:
+ 1. The user's input exactly matches any phrase in the list.
+ 2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
+ 3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,
+ parameters: {
+ type: "object",
+ properties: {
+ item: { type: "string", description: "The phrase or text that triggered the end of conversation" }
+ },
+ required: ["item"]
+ }
+ }
+ ]
+ },
  speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
- listen: { provider: { type: "deepgram", model: "nova-2", version: "latest" } }
+ listen: { provider: { type: "deepgram", version: "v2", model: "flux-general-en" } }
  }, h = {
  type: "Settings",
  audio: {
@@ -230,22 +261,38 @@ class g {
  },
  agent: {
  language: "en",
- speak: s.speak || {
+ speak: i.speak || {
  provider: { type: "deepgram", model: "aura-2-thalia-en" }
  },
- listen: s.listen || {
+ listen: i.listen || {
  provider: { type: "deepgram", version: "v2", model: "flux-general-en" }
  },
  think: {
- provider: s.think?.provider || { type: "open_ai", model: "gpt-4o-mini" },
- functions: s.think?.functions || [
+ provider: i.think?.provider || { type: "open_ai", model: "gpt-4.1-mini" },
+ functions: i.think?.functions || [
  {
  name: "end_conversation",
- description: "End the conversation when stop phrases are detected.",
+ description: `You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
+
+ Here is a list of phrases to listen for but not restricted to:
+ -stop
+ -shut up
+ -go away
+ -turn off
+ -stop listening
+
+ Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
+
+ When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
+
+ End the conversation immediately if:
+ 1. The user's input exactly matches any phrase in the list.
+ 2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
+ 3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,
  parameters: {
  type: "object",
  properties: {
- item: { type: "string", description: "The phrase that triggered end of conversation" }
+ item: { type: "string", description: "The phrase or text that triggered the end of conversation" }
  },
  required: ["item"]
  }
@@ -258,10 +305,10 @@ class g {
  this.ws.send(JSON.stringify(h)), this.emit({ type: "open", payload: { config: a, serverConfig: this.serverConfig } });
  };
  const n = l;
- this.ws.onmessage = (s) => {
- if (typeof s.data == "string") {
+ this.ws.onmessage = (i) => {
+ if (typeof i.data == "string") {
  try {
- if (JSON.parse(s.data).type === "SettingsApplied") {
+ if (JSON.parse(i.data).type === "SettingsApplied") {
  const p = {
  type: "UpdatePrompt",
  prompt: n
@@ -270,10 +317,10 @@ class g {
  }
  } catch {
  }
- this.handleTextMessage(s.data);
- } else s.data instanceof Blob ? this.handleAudioData(s.data) : s.data instanceof ArrayBuffer && this.handleAudioBuffer(s.data);
- }, this.ws.onerror = (s) => {
- console.error("[GlydeVoice] WebSocket error:", s), this.emit({ type: "error", payload: s });
+ this.handleTextMessage(i.data);
+ } else i.data instanceof Blob ? this.handleAudioData(i.data) : i.data instanceof ArrayBuffer && this.handleAudioBuffer(i.data);
+ }, this.ws.onerror = (i) => {
+ console.error("[GlydeVoice] WebSocket error:", i), this.emit({ type: "error", payload: i });
  }, this.ws.onclose = () => {
  this.cleanup(), this.emit({ type: "close" });
  }, this.renderUI();
@@ -307,8 +354,8 @@ class g {
  } finally {
  URL.revokeObjectURL(e), URL.revokeObjectURL(t);
  }
- this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (i) => {
- const { type: o } = i.data;
+ this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (s) => {
+ const { type: o } = s.data;
  (o === "cleared" || o === "bufferEmpty") && (this.isAgentSpeaking = !1, this.agentAudioDoneReceived = !1, this.emit({ type: "agent_speaking", payload: !1 }));
  };
  }
@@ -332,8 +379,8 @@ class g {
  break;
  case "ConversationText":
  if (t.content && t.content.trim()) {
- const i = t.role === "assistant" ? "agent" : "user";
- this.config.onTranscript && this.config.onTranscript(t.content, i), this.emit({ type: "transcript", payload: { text: t.content, role: i } }), this.saveTranscript(t.content, t.role);
+ const s = t.role === "assistant" ? "agent" : "user";
+ this.config.onTranscript && this.config.onTranscript(t.content, s), this.emit({ type: "transcript", payload: { text: t.content, role: s } }), this.saveTranscript(t.content, t.role);
  }
  break;
  case "AgentStartedSpeaking":
@@ -366,9 +413,9 @@ class g {
  this.audioContext.state === "suspended" && this.audioContext.resume();
  const t = e.byteLength;
  if (t === 0) return;
- const i = t - t % 2;
- if (i === 0) return;
- const o = i === t ? e : e.slice(0, i), a = new Int16Array(o), r = new Float32Array(a.length);
+ const s = t - t % 2;
+ if (s === 0) return;
+ const o = s === t ? e : e.slice(0, s), a = new Int16Array(o), r = new Float32Array(a.length);
  for (let n = 0; n < a.length; n++)
  r[n] = a[n] / 32768;
  const l = this.resample24kTo48k(r);
@@ -383,13 +430,13 @@ class g {
  * Resample audio from 24kHz to 48kHz using linear interpolation
  */
  resample24kTo48k(e) {
- const t = e.length * 2, i = new Float32Array(t);
+ const t = e.length * 2, s = new Float32Array(t);
  for (let a = 0; a < e.length - 1; a++) {
  const r = e[a], l = e[a + 1];
- i[a * 2] = r, i[a * 2 + 1] = (r + l) / 2;
+ s[a * 2] = r, s[a * 2 + 1] = (r + l) / 2;
  }
  const o = e.length - 1;
- return i[o * 2] = e[o], i[o * 2 + 1] = e[o], i;
+ return s[o * 2] = e[o], s[o * 2 + 1] = e[o], s;
  }
  /**
  * Clear the playback buffer (for interruption handling)
@@ -130,7 +130,39 @@ class AudioPlaybackProcessor extends AudioWorkletProcessor {
  }
 
 registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
- `;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,i=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!i.ok){const o=await i.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:a}=await i.json();return a}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e={context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"};this.config.systemPrompt&&(e.system_prompt=this.config.systemPrompt),this.config.deepgramConfig&&(e.deepgram_config=this.config.deepgramConfig);const t=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify(e)});if(!t.ok){const s=await t.json();throw new Error(s.error?.message||s.message||"Failed to authenticate voice session")}const{data:i}=await t.json(),{token:a,agent_config:o,deepgram_config:r}=i,h=this.config.systemPrompt||o.instructions||this.serverConfig?.system_prompt||"You are a helpful AI assistant.";await this.initializeAudio();const d="wss://agent.deepgram.com/v1/agent/converse";this.ws=new WebSocket(d,["bearer",a]),this.ws.onopen=()=>{const s=this.config.deepgramConfig||r||this.serverConfig?.deepgram_config||{think:{provider:{type:"open_ai",model:"gpt-4o-mini"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",model:"nova-2",version:"latest"}}},p={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:s.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:s.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:s.think?.provider||{type:"open_ai",model:"gpt-4o-mini"},functions:s.think?.functions||[{name:"end_conversation",description:"End the conversation when stop phrases are detected.",parameters:{type:"object",properties:{item:{type:"string",description:"The phrase that triggered end of conversation"}},required:["item"]}}]},greeting:"Hi! I'm ready to speak with you. How can I help you today?"}};this.ws.send(JSON.stringify(p)),this.emit({type:"open",payload:{config:o,serverConfig:this.serverConfig}})};const l=h;this.ws.onmessage=s=>{if(typeof s.data=="string"){try{if(JSON.parse(s.data).type==="SettingsApplied"){const g={type:"UpdatePrompt",prompt:l};this.ws.send(JSON.stringify(g)),this.startMicrophone()}}catch{}this.handleTextMessage(s.data)}else s.data instanceof Blob?this.handleAudioData(s.data):s.data instanceof ArrayBuffer&&this.handleAudioBuffer(s.data)},this.ws.onerror=s=>{console.error("[GlydeVoice] WebSocket error:",s),this.emit({type:"error",payload:s})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(c),t=this.createWorkletBlobUrl(u);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=i=>{const{type:a}=i.data;(a==="cleared"||a==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const i=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,i),this.emit({type:"transcript",payload:{text:t.content,role:i}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const i=t-t%2;if(i===0)return;const a=i===t?e:e.slice(0,i),o=new Int16Array(a),r=new Float32Array(o.length);for(let l=0;l<o.length;l++)r[l]=o[l]/32768;const h=this.resample24kTo48k(r);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const d=new Float32Array(h);this.playbackWorkletNode.port.postMessage({type:"audio",data:d},[d.buffer])}resample24kTo48k(e){const t=e.length*2,i=new Float32Array(t);for(let o=0;o<e.length-1;o++){const r=e[o],h=e[o+1];i[o*2]=r,i[o*2+1]=(r+h)/2}const a=e.length-1;return i[a*2]=e[a],i[a*2+1]=e[a],i}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
+ `;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,i=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!i.ok){const o=await i.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:a}=await i.json();return a}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e={context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"};this.config.systemPrompt&&(e.system_prompt=this.config.systemPrompt),this.config.deepgramConfig&&(e.deepgram_config=this.config.deepgramConfig);const t=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify(e)});if(!t.ok){const s=await t.json();throw new Error(s.error?.message||s.message||"Failed to authenticate voice session")}const{data:i}=await t.json(),{token:a,agent_config:o,deepgram_config:r}=i,h=this.config.systemPrompt||o.instructions||this.serverConfig?.system_prompt||"You are a helpful AI assistant.";await this.initializeAudio();const p="wss://agent.deepgram.com/v1/agent/converse";this.ws=new WebSocket(p,["bearer",a]),this.ws.onopen=()=>{const s=this.config.deepgramConfig||r||this.serverConfig?.deepgram_config||{think:{provider:{type:"open_ai",model:"gpt-4.1-mini"},functions:[{name:"end_conversation",description:`You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
+
+ Here is a list of phrases to listen for but not restricted to:
+ -stop
+ -shut up
+ -go away
+ -turn off
+ -stop listening
+
+ Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
+
+ When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
+
+ End the conversation immediately if:
+ 1. The user's input exactly matches any phrase in the list.
+ 2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
+ 3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,parameters:{type:"object",properties:{item:{type:"string",description:"The phrase or text that triggered the end of conversation"}},required:["item"]}}]},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}}},d={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:s.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:s.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:s.think?.provider||{type:"open_ai",model:"gpt-4.1-mini"},functions:s.think?.functions||[{name:"end_conversation",description:`You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
+
+ Here is a list of phrases to listen for but not restricted to:
+ -stop
+ -shut up
+ -go away
+ -turn off
+ -stop listening
+
+ Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
+
+ When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
+
+ End the conversation immediately if:
+ 1. The user's input exactly matches any phrase in the list.
+ 2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
+ 3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,parameters:{type:"object",properties:{item:{type:"string",description:"The phrase or text that triggered the end of conversation"}},required:["item"]}}]},greeting:"Hi! I'm ready to speak with you. How can I help you today?"}};this.ws.send(JSON.stringify(d)),this.emit({type:"open",payload:{config:o,serverConfig:this.serverConfig}})};const l=h;this.ws.onmessage=s=>{if(typeof s.data=="string"){try{if(JSON.parse(s.data).type==="SettingsApplied"){const g={type:"UpdatePrompt",prompt:l};this.ws.send(JSON.stringify(g)),this.startMicrophone()}}catch{}this.handleTextMessage(s.data)}else s.data instanceof Blob?this.handleAudioData(s.data):s.data instanceof ArrayBuffer&&this.handleAudioBuffer(s.data)},this.ws.onerror=s=>{console.error("[GlydeVoice] WebSocket error:",s),this.emit({type:"error",payload:s})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(c),t=this.createWorkletBlobUrl(u);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=i=>{const{type:a}=i.data;(a==="cleared"||a==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const i=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,i),this.emit({type:"transcript",payload:{text:t.content,role:i}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const i=t-t%2;if(i===0)return;const a=i===t?e:e.slice(0,i),o=new Int16Array(a),r=new Float32Array(o.length);for(let l=0;l<o.length;l++)r[l]=o[l]/32768;const h=this.resample24kTo48k(r);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const p=new Float32Array(h);this.playbackWorkletNode.port.postMessage({type:"audio",data:p},[p.buffer])}resample24kTo48k(e){const t=e.length*2,i=new Float32Array(t);for(let o=0;o<e.length-1;o++){const r=e[o],h=e[o+1];i[o*2]=r,i[o*2+1]=(r+h)/2}const a=e.length-1;return i[a*2]=e[a],i[a*2+1]=e[a],i}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
  <div style="padding: 20px; border: 1px solid #ccc; border-radius: 8px; background: #fff;">
  <h3>Glyde Voice Agent</h3>
  <p>Status: Active</p>
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@glydeunity/voice-sdk",
- "version": "1.2.1",
+ "version": "1.2.2",
  "description": "GLYDE Voice Agent SDK - AI-powered voice interactions for web applications",
  "type": "module",
  "main": "./dist/voice-sdk.umd.js",