@glydeunity/voice-sdk 1.2.0 → 1.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
- const h = `
+ const d = `
  class AudioCaptureProcessor extends AudioWorkletProcessor {
  constructor() {
  super();
@@ -33,7 +33,7 @@ class AudioCaptureProcessor extends AudioWorkletProcessor {
  }
 
  registerProcessor('audio-capture-processor', AudioCaptureProcessor);
- `, p = `
+ `, u = `
  class AudioPlaybackProcessor extends AudioWorkletProcessor {
  constructor() {
  super();
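
These two template strings inline the SDK's audio worklets; their bodies are elided by the diff. For orientation, a generic sketch of what such an inlined capture worklet typically looks like; the process() body below is a common PCM-capture pattern and an assumption, not the SDK's actual code:

    // Generic AudioWorklet capture pattern (assumption: the SDK's elided body is similar).
    // AudioWorkletProcessor and registerProcessor exist only inside the worklet scope,
    // hence the declarations.
    declare const AudioWorkletProcessor: any;
    declare function registerProcessor(name: string, ctor: any): void;

    class AudioCaptureProcessor extends AudioWorkletProcessor {
      process(inputs: Float32Array[][]): boolean {
        const channel = inputs[0]?.[0];
        if (channel) {
          // Convert Float32 samples in [-1, 1] to 16-bit PCM and ship them to the main thread.
          const pcm = new Int16Array(channel.length);
          for (let i = 0; i < channel.length; i++) {
            pcm[i] = Math.max(-32768, Math.min(32767, Math.round(channel[i] * 32768)));
          }
          (this as any).port.postMessage(pcm.buffer, [pcm.buffer]);
        }
        return true; // keep the processor alive
      }
    }
    registerProcessor('audio-capture-processor', AudioCaptureProcessor);
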
@@ -131,7 +131,7 @@ class AudioPlaybackProcessor extends AudioWorkletProcessor {
 
  registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
  `;
- class u {
+ class g {
  config;
  unityUrl;
  active = !1;
@@ -193,27 +193,60 @@ class u {
  this.active = !0;
  try {
  this.config.systemPrompt || (this.serverConfig = await this.fetchConfig(), console.log("[GlydeVoice] Fetched config:", this.serverConfig));
- const e = await fetch(`${this.unityUrl}/api/unity/voice/auth`, {
+ const e = {
+ context_id: this.config.contextId,
+ domain: typeof window < "u" ? window.location.hostname : "localhost"
+ };
+ this.config.systemPrompt && (e.system_prompt = this.config.systemPrompt), this.config.deepgramConfig && (e.deepgram_config = this.config.deepgramConfig);
+ const t = await fetch(`${this.unityUrl}/api/unity/voice/auth`, {
  method: "POST",
  headers: this.getAuthHeaders(),
- body: JSON.stringify({
- context_id: this.config.contextId,
- domain: typeof window < "u" ? window.location.hostname : "localhost"
- })
+ body: JSON.stringify(e)
  });
- if (!e.ok) {
- const i = await e.json();
+ if (!t.ok) {
+ const i = await t.json();
  throw new Error(i.error?.message || i.message || "Failed to authenticate voice session");
  }
- const { data: t } = await e.json(), { token: s, agent_config: o } = t, a = this.config.systemPrompt || this.serverConfig?.system_prompt || o.instructions || "You are a helpful AI assistant.";
+ const { data: s } = await t.json(), { token: o, agent_config: a, deepgram_config: r } = s, l = this.config.systemPrompt || a.instructions || this.serverConfig?.system_prompt || "You are a helpful AI assistant.";
  await this.initializeAudio();
- const r = "wss://agent.deepgram.com/v1/agent/converse";
- this.ws = new WebSocket(r, ["bearer", s]), this.ws.onopen = () => {
- const i = this.config.deepgramConfig || this.serverConfig?.deepgram_config || {
- think: { provider: { type: "open_ai", model: "gpt-4o-mini" } },
+ const c = "wss://agent.deepgram.com/v1/agent/converse";
+ this.ws = new WebSocket(c, ["bearer", o]), this.ws.onopen = () => {
+ const i = this.config.deepgramConfig || r || this.serverConfig?.deepgram_config || {
+ think: {
+ provider: { type: "open_ai", model: "gpt-4.1-mini" },
+ functions: [
+ {
+ name: "end_conversation",
+ description: `You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
+
+ Here is a list of phrases to listen for but not restricted to:
+ -stop
+ -shut up
+ -go away
+ -turn off
+ -stop listening
+
+ Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
+
+ When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
+
+ End the conversation immediately if:
+ 1. The user's input exactly matches any phrase in the list.
+ 2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
+ 3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,
+ parameters: {
+ type: "object",
+ properties: {
+ item: { type: "string", description: "The phrase or text that triggered the end of conversation" }
+ },
+ required: ["item"]
+ }
+ }
+ ]
+ },
  speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
- listen: { provider: { type: "deepgram", model: "nova-2", version: "latest" } }
- }, n = {
+ listen: { provider: { type: "deepgram", version: "v2", model: "flux-general-en" } }
+ }, h = {
  type: "Settings",
  audio: {
  input: {
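
The 1.2.2 auth call now assembles its request body up front so optional overrides can be attached: a caller-supplied systemPrompt is forwarded as system_prompt, a caller-supplied deepgramConfig as deepgram_config, and the response now carries a deepgram_config field alongside token and agent_config. A minimal TypeScript sketch of that round trip; the VoiceAuthRequest and authenticate names and the typings are illustrative, not SDK exports, while the field names and URL come from the diff:

    // Hypothetical typing of the 1.2.2 auth round trip (field names from the diff above).
    interface VoiceAuthRequest {
      context_id?: string;
      domain: string;
      system_prompt?: string;                     // attached only when config.systemPrompt is set
      deepgram_config?: Record<string, unknown>;  // attached only when config.deepgramConfig is set
    }

    async function authenticate(
      unityUrl: string,
      headers: Record<string, string>,
      body: VoiceAuthRequest,
    ) {
      const res = await fetch(`${unityUrl}/api/unity/voice/auth`, {
        method: "POST",
        headers,
        body: JSON.stringify(body),
      });
      if (!res.ok) {
        const err = await res.json();
        throw new Error(err.error?.message || err.message || "Failed to authenticate voice session");
      }
      const { data } = await res.json();
      // 1.2.2 also destructures deepgram_config from the response.
      return data as { token: string; agent_config: any; deepgram_config?: unknown };
    }
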
@@ -235,15 +268,31 @@ class u {
  provider: { type: "deepgram", version: "v2", model: "flux-general-en" }
  },
  think: {
- provider: i.think?.provider || { type: "open_ai", model: "gpt-4o-mini" },
+ provider: i.think?.provider || { type: "open_ai", model: "gpt-4.1-mini" },
  functions: i.think?.functions || [
  {
  name: "end_conversation",
- description: "End the conversation when stop phrases are detected.",
+ description: `You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
+
+ Here is a list of phrases to listen for but not restricted to:
+ -stop
+ -shut up
+ -go away
+ -turn off
+ -stop listening
+
+ Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
+
+ When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
+
+ End the conversation immediately if:
+ 1. The user's input exactly matches any phrase in the list.
+ 2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
+ 3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,
  parameters: {
  type: "object",
  properties: {
- item: { type: "string", description: "The phrase that triggered end of conversation" }
+ item: { type: "string", description: "The phrase or text that triggered the end of conversation" }
  },
  required: ["item"]
  }
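
Taken together, the hunks above change the fallback agent configuration in three ways: the think model moves from gpt-4o-mini to gpt-4.1-mini, the listen model moves from nova-2 (version latest) to flux-general-en (version v2), and the one-line end_conversation description becomes the full stop-phrase monitoring prompt. Restated as one object for reference (not an SDK export; the long description is abbreviated):

    // New 1.2.2 defaults, restated from the hunks above (old values in comments).
    const defaultDeepgramConfig = {
      think: {
        provider: { type: "open_ai", model: "gpt-4.1-mini" },  // was "gpt-4o-mini"
        functions: [{
          name: "end_conversation",
          description: "/* full stop-phrase monitoring prompt shown in the diff */",
          parameters: {
            type: "object",
            properties: {
              item: { type: "string", description: "The phrase or text that triggered the end of conversation" },
            },
            required: ["item"],
          },
        }],
      },
      speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },                // unchanged
      listen: { provider: { type: "deepgram", version: "v2", model: "flux-general-en" } }, // was model "nova-2", version "latest"
    };
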
@@ -253,18 +302,18 @@ class u {
  greeting: "Hi! I'm ready to speak with you. How can I help you today?"
  }
  };
- this.ws.send(JSON.stringify(n)), this.emit({ type: "open", payload: { config: o, serverConfig: this.serverConfig } });
+ this.ws.send(JSON.stringify(h)), this.emit({ type: "open", payload: { config: a, serverConfig: this.serverConfig } });
  };
- const l = a;
+ const n = l;
  this.ws.onmessage = (i) => {
  if (typeof i.data == "string") {
  try {
  if (JSON.parse(i.data).type === "SettingsApplied") {
- const c = {
+ const p = {
  type: "UpdatePrompt",
- prompt: l
+ prompt: n
  };
- this.ws.send(JSON.stringify(c)), this.startMicrophone();
+ this.ws.send(JSON.stringify(p)), this.startMicrophone();
  }
  } catch {
  }
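
Beyond the renames, this hunk preserves the startup handshake: once the agent acknowledges the Settings message with SettingsApplied, the SDK sends an UpdatePrompt carrying the resolved prompt and only then starts the microphone. A stripped-down sketch of that sequence, with ws and resolvedPrompt standing in for the SDK's internals:

    // Handshake order from the hunk above: Settings -> SettingsApplied -> UpdatePrompt -> mic.
    declare const ws: WebSocket;
    declare const resolvedPrompt: string;

    ws.onmessage = (event: MessageEvent) => {
      if (typeof event.data !== "string") return; // binary frames carry agent audio
      try {
        if (JSON.parse(event.data).type === "SettingsApplied") {
          ws.send(JSON.stringify({ type: "UpdatePrompt", prompt: resolvedPrompt }));
          // the SDK calls startMicrophone() at this point
        }
      } catch {
        // non-JSON text frames are ignored here and handled elsewhere
      }
    };
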
@@ -296,7 +345,7 @@ class u {
  */
  async initializeAudio() {
  this.audioContext = new AudioContext({ sampleRate: this.inputSampleRate });
- const e = this.createWorkletBlobUrl(h), t = this.createWorkletBlobUrl(p);
+ const e = this.createWorkletBlobUrl(d), t = this.createWorkletBlobUrl(u);
  try {
  await Promise.all([
  this.audioContext.audioWorklet.addModule(e),
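
Only the source-string variables are renamed here; the loading pattern is unchanged: each inlined worklet source is wrapped in a Blob, loaded through an object URL, and the URLs are revoked once both modules are registered. A readable restatement (createWorkletBlobUrl mirrors the bundle; loadWorklets is an illustrative name):

    // Worklet-loading pattern from the hunk above, de-minified for readability.
    function createWorkletBlobUrl(source: string): string {
      return URL.createObjectURL(new Blob([source], { type: "application/javascript" }));
    }

    async function loadWorklets(ctx: AudioContext, captureSource: string, playbackSource: string) {
      const captureUrl = createWorkletBlobUrl(captureSource);
      const playbackUrl = createWorkletBlobUrl(playbackSource);
      try {
        await Promise.all([
          ctx.audioWorklet.addModule(captureUrl),
          ctx.audioWorklet.addModule(playbackUrl),
        ]);
      } finally {
        // Revoke in finally, as the bundle does, so the URLs never leak.
        URL.revokeObjectURL(captureUrl);
        URL.revokeObjectURL(playbackUrl);
      }
    }
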
@@ -371,11 +420,11 @@ class u {
  r[n] = a[n] / 32768;
  const l = this.resample24kTo48k(r);
  !this.isAgentSpeaking && !this.agentAudioDoneReceived && (this.isAgentSpeaking = !0, this.emit({ type: "agent_speaking", payload: !0 }));
- const i = new Float32Array(l);
+ const c = new Float32Array(l);
  this.playbackWorkletNode.port.postMessage({
  type: "audio",
- data: i
- }, [i.buffer]);
+ data: c
+ }, [c.buffer]);
  }
  /**
  * Resample audio from 24kHz to 48kHz using linear interpolation
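
The resampler itself is untouched in 1.2.2; only the playback buffer variable is renamed. For reference, a readable reconstruction of resample24kTo48k from the minified UMD bundle later in this diff: each input sample is emitted followed by the midpoint with its successor, and the final sample is duplicated:

    // Readable version of the SDK's resample24kTo48k (behavior unchanged in 1.2.2).
    function resample24kTo48k(input: Float32Array): Float32Array {
      const output = new Float32Array(input.length * 2);
      for (let i = 0; i < input.length - 1; i++) {
        const current = input[i];
        const next = input[i + 1];
        output[i * 2] = current;
        output[i * 2 + 1] = (current + next) / 2; // linear interpolation between neighbors
      }
      const last = input.length - 1;
      output[last * 2] = input[last];     // final sample has no successor,
      output[last * 2 + 1] = input[last]; // so it is simply duplicated
      return output;
    }
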
@@ -496,5 +545,5 @@ class u {
  }
  }
  export {
- u as GlydeVoice
+ g as GlydeVoice
  };
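
Net effect for ES-module consumers: the export is still GlydeVoice, but a supplied systemPrompt now takes precedence over agent_config.instructions and is forwarded to the auth endpoint. A hypothetical usage sketch; the option names (publishableKey, contextType, contextId, systemPrompt, onTranscript) appear in the bundle, the values are invented:

    import { GlydeVoice } from "@glydeunity/voice-sdk";

    const voice = new GlydeVoice({
      publishableKey: "pk_...",  // one of publishableKey / apiKey / authToken is required
      contextType: "support",    // illustrative value
      contextId: "ctx_123",      // illustrative value
      systemPrompt: "You are a helpful AI assistant.",  // now sent to /api/unity/voice/auth in 1.2.2
      onTranscript: (text: string, role: "agent" | "user") => console.log(role, text),
    });

    await voice.start();  // authenticates, opens the Deepgram agent socket, requests the mic
    // ...
    voice.stop();
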
@@ -1,4 +1,4 @@
- (function(l,c){typeof exports=="object"&&typeof module<"u"?c(exports):typeof define=="function"&&define.amd?define(["exports"],c):(l=typeof globalThis<"u"?globalThis:l||self,c(l.GlydeVoice={}))})(this,(function(l){"use strict";const c=`
+ (function(n,c){typeof exports=="object"&&typeof module<"u"?c(exports):typeof define=="function"&&define.amd?define(["exports"],c):(n=typeof globalThis<"u"?globalThis:n||self,c(n.GlydeVoice={}))})(this,(function(n){"use strict";const c=`
  class AudioCaptureProcessor extends AudioWorkletProcessor {
  constructor() {
  super();
@@ -33,7 +33,7 @@ class AudioCaptureProcessor extends AudioWorkletProcessor {
  }
 
  registerProcessor('audio-capture-processor', AudioCaptureProcessor);
- `,d=`
+ `,u=`
  class AudioPlaybackProcessor extends AudioWorkletProcessor {
  constructor() {
  super();
@@ -130,11 +130,43 @@ class AudioPlaybackProcessor extends AudioWorkletProcessor {
  }
 
  registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
- `;class p{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,s=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!s.ok){const o=await s.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:a}=await s.json();return a}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"})});if(!e.ok){const i=await e.json();throw new Error(i.error?.message||i.message||"Failed to authenticate voice session")}const{data:t}=await e.json(),{token:s,agent_config:a}=t,o=this.config.systemPrompt||this.serverConfig?.system_prompt||a.instructions||"You are a helpful AI assistant.";await this.initializeAudio();const r="wss://agent.deepgram.com/v1/agent/converse";this.ws=new WebSocket(r,["bearer",s]),this.ws.onopen=()=>{const i=this.config.deepgramConfig||this.serverConfig?.deepgram_config||{think:{provider:{type:"open_ai",model:"gpt-4o-mini"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",model:"nova-2",version:"latest"}}},n={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:i.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:i.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:i.think?.provider||{type:"open_ai",model:"gpt-4o-mini"},functions:i.think?.functions||[{name:"end_conversation",description:"End the conversation when stop phrases are detected.",parameters:{type:"object",properties:{item:{type:"string",description:"The phrase that triggered end of conversation"}},required:["item"]}}]},greeting:"Hi! I'm ready to speak with you. How can I help you today?"}};this.ws.send(JSON.stringify(n)),this.emit({type:"open",payload:{config:a,serverConfig:this.serverConfig}})};const h=o;this.ws.onmessage=i=>{if(typeof i.data=="string"){try{if(JSON.parse(i.data).type==="SettingsApplied"){const u={type:"UpdatePrompt",prompt:h};this.ws.send(JSON.stringify(u)),this.startMicrophone()}}catch{}this.handleTextMessage(i.data)}else i.data instanceof Blob?this.handleAudioData(i.data):i.data instanceof ArrayBuffer&&this.handleAudioBuffer(i.data)},this.ws.onerror=i=>{console.error("[GlydeVoice] WebSocket error:",i),this.emit({type:"error",payload:i})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(c),t=this.createWorkletBlobUrl(d);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=s=>{const{type:a}=s.data;(a==="cleared"||a==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const s=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,s),this.emit({type:"transcript",payload:{text:t.content,role:s}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const s=t-t%2;if(s===0)return;const a=s===t?e:e.slice(0,s),o=new Int16Array(a),r=new Float32Array(o.length);for(let n=0;n<o.length;n++)r[n]=o[n]/32768;const h=this.resample24kTo48k(r);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const i=new Float32Array(h);this.playbackWorkletNode.port.postMessage({type:"audio",data:i},[i.buffer])}resample24kTo48k(e){const t=e.length*2,s=new Float32Array(t);for(let o=0;o<e.length-1;o++){const r=e[o],h=e[o+1];s[o*2]=r,s[o*2+1]=(r+h)/2}const a=e.length-1;return s[a*2]=e[a],s[a*2+1]=e[a],s}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
+ `;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,i=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!i.ok){const o=await i.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:a}=await i.json();return a}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e={context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"};this.config.systemPrompt&&(e.system_prompt=this.config.systemPrompt),this.config.deepgramConfig&&(e.deepgram_config=this.config.deepgramConfig);const t=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify(e)});if(!t.ok){const s=await t.json();throw new Error(s.error?.message||s.message||"Failed to authenticate voice session")}const{data:i}=await t.json(),{token:a,agent_config:o,deepgram_config:r}=i,h=this.config.systemPrompt||o.instructions||this.serverConfig?.system_prompt||"You are a helpful AI assistant.";await this.initializeAudio();const p="wss://agent.deepgram.com/v1/agent/converse";this.ws=new WebSocket(p,["bearer",a]),this.ws.onopen=()=>{const s=this.config.deepgramConfig||r||this.serverConfig?.deepgram_config||{think:{provider:{type:"open_ai",model:"gpt-4.1-mini"},functions:[{name:"end_conversation",description:`You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
+
+ Here is a list of phrases to listen for but not restricted to:
+ -stop
+ -shut up
+ -go away
+ -turn off
+ -stop listening
+
+ Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
+
+ When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
+
+ End the conversation immediately if:
+ 1. The user's input exactly matches any phrase in the list.
+ 2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
+ 3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,parameters:{type:"object",properties:{item:{type:"string",description:"The phrase or text that triggered the end of conversation"}},required:["item"]}}]},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}}},d={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:s.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:s.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:s.think?.provider||{type:"open_ai",model:"gpt-4.1-mini"},functions:s.think?.functions||[{name:"end_conversation",description:`You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
+
+ Here is a list of phrases to listen for but not restricted to:
+ -stop
+ -shut up
+ -go away
+ -turn off
+ -stop listening
+
+ Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
+
+ When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
+
+ End the conversation immediately if:
+ 1. The user's input exactly matches any phrase in the list.
+ 2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
+ 3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,parameters:{type:"object",properties:{item:{type:"string",description:"The phrase or text that triggered the end of conversation"}},required:["item"]}}]},greeting:"Hi! I'm ready to speak with you. How can I help you today?"}};this.ws.send(JSON.stringify(d)),this.emit({type:"open",payload:{config:o,serverConfig:this.serverConfig}})};const l=h;this.ws.onmessage=s=>{if(typeof s.data=="string"){try{if(JSON.parse(s.data).type==="SettingsApplied"){const g={type:"UpdatePrompt",prompt:l};this.ws.send(JSON.stringify(g)),this.startMicrophone()}}catch{}this.handleTextMessage(s.data)}else s.data instanceof Blob?this.handleAudioData(s.data):s.data instanceof ArrayBuffer&&this.handleAudioBuffer(s.data)},this.ws.onerror=s=>{console.error("[GlydeVoice] WebSocket error:",s),this.emit({type:"error",payload:s})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(c),t=this.createWorkletBlobUrl(u);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=i=>{const{type:a}=i.data;(a==="cleared"||a==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const i=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,i),this.emit({type:"transcript",payload:{text:t.content,role:i}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const i=t-t%2;if(i===0)return;const a=i===t?e:e.slice(0,i),o=new Int16Array(a),r=new Float32Array(o.length);for(let l=0;l<o.length;l++)r[l]=o[l]/32768;const h=this.resample24kTo48k(r);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const p=new Float32Array(h);this.playbackWorkletNode.port.postMessage({type:"audio",data:p},[p.buffer])}resample24kTo48k(e){const t=e.length*2,i=new Float32Array(t);for(let o=0;o<e.length-1;o++){const r=e[o],h=e[o+1];i[o*2]=r,i[o*2+1]=(r+h)/2}const a=e.length-1;return i[a*2]=e[a],i[a*2+1]=e[a],i}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
  <div style="padding: 20px; border: 1px solid #ccc; border-radius: 8px; background: #fff;">
  <h3>Glyde Voice Agent</h3>
  <p>Status: Active</p>
  <p>Context: ${this.config.contextType}</p>
  <button onclick="this.closest('div').remove()">Close</button>
  </div>
- `)}}l.GlydeVoice=p,Object.defineProperty(l,Symbol.toStringTag,{value:"Module"})}));
+ `)}}n.GlydeVoice=f,Object.defineProperty(n,Symbol.toStringTag,{value:"Module"})}));
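
The UMD build mirrors the ES-module changes one for one (same auth payload, same defaults, same prompt precedence; only the minifier's identifier choices differ) and, per the wrapper above, attaches a GlydeVoice namespace to the global object when loaded via a script tag. A hypothetical browser-global usage; the CDN URL is illustrative, though the dist path matches the main field in package.json below:

    // <script src="https://unpkg.com/@glydeunity/voice-sdk/dist/voice-sdk.umd.js"></script>
    // The UMD factory writes its exports onto root.GlydeVoice, so the class lives
    // at window.GlydeVoice.GlydeVoice.
    const { GlydeVoice } = (window as any).GlydeVoice;
    const voice = new GlydeVoice({ publishableKey: "pk_...", contextType: "support" }); // illustrative values
    voice.start();
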
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@glydeunity/voice-sdk",
- "version": "1.2.0",
+ "version": "1.2.2",
  "description": "GLYDE Voice Agent SDK - AI-powered voice interactions for web applications",
  "type": "module",
  "main": "./dist/voice-sdk.umd.js",