@glydeunity/voice-sdk 1.2.0 → 1.2.1

This diff shows the published contents of the two package versions as they appear in their public registry. It is provided for informational purposes only.
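In functional terms, 1.2.1 makes two changes to the start() flow (the rest of the diff is minifier identifier renames): the body of the POST to /api/unity/voice/auth is now built up front and forwards a caller-supplied systemPrompt (as system_prompt) and deepgramConfig (as deepgram_config), and the auth response's deepgram_config becomes an extra fallback when composing the Deepgram agent Settings, with agent_config.instructions now taking precedence over the separately fetched server-config prompt. The sketch below shows how a consumer might rely on this; it is assembled only from option names visible in this diff (the contextType/contextId values are hypothetical), not from official SDK documentation.

// Hedged usage sketch based solely on fields visible in this diff, not on SDK docs.
// In 1.2.1, systemPrompt and deepgramConfig set here are also forwarded in the
// /api/unity/voice/auth request body as system_prompt and deepgram_config.
import { GlydeVoice } from "@glydeunity/voice-sdk";

const voice = new GlydeVoice({
  publishableKey: "pk_live_example",   // one of publishableKey | apiKey | authToken is required
  contextType: "order",                // hypothetical value; supplied by the host application
  contextId: "ctx_123",                // hypothetical value
  systemPrompt: "You are a concise booking assistant.",
  deepgramConfig: {
    think: { provider: { type: "open_ai", model: "gpt-4o-mini" } },
    speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
    listen: { provider: { type: "deepgram", model: "nova-2", version: "latest" } },
  },
  onTranscript: (text, role) => console.log(role, text),
});

voice.start().catch(console.error);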
@@ -1,4 +1,4 @@
- const h = `
+ const d = `
  class AudioCaptureProcessor extends AudioWorkletProcessor {
  constructor() {
  super();
@@ -33,7 +33,7 @@ class AudioCaptureProcessor extends AudioWorkletProcessor {
  }
 
  registerProcessor('audio-capture-processor', AudioCaptureProcessor);
- `, p = `
+ `, u = `
  class AudioPlaybackProcessor extends AudioWorkletProcessor {
  constructor() {
  super();
@@ -131,7 +131,7 @@ class AudioPlaybackProcessor extends AudioWorkletProcessor {
 
  registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
  `;
- class u {
+ class g {
  config;
  unityUrl;
  active = !1;
@@ -174,15 +174,15 @@ class u {
  * @returns Voice configuration including system prompt, tools, and Deepgram settings
  */
  async fetchConfig() {
- const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, s = await fetch(t, {
+ const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, i = await fetch(t, {
  method: "GET",
  headers: this.getAuthHeaders()
  });
- if (!s.ok) {
- const a = await s.json();
+ if (!i.ok) {
+ const a = await i.json();
  throw new Error(a.error?.message || a.message || "Failed to fetch voice config");
  }
- const { data: o } = await s.json();
+ const { data: o } = await i.json();
  return o;
  }
  /**
@@ -193,27 +193,29 @@ class u {
  this.active = !0;
  try {
  this.config.systemPrompt || (this.serverConfig = await this.fetchConfig(), console.log("[GlydeVoice] Fetched config:", this.serverConfig));
- const e = await fetch(`${this.unityUrl}/api/unity/voice/auth`, {
+ const e = {
+ context_id: this.config.contextId,
+ domain: typeof window < "u" ? window.location.hostname : "localhost"
+ };
+ this.config.systemPrompt && (e.system_prompt = this.config.systemPrompt), this.config.deepgramConfig && (e.deepgram_config = this.config.deepgramConfig);
+ const t = await fetch(`${this.unityUrl}/api/unity/voice/auth`, {
  method: "POST",
  headers: this.getAuthHeaders(),
- body: JSON.stringify({
- context_id: this.config.contextId,
- domain: typeof window < "u" ? window.location.hostname : "localhost"
- })
+ body: JSON.stringify(e)
  });
- if (!e.ok) {
- const i = await e.json();
- throw new Error(i.error?.message || i.message || "Failed to authenticate voice session");
+ if (!t.ok) {
+ const s = await t.json();
+ throw new Error(s.error?.message || s.message || "Failed to authenticate voice session");
  }
- const { data: t } = await e.json(), { token: s, agent_config: o } = t, a = this.config.systemPrompt || this.serverConfig?.system_prompt || o.instructions || "You are a helpful AI assistant.";
+ const { data: i } = await t.json(), { token: o, agent_config: a, deepgram_config: r } = i, l = this.config.systemPrompt || a.instructions || this.serverConfig?.system_prompt || "You are a helpful AI assistant.";
  await this.initializeAudio();
- const r = "wss://agent.deepgram.com/v1/agent/converse";
- this.ws = new WebSocket(r, ["bearer", s]), this.ws.onopen = () => {
- const i = this.config.deepgramConfig || this.serverConfig?.deepgram_config || {
+ const c = "wss://agent.deepgram.com/v1/agent/converse";
+ this.ws = new WebSocket(c, ["bearer", o]), this.ws.onopen = () => {
+ const s = this.config.deepgramConfig || r || this.serverConfig?.deepgram_config || {
  think: { provider: { type: "open_ai", model: "gpt-4o-mini" } },
  speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
  listen: { provider: { type: "deepgram", model: "nova-2", version: "latest" } }
- }, n = {
+ }, h = {
  type: "Settings",
  audio: {
  input: {
@@ -228,15 +230,15 @@ class u {
  },
  agent: {
  language: "en",
- speak: i.speak || {
+ speak: s.speak || {
  provider: { type: "deepgram", model: "aura-2-thalia-en" }
  },
- listen: i.listen || {
+ listen: s.listen || {
  provider: { type: "deepgram", version: "v2", model: "flux-general-en" }
  },
  think: {
- provider: i.think?.provider || { type: "open_ai", model: "gpt-4o-mini" },
- functions: i.think?.functions || [
+ provider: s.think?.provider || { type: "open_ai", model: "gpt-4o-mini" },
+ functions: s.think?.functions || [
  {
  name: "end_conversation",
  description: "End the conversation when stop phrases are detected.",
@@ -253,25 +255,25 @@ class u {
  greeting: "Hi! I'm ready to speak with you. How can I help you today?"
  }
  };
- this.ws.send(JSON.stringify(n)), this.emit({ type: "open", payload: { config: o, serverConfig: this.serverConfig } });
+ this.ws.send(JSON.stringify(h)), this.emit({ type: "open", payload: { config: a, serverConfig: this.serverConfig } });
  };
- const l = a;
- this.ws.onmessage = (i) => {
- if (typeof i.data == "string") {
+ const n = l;
+ this.ws.onmessage = (s) => {
+ if (typeof s.data == "string") {
  try {
- if (JSON.parse(i.data).type === "SettingsApplied") {
- const c = {
+ if (JSON.parse(s.data).type === "SettingsApplied") {
+ const p = {
  type: "UpdatePrompt",
- prompt: l
+ prompt: n
  };
- this.ws.send(JSON.stringify(c)), this.startMicrophone();
+ this.ws.send(JSON.stringify(p)), this.startMicrophone();
  }
  } catch {
  }
- this.handleTextMessage(i.data);
- } else i.data instanceof Blob ? this.handleAudioData(i.data) : i.data instanceof ArrayBuffer && this.handleAudioBuffer(i.data);
- }, this.ws.onerror = (i) => {
- console.error("[GlydeVoice] WebSocket error:", i), this.emit({ type: "error", payload: i });
+ this.handleTextMessage(s.data);
+ } else s.data instanceof Blob ? this.handleAudioData(s.data) : s.data instanceof ArrayBuffer && this.handleAudioBuffer(s.data);
+ }, this.ws.onerror = (s) => {
+ console.error("[GlydeVoice] WebSocket error:", s), this.emit({ type: "error", payload: s });
  }, this.ws.onclose = () => {
  this.cleanup(), this.emit({ type: "close" });
  }, this.renderUI();
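For readability, the minified onopen/onmessage wiring in the hunk above amounts to the following handshake: send Settings on open; when the agent confirms with SettingsApplied, send UpdatePrompt with the resolved prompt and start the microphone; binary frames go to audio playback. This is a descriptive-name sketch of that same logic, not the SDK's actual source.

// Sketch of the WebSocket handshake implemented above, with descriptive names
// substituted for the minified identifiers (not SDK source).
function wireSocket(
  ws: WebSocket,
  settings: object,
  resolvedPrompt: string,
  handlers: {
    onText(text: string): void;
    onAudio(data: Blob | ArrayBuffer): void;
    startMicrophone(): void;
  },
) {
  ws.onopen = () => ws.send(JSON.stringify(settings));            // Settings goes out first
  ws.onmessage = (ev) => {
    if (typeof ev.data === "string") {
      try {
        if (JSON.parse(ev.data).type === "SettingsApplied") {
          // Only after the agent confirms settings: push the prompt, then open the mic.
          ws.send(JSON.stringify({ type: "UpdatePrompt", prompt: resolvedPrompt }));
          handlers.startMicrophone();
        }
      } catch { /* non-JSON text frames fall through */ }
      handlers.onText(ev.data);
    } else {
      handlers.onAudio(ev.data);                                   // Blob or ArrayBuffer audio
    }
  };
}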
@@ -296,7 +298,7 @@ class u {
  */
  async initializeAudio() {
  this.audioContext = new AudioContext({ sampleRate: this.inputSampleRate });
- const e = this.createWorkletBlobUrl(h), t = this.createWorkletBlobUrl(p);
+ const e = this.createWorkletBlobUrl(d), t = this.createWorkletBlobUrl(u);
  try {
  await Promise.all([
  this.audioContext.audioWorklet.addModule(e),
@@ -305,8 +307,8 @@ class u {
  } finally {
  URL.revokeObjectURL(e), URL.revokeObjectURL(t);
  }
- this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (s) => {
- const { type: o } = s.data;
+ this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (i) => {
+ const { type: o } = i.data;
  (o === "cleared" || o === "bufferEmpty") && (this.isAgentSpeaking = !1, this.agentAudioDoneReceived = !1, this.emit({ type: "agent_speaking", payload: !1 }));
  };
  }
@@ -330,8 +332,8 @@ class u {
  break;
  case "ConversationText":
  if (t.content && t.content.trim()) {
- const s = t.role === "assistant" ? "agent" : "user";
- this.config.onTranscript && this.config.onTranscript(t.content, s), this.emit({ type: "transcript", payload: { text: t.content, role: s } }), this.saveTranscript(t.content, t.role);
+ const i = t.role === "assistant" ? "agent" : "user";
+ this.config.onTranscript && this.config.onTranscript(t.content, i), this.emit({ type: "transcript", payload: { text: t.content, role: i } }), this.saveTranscript(t.content, t.role);
  }
  break;
  case "AgentStartedSpeaking":
@@ -364,30 +366,30 @@ class u {
  this.audioContext.state === "suspended" && this.audioContext.resume();
  const t = e.byteLength;
  if (t === 0) return;
- const s = t - t % 2;
- if (s === 0) return;
- const o = s === t ? e : e.slice(0, s), a = new Int16Array(o), r = new Float32Array(a.length);
+ const i = t - t % 2;
+ if (i === 0) return;
+ const o = i === t ? e : e.slice(0, i), a = new Int16Array(o), r = new Float32Array(a.length);
  for (let n = 0; n < a.length; n++)
  r[n] = a[n] / 32768;
  const l = this.resample24kTo48k(r);
  !this.isAgentSpeaking && !this.agentAudioDoneReceived && (this.isAgentSpeaking = !0, this.emit({ type: "agent_speaking", payload: !0 }));
- const i = new Float32Array(l);
+ const c = new Float32Array(l);
  this.playbackWorkletNode.port.postMessage({
  type: "audio",
- data: i
- }, [i.buffer]);
+ data: c
+ }, [c.buffer]);
  }
  /**
  * Resample audio from 24kHz to 48kHz using linear interpolation
  */
  resample24kTo48k(e) {
- const t = e.length * 2, s = new Float32Array(t);
+ const t = e.length * 2, i = new Float32Array(t);
  for (let a = 0; a < e.length - 1; a++) {
  const r = e[a], l = e[a + 1];
- s[a * 2] = r, s[a * 2 + 1] = (r + l) / 2;
+ i[a * 2] = r, i[a * 2 + 1] = (r + l) / 2;
  }
  const o = e.length - 1;
- return s[o * 2] = e[o], s[o * 2 + 1] = e[o], s;
+ return i[o * 2] = e[o], i[o * 2 + 1] = e[o], i;
  }
  /**
  * Clear the playback buffer (for interruption handling)
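The renamed block above is the playback path: incoming 16-bit PCM at 24 kHz is truncated to an even byte length, converted to Float32 in [-1, 1), upsampled to 48 kHz by linear interpolation, and transferred to the playback worklet. A descriptive-name sketch of that unchanged logic (not SDK source) follows.

// Sketch of the 24 kHz -> 48 kHz playback path above, with descriptive names.
function int16ToFloat32(buf: ArrayBuffer): Float32Array {
  const evenBytes = buf.byteLength - (buf.byteLength % 2);        // drop a trailing odd byte
  const samples = new Int16Array(evenBytes === buf.byteLength ? buf : buf.slice(0, evenBytes));
  const out = new Float32Array(samples.length);
  for (let i = 0; i < samples.length; i++) out[i] = samples[i] / 32768;
  return out;
}

function resample24kTo48k(input: Float32Array): Float32Array {
  const out = new Float32Array(input.length * 2);
  for (let i = 0; i < input.length - 1; i++) {
    out[i * 2] = input[i];                                        // keep the original sample
    out[i * 2 + 1] = (input[i] + input[i + 1]) / 2;               // midpoint between neighbours
  }
  const last = input.length - 1;
  out[last * 2] = input[last];
  out[last * 2 + 1] = input[last];                                // duplicate the final sample
  return out;
}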
@@ -496,5 +498,5 @@ class u {
  }
  }
  export {
- u as GlydeVoice
+ g as GlydeVoice
  };
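The second file below is the UMD bundle (presumably ./dist/voice-sdk.umd.js, the package.json "main" entry), which carries the same changes in minified single-line form. Pulled out of the diff above, the auth exchange in 1.2.1 has roughly the following shape; the field names come from the code itself, and the optional markers are a hedged reading rather than an official API contract.

// Hedged reconstruction of the /api/unity/voice/auth exchange implied by the diff above.
// Field names are taken from the code; this is not an official API reference.
interface VoiceAuthRequest {
  context_id?: string;
  domain: string;                             // window.location.hostname, or "localhost" outside a browser
  system_prompt?: string;                     // new in 1.2.1: copied from config.systemPrompt when set
  deepgram_config?: Record<string, unknown>;  // new in 1.2.1: copied from config.deepgramConfig when set
}

interface VoiceAuthResponse {
  data: {
    token: string;                            // passed as the "bearer" WebSocket subprotocol credential
    agent_config: { instructions?: string; [key: string]: unknown };
    deepgram_config?: Record<string, unknown>; // new fallback source for the Deepgram Settings message
  };
}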
@@ -1,4 +1,4 @@
- (function(l,c){typeof exports=="object"&&typeof module<"u"?c(exports):typeof define=="function"&&define.amd?define(["exports"],c):(l=typeof globalThis<"u"?globalThis:l||self,c(l.GlydeVoice={}))})(this,(function(l){"use strict";const c=`
+ (function(n,c){typeof exports=="object"&&typeof module<"u"?c(exports):typeof define=="function"&&define.amd?define(["exports"],c):(n=typeof globalThis<"u"?globalThis:n||self,c(n.GlydeVoice={}))})(this,(function(n){"use strict";const c=`
  class AudioCaptureProcessor extends AudioWorkletProcessor {
  constructor() {
  super();
@@ -33,7 +33,7 @@ class AudioCaptureProcessor extends AudioWorkletProcessor {
  }
 
  registerProcessor('audio-capture-processor', AudioCaptureProcessor);
- `,d=`
+ `,u=`
  class AudioPlaybackProcessor extends AudioWorkletProcessor {
  constructor() {
  super();
@@ -130,11 +130,11 @@ class AudioPlaybackProcessor extends AudioWorkletProcessor {
  }
 
  registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
- `;class p{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,s=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!s.ok){const o=await s.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:a}=await s.json();return a}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"})});if(!e.ok){const i=await e.json();throw new Error(i.error?.message||i.message||"Failed to authenticate voice session")}const{data:t}=await e.json(),{token:s,agent_config:a}=t,o=this.config.systemPrompt||this.serverConfig?.system_prompt||a.instructions||"You are a helpful AI assistant.";await this.initializeAudio();const r="wss://agent.deepgram.com/v1/agent/converse";this.ws=new WebSocket(r,["bearer",s]),this.ws.onopen=()=>{const i=this.config.deepgramConfig||this.serverConfig?.deepgram_config||{think:{provider:{type:"open_ai",model:"gpt-4o-mini"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",model:"nova-2",version:"latest"}}},n={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:i.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:i.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:i.think?.provider||{type:"open_ai",model:"gpt-4o-mini"},functions:i.think?.functions||[{name:"end_conversation",description:"End the conversation when stop phrases are detected.",parameters:{type:"object",properties:{item:{type:"string",description:"The phrase that triggered end of conversation"}},required:["item"]}}]},greeting:"Hi! I'm ready to speak with you. 
How can I help you today?"}};this.ws.send(JSON.stringify(n)),this.emit({type:"open",payload:{config:a,serverConfig:this.serverConfig}})};const h=o;this.ws.onmessage=i=>{if(typeof i.data=="string"){try{if(JSON.parse(i.data).type==="SettingsApplied"){const u={type:"UpdatePrompt",prompt:h};this.ws.send(JSON.stringify(u)),this.startMicrophone()}}catch{}this.handleTextMessage(i.data)}else i.data instanceof Blob?this.handleAudioData(i.data):i.data instanceof ArrayBuffer&&this.handleAudioBuffer(i.data)},this.ws.onerror=i=>{console.error("[GlydeVoice] WebSocket error:",i),this.emit({type:"error",payload:i})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(c),t=this.createWorkletBlobUrl(d);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=s=>{const{type:a}=s.data;(a==="cleared"||a==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const s=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,s),this.emit({type:"transcript",payload:{text:t.content,role:s}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const s=t-t%2;if(s===0)return;const a=s===t?e:e.slice(0,s),o=new Int16Array(a),r=new Float32Array(o.length);for(let n=0;n<o.length;n++)r[n]=o[n]/32768;const h=this.resample24kTo48k(r);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const i=new Float32Array(h);this.playbackWorkletNode.port.postMessage({type:"audio",data:i},[i.buffer])}resample24kTo48k(e){const t=e.length*2,s=new Float32Array(t);for(let o=0;o<e.length-1;o++){const r=e[o],h=e[o+1];s[o*2]=r,s[o*2+1]=(r+h)/2}const a=e.length-1;return 
s[a*2]=e[a],s[a*2+1]=e[a],s}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
+ `;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,i=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!i.ok){const o=await i.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:a}=await i.json();return a}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e={context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"};this.config.systemPrompt&&(e.system_prompt=this.config.systemPrompt),this.config.deepgramConfig&&(e.deepgram_config=this.config.deepgramConfig);const t=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify(e)});if(!t.ok){const s=await t.json();throw new Error(s.error?.message||s.message||"Failed to authenticate voice session")}const{data:i}=await t.json(),{token:a,agent_config:o,deepgram_config:r}=i,h=this.config.systemPrompt||o.instructions||this.serverConfig?.system_prompt||"You are a helpful AI assistant.";await this.initializeAudio();const d="wss://agent.deepgram.com/v1/agent/converse";this.ws=new WebSocket(d,["bearer",a]),this.ws.onopen=()=>{const s=this.config.deepgramConfig||r||this.serverConfig?.deepgram_config||{think:{provider:{type:"open_ai",model:"gpt-4o-mini"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",model:"nova-2",version:"latest"}}},p={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:s.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:s.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:s.think?.provider||{type:"open_ai",model:"gpt-4o-mini"},functions:s.think?.functions||[{name:"end_conversation",description:"End the conversation when stop phrases are detected.",parameters:{type:"object",properties:{item:{type:"string",description:"The phrase that triggered end of conversation"}},required:["item"]}}]},greeting:"Hi! I'm ready to speak with you. 
How can I help you today?"}};this.ws.send(JSON.stringify(p)),this.emit({type:"open",payload:{config:o,serverConfig:this.serverConfig}})};const l=h;this.ws.onmessage=s=>{if(typeof s.data=="string"){try{if(JSON.parse(s.data).type==="SettingsApplied"){const g={type:"UpdatePrompt",prompt:l};this.ws.send(JSON.stringify(g)),this.startMicrophone()}}catch{}this.handleTextMessage(s.data)}else s.data instanceof Blob?this.handleAudioData(s.data):s.data instanceof ArrayBuffer&&this.handleAudioBuffer(s.data)},this.ws.onerror=s=>{console.error("[GlydeVoice] WebSocket error:",s),this.emit({type:"error",payload:s})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(c),t=this.createWorkletBlobUrl(u);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=i=>{const{type:a}=i.data;(a==="cleared"||a==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const i=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,i),this.emit({type:"transcript",payload:{text:t.content,role:i}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const i=t-t%2;if(i===0)return;const a=i===t?e:e.slice(0,i),o=new Int16Array(a),r=new Float32Array(o.length);for(let l=0;l<o.length;l++)r[l]=o[l]/32768;const h=this.resample24kTo48k(r);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const d=new Float32Array(h);this.playbackWorkletNode.port.postMessage({type:"audio",data:d},[d.buffer])}resample24kTo48k(e){const t=e.length*2,i=new Float32Array(t);for(let o=0;o<e.length-1;o++){const r=e[o],h=e[o+1];i[o*2]=r,i[o*2+1]=(r+h)/2}const a=e.length-1;return 
i[a*2]=e[a],i[a*2+1]=e[a],i}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
  <div style="padding: 20px; border: 1px solid #ccc; border-radius: 8px; background: #fff;">
  <h3>Glyde Voice Agent</h3>
  <p>Status: Active</p>
  <p>Context: ${this.config.contextType}</p>
  <button onclick="this.closest('div').remove()">Close</button>
  </div>
- `)}}l.GlydeVoice=p,Object.defineProperty(l,Symbol.toStringTag,{value:"Module"})}));
+ `)}}n.GlydeVoice=f,Object.defineProperty(n,Symbol.toStringTag,{value:"Module"})}));
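Before the package.json change, it is worth spelling out the fallback orders the 1.2.1 code above implements, since both moved: the prompt now prefers agent_config.instructions over the separately fetched /voice/config prompt, and the Deepgram configuration gains the auth response's deepgram_config as its second choice. A sketch with descriptive names (the SDK itself chains || over the minified identifiers):

// Sketch of the 1.2.1 fallback orders (descriptive names, not SDK source).
type DeepgramAgentConfig = Record<string, unknown>;

function resolvePrompt(
  configPrompt?: string,        // config.systemPrompt
  agentInstructions?: string,   // agent_config.instructions from /voice/auth
  serverPrompt?: string,        // system_prompt from /voice/config (only fetched when configPrompt is unset)
): string {
  // 1.2.0 checked serverPrompt before agentInstructions; 1.2.1 swaps the two.
  return configPrompt || agentInstructions || serverPrompt || "You are a helpful AI assistant.";
}

function resolveDeepgramConfig(
  configDg?: DeepgramAgentConfig,   // config.deepgramConfig
  authDg?: DeepgramAgentConfig,     // deepgram_config from /voice/auth (new fallback in 1.2.1)
  serverDg?: DeepgramAgentConfig,   // deepgram_config from /voice/config
): DeepgramAgentConfig {
  return configDg || authDg || serverDg || {
    think: { provider: { type: "open_ai", model: "gpt-4o-mini" } },
    speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
    listen: { provider: { type: "deepgram", model: "nova-2", version: "latest" } },
  };
}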
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@glydeunity/voice-sdk",
- "version": "1.2.0",
+ "version": "1.2.1",
  "description": "GLYDE Voice Agent SDK - AI-powered voice interactions for web applications",
  "type": "module",
  "main": "./dist/voice-sdk.umd.js",