@glydeunity/voice-sdk 1.1.0 → 1.2.1

This diff shows the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
package/dist/index.d.ts CHANGED
@@ -89,7 +89,15 @@ export declare class GlydeVoice {
    */
   start(): Promise<void>;
   /**
-   * Initialize the audio system with both capture and playback worklets
+   * Create a blob URL from inline JavaScript code for AudioWorklet modules.
+   * This avoids CORS issues when the SDK is loaded from a different origin than the page.
+   * @param code - The JavaScript code to convert to a blob URL
+   * @returns A blob URL that can be used with audioWorklet.addModule()
+   */
+  private createWorkletBlobUrl;
+  /**
+   * Initialize the audio system with both capture and playback worklets.
+   * Uses inline blob URLs to avoid CORS issues when SDK is embedded in external apps.
    */
   private initializeAudio;
   /**
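
The new createWorkletBlobUrl declaration documents the central 1.2.1 change: the worklet processors now ship inline in the bundle and are loaded through blob URLs rather than fetched from fixed paths on the host page. A minimal TypeScript sketch of that pattern, assuming an existing AudioContext (the helper name loadInlineWorklet and PassthroughProcessor are illustrative, not SDK API):

// Sketch of same-origin AudioWorklet loading via a blob URL.
// `loadInlineWorklet` and `PassthroughProcessor` are illustrative names, not SDK API.
const processorSource = `
  class PassthroughProcessor extends AudioWorkletProcessor {
    process() { return true; } // keep the node alive; a real processor does work here
  }
  registerProcessor('passthrough-processor', PassthroughProcessor);
`;

async function loadInlineWorklet(ctx: AudioContext, code: string): Promise<void> {
  // A blob: URL is same-origin with the page, so addModule() succeeds even when
  // the SDK bundle itself was served from another origin.
  const url = URL.createObjectURL(new Blob([code], { type: "application/javascript" }));
  try {
    await ctx.audioWorklet.addModule(url);
  } finally {
    URL.revokeObjectURL(url); // addModule() has already fetched and compiled the module
  }
}

The compiled bundles below apply this same pattern inside initializeAudio.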
@@ -1,4 +1,137 @@
-class l {
+const d = `
+class AudioCaptureProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this.bufferSize = 4096;
+    this.buffer = new Float32Array(this.bufferSize);
+    this.bufferIndex = 0;
+  }
+
+  process(inputs) {
+    const input = inputs[0];
+    if (!input || !input[0]) return true;
+
+    const samples = input[0];
+
+    for (let i = 0; i < samples.length; i++) {
+      this.buffer[this.bufferIndex++] = samples[i];
+
+      if (this.bufferIndex >= this.bufferSize) {
+        const pcm16 = new Int16Array(this.bufferSize);
+        for (let j = 0; j < this.bufferSize; j++) {
+          const s = Math.max(-1, Math.min(1, this.buffer[j]));
+          pcm16[j] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+        }
+
+        this.port.postMessage(pcm16.buffer, [pcm16.buffer]);
+        this.bufferIndex = 0;
+      }
+    }
+
+    return true;
+  }
+}
+
+registerProcessor('audio-capture-processor', AudioCaptureProcessor);
+`, u = `
+class AudioPlaybackProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+
+    this.bufferSize = 48000 * 15;
+    this.buffer = new Float32Array(this.bufferSize);
+    this.writeIndex = 0;
+    this.readIndex = 0;
+    this.samplesAvailable = 0;
+    this.isPlaying = false;
+
+    this.port.onmessage = (event) => {
+      const { type, data } = event.data;
+
+      switch (type) {
+        case 'audio':
+          const audioData = data instanceof Float32Array ? data : new Float32Array(data);
+          this.writeAudio(audioData);
+          break;
+        case 'clear':
+          this.clearBuffer();
+          break;
+      }
+    };
+  }
+
+  writeAudio(samples) {
+    if (!samples || samples.length === 0) return;
+
+    const samplesToWrite = samples.length;
+
+    if (this.samplesAvailable + samplesToWrite > this.bufferSize) {
+      const overflow = (this.samplesAvailable + samplesToWrite) - this.bufferSize;
+      this.readIndex = (this.readIndex + overflow) % this.bufferSize;
+      this.samplesAvailable -= overflow;
+    }
+
+    for (let i = 0; i < samplesToWrite; i++) {
+      this.buffer[this.writeIndex] = samples[i];
+      this.writeIndex = (this.writeIndex + 1) % this.bufferSize;
+    }
+
+    this.samplesAvailable += samplesToWrite;
+    this.isPlaying = true;
+  }
+
+  clearBuffer() {
+    this.readIndex = 0;
+    this.writeIndex = 0;
+    this.samplesAvailable = 0;
+    this.isPlaying = false;
+    this.port.postMessage({ type: 'cleared' });
+  }
+
+  process(inputs, outputs) {
+    const output = outputs[0];
+    if (!output || !output[0]) return true;
+
+    const outputChannel = output[0];
+    const samplesToRead = outputChannel.length;
+
+    if (this.samplesAvailable >= samplesToRead) {
+      for (let i = 0; i < samplesToRead; i++) {
+        outputChannel[i] = this.buffer[this.readIndex];
+        this.readIndex = (this.readIndex + 1) % this.bufferSize;
+      }
+      this.samplesAvailable -= samplesToRead;
+    } else if (this.samplesAvailable > 0) {
+      let i = 0;
+      while (this.samplesAvailable > 0 && i < samplesToRead) {
+        outputChannel[i] = this.buffer[this.readIndex];
+        this.readIndex = (this.readIndex + 1) % this.bufferSize;
+        this.samplesAvailable--;
+        i++;
+      }
+      while (i < samplesToRead) {
+        outputChannel[i] = 0;
+        i++;
+      }
+
+      if (this.isPlaying) {
+        this.isPlaying = false;
+        this.port.postMessage({ type: 'bufferEmpty' });
+      }
+    } else {
+      for (let i = 0; i < samplesToRead; i++) {
+        outputChannel[i] = 0;
+      }
+      this.isPlaying = false;
+    }
+
+    return true;
+  }
+}
+
+registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
+`;
+class g {
   config;
   unityUrl;
   active = !1;
@@ -41,16 +174,16 @@ class l {
    * @returns Voice configuration including system prompt, tools, and Deepgram settings
    */
   async fetchConfig() {
-    const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, o = await fetch(t, {
+    const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, i = await fetch(t, {
       method: "GET",
       headers: this.getAuthHeaders()
     });
-    if (!o.ok) {
-      const a = await o.json();
+    if (!i.ok) {
+      const a = await i.json();
       throw new Error(a.error?.message || a.message || "Failed to fetch voice config");
     }
-    const { data: s } = await o.json();
-    return s;
+    const { data: o } = await i.json();
+    return o;
   }
   /**
    * Initialize and start the voice session
@@ -60,27 +193,29 @@ class l {
     this.active = !0;
     try {
       this.config.systemPrompt || (this.serverConfig = await this.fetchConfig(), console.log("[GlydeVoice] Fetched config:", this.serverConfig));
-      const e = await fetch(`${this.unityUrl}/api/unity/voice/auth`, {
+      const e = {
+        context_id: this.config.contextId,
+        domain: typeof window < "u" ? window.location.hostname : "localhost"
+      };
+      this.config.systemPrompt && (e.system_prompt = this.config.systemPrompt), this.config.deepgramConfig && (e.deepgram_config = this.config.deepgramConfig);
+      const t = await fetch(`${this.unityUrl}/api/unity/voice/auth`, {
         method: "POST",
         headers: this.getAuthHeaders(),
-        body: JSON.stringify({
-          context_id: this.config.contextId,
-          domain: typeof window < "u" ? window.location.hostname : "localhost"
-        })
+        body: JSON.stringify(e)
       });
-      if (!e.ok) {
-        const i = await e.json();
-        throw new Error(i.error?.message || i.message || "Failed to authenticate voice session");
+      if (!t.ok) {
+        const s = await t.json();
+        throw new Error(s.error?.message || s.message || "Failed to authenticate voice session");
       }
-      const { data: t } = await e.json(), { token: o, agent_config: s } = t, a = this.config.systemPrompt || this.serverConfig?.system_prompt || s.instructions || "You are a helpful AI assistant.";
+      const { data: i } = await t.json(), { token: o, agent_config: a, deepgram_config: r } = i, l = this.config.systemPrompt || a.instructions || this.serverConfig?.system_prompt || "You are a helpful AI assistant.";
       await this.initializeAudio();
-      const n = "wss://agent.deepgram.com/v1/agent/converse";
-      this.ws = new WebSocket(n, ["bearer", o]), this.ws.onopen = () => {
-        const i = this.config.deepgramConfig || this.serverConfig?.deepgram_config || {
+      const c = "wss://agent.deepgram.com/v1/agent/converse";
+      this.ws = new WebSocket(c, ["bearer", o]), this.ws.onopen = () => {
+        const s = this.config.deepgramConfig || r || this.serverConfig?.deepgram_config || {
           think: { provider: { type: "open_ai", model: "gpt-4o-mini" } },
           speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
           listen: { provider: { type: "deepgram", model: "nova-2", version: "latest" } }
-        }, r = {
+        }, h = {
           type: "Settings",
           audio: {
             input: {
@@ -95,15 +230,15 @@ class l {
           },
           agent: {
             language: "en",
-            speak: i.speak || {
+            speak: s.speak || {
               provider: { type: "deepgram", model: "aura-2-thalia-en" }
             },
-            listen: i.listen || {
+            listen: s.listen || {
               provider: { type: "deepgram", version: "v2", model: "flux-general-en" }
             },
             think: {
-              provider: i.think?.provider || { type: "open_ai", model: "gpt-4o-mini" },
-              functions: i.think?.functions || [
+              provider: s.think?.provider || { type: "open_ai", model: "gpt-4o-mini" },
+              functions: s.think?.functions || [
                 {
                   name: "end_conversation",
                   description: "End the conversation when stop phrases are detected.",
@@ -120,25 +255,25 @@ class l {
             greeting: "Hi! I'm ready to speak with you. How can I help you today?"
           }
         };
-        this.ws.send(JSON.stringify(r)), this.emit({ type: "open", payload: { config: s, serverConfig: this.serverConfig } });
+        this.ws.send(JSON.stringify(h)), this.emit({ type: "open", payload: { config: a, serverConfig: this.serverConfig } });
       };
-      const c = a;
-      this.ws.onmessage = (i) => {
-        if (typeof i.data == "string") {
+      const n = l;
+      this.ws.onmessage = (s) => {
+        if (typeof s.data == "string") {
           try {
-            if (JSON.parse(i.data).type === "SettingsApplied") {
-              const d = {
+            if (JSON.parse(s.data).type === "SettingsApplied") {
+              const p = {
                 type: "UpdatePrompt",
-                prompt: c
+                prompt: n
               };
-              this.ws.send(JSON.stringify(d)), this.startMicrophone();
+              this.ws.send(JSON.stringify(p)), this.startMicrophone();
             }
           } catch {
           }
-          this.handleTextMessage(i.data);
-        } else i.data instanceof Blob ? this.handleAudioData(i.data) : i.data instanceof ArrayBuffer && this.handleAudioBuffer(i.data);
-      }, this.ws.onerror = (i) => {
-        console.error("[GlydeVoice] WebSocket error:", i), this.emit({ type: "error", payload: i });
+          this.handleTextMessage(s.data);
+        } else s.data instanceof Blob ? this.handleAudioData(s.data) : s.data instanceof ArrayBuffer && this.handleAudioBuffer(s.data);
+      }, this.ws.onerror = (s) => {
+        console.error("[GlydeVoice] WebSocket error:", s), this.emit({ type: "error", payload: s });
       }, this.ws.onclose = () => {
         this.cleanup(), this.emit({ type: "close" });
       }, this.renderUI();
@@ -148,15 +283,33 @@ class l {
     }
   }
   /**
-   * Initialize the audio system with both capture and playback worklets
+   * Create a blob URL from inline JavaScript code for AudioWorklet modules.
+   * This avoids CORS issues when the SDK is loaded from a different origin than the page.
+   * @param code - The JavaScript code to convert to a blob URL
+   * @returns A blob URL that can be used with audioWorklet.addModule()
+   */
+  createWorkletBlobUrl(e) {
+    const t = new Blob([e], { type: "application/javascript" });
+    return URL.createObjectURL(t);
+  }
+  /**
+   * Initialize the audio system with both capture and playback worklets.
+   * Uses inline blob URLs to avoid CORS issues when SDK is embedded in external apps.
    */
   async initializeAudio() {
-    this.audioContext = new AudioContext({ sampleRate: this.inputSampleRate }), await Promise.all([
-      this.audioContext.audioWorklet.addModule("/audio-processor.js"),
-      this.audioContext.audioWorklet.addModule("/audio-playback-processor.js")
-    ]), this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (e) => {
-      const { type: t } = e.data;
-      (t === "cleared" || t === "bufferEmpty") && (this.isAgentSpeaking = !1, this.agentAudioDoneReceived = !1, this.emit({ type: "agent_speaking", payload: !1 }));
+    this.audioContext = new AudioContext({ sampleRate: this.inputSampleRate });
+    const e = this.createWorkletBlobUrl(d), t = this.createWorkletBlobUrl(u);
+    try {
+      await Promise.all([
+        this.audioContext.audioWorklet.addModule(e),
+        this.audioContext.audioWorklet.addModule(t)
+      ]);
+    } finally {
+      URL.revokeObjectURL(e), URL.revokeObjectURL(t);
+    }
+    this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (i) => {
+      const { type: o } = i.data;
+      (o === "cleared" || o === "bufferEmpty") && (this.isAgentSpeaking = !1, this.agentAudioDoneReceived = !1, this.emit({ type: "agent_speaking", payload: !1 }));
     };
   }
   /**
@@ -179,8 +332,8 @@ class l {
         break;
       case "ConversationText":
         if (t.content && t.content.trim()) {
-          const o = t.role === "assistant" ? "agent" : "user";
-          this.config.onTranscript && this.config.onTranscript(t.content, o), this.emit({ type: "transcript", payload: { text: t.content, role: o } }), this.saveTranscript(t.content, t.role);
+          const i = t.role === "assistant" ? "agent" : "user";
+          this.config.onTranscript && this.config.onTranscript(t.content, i), this.emit({ type: "transcript", payload: { text: t.content, role: i } }), this.saveTranscript(t.content, t.role);
         }
         break;
       case "AgentStartedSpeaking":
@@ -213,30 +366,30 @@ class l {
     this.audioContext.state === "suspended" && this.audioContext.resume();
     const t = e.byteLength;
     if (t === 0) return;
-    const o = t - t % 2;
-    if (o === 0) return;
-    const s = o === t ? e : e.slice(0, o), a = new Int16Array(s), n = new Float32Array(a.length);
-    for (let r = 0; r < a.length; r++)
-      n[r] = a[r] / 32768;
-    const c = this.resample24kTo48k(n);
+    const i = t - t % 2;
+    if (i === 0) return;
+    const o = i === t ? e : e.slice(0, i), a = new Int16Array(o), r = new Float32Array(a.length);
+    for (let n = 0; n < a.length; n++)
+      r[n] = a[n] / 32768;
+    const l = this.resample24kTo48k(r);
     !this.isAgentSpeaking && !this.agentAudioDoneReceived && (this.isAgentSpeaking = !0, this.emit({ type: "agent_speaking", payload: !0 }));
-    const i = new Float32Array(c);
+    const c = new Float32Array(l);
     this.playbackWorkletNode.port.postMessage({
       type: "audio",
-      data: i
-    }, [i.buffer]);
+      data: c
+    }, [c.buffer]);
   }
   /**
    * Resample audio from 24kHz to 48kHz using linear interpolation
    */
   resample24kTo48k(e) {
-    const t = e.length * 2, o = new Float32Array(t);
+    const t = e.length * 2, i = new Float32Array(t);
     for (let a = 0; a < e.length - 1; a++) {
-      const n = e[a], c = e[a + 1];
-      o[a * 2] = n, o[a * 2 + 1] = (n + c) / 2;
+      const r = e[a], l = e[a + 1];
+      i[a * 2] = r, i[a * 2 + 1] = (r + l) / 2;
     }
-    const s = e.length - 1;
-    return o[s * 2] = e[s], o[s * 2 + 1] = e[s], o;
+    const o = e.length - 1;
+    return i[o * 2] = e[o], i[o * 2 + 1] = e[o], i;
   }
   /**
    * Clear the playback buffer (for interruption handling)
@@ -345,5 +498,5 @@ class l {
   }
 }
 export {
-  l as GlydeVoice
+  g as GlydeVoice
 };
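
Taken together, 1.2.1 makes three changes visible in this build: the worklet processors are inlined and loaded through blob URLs, the auth request now forwards optional system_prompt and deepgram_config overrides, and the deepgram_config returned by the auth call is consulted before the separately fetched server config. For orientation, a hedged usage sketch of the public API; the field names come from the source above, while every concrete value is invented:

// Hedged usage sketch; field names are from the bundle above, values are invented.
import { GlydeVoice } from "@glydeunity/voice-sdk";

const voice = new GlydeVoice({
  publishableKey: "pk_live_...",  // one of publishableKey / apiKey / authToken is required
  contextType: "support",         // illustrative value
  contextId: "ctx_123",           // illustrative value
  onTranscript: (text: string, role: string) => console.log(`[${role}] ${text}`),
  onEvent: (event: { type: string }) => console.log("event:", event.type),
});

await voice.start(); // fetch config, authenticate, open the Deepgram agent WebSocket
// ... later:
voice.stop();

package/dist/voice-sdk.umd.js CHANGED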
@@ -1,8 +1,140 @@
-(function(c,d){typeof exports=="object"&&typeof module<"u"?d(exports):typeof define=="function"&&define.amd?define(["exports"],d):(c=typeof globalThis<"u"?globalThis:c||self,d(c.GlydeVoice={}))})(this,(function(c){"use strict";class d{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,o=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!o.ok){const s=await o.json();throw new Error(s.error?.message||s.message||"Failed to fetch voice config")}const{data:a}=await o.json();return a}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"})});if(!e.ok){const i=await e.json();throw new Error(i.error?.message||i.message||"Failed to authenticate voice session")}const{data:t}=await e.json(),{token:o,agent_config:a}=t,s=this.config.systemPrompt||this.serverConfig?.system_prompt||a.instructions||"You are a helpful AI assistant.";await this.initializeAudio();const n="wss://agent.deepgram.com/v1/agent/converse";this.ws=new WebSocket(n,["bearer",o]),this.ws.onopen=()=>{const i=this.config.deepgramConfig||this.serverConfig?.deepgram_config||{think:{provider:{type:"open_ai",model:"gpt-4o-mini"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",model:"nova-2",version:"latest"}}},r={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:i.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:i.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:i.think?.provider||{type:"open_ai",model:"gpt-4o-mini"},functions:i.think?.functions||[{name:"end_conversation",description:"End the conversation when stop phrases are detected.",parameters:{type:"object",properties:{item:{type:"string",description:"The phrase that triggered end of conversation"}},required:["item"]}}]},greeting:"Hi! I'm ready to speak with you. How can I help you today?"}};this.ws.send(JSON.stringify(r)),this.emit({type:"open",payload:{config:a,serverConfig:this.serverConfig}})};const l=s;this.ws.onmessage=i=>{if(typeof i.data=="string"){try{if(JSON.parse(i.data).type==="SettingsApplied"){const h={type:"UpdatePrompt",prompt:l};this.ws.send(JSON.stringify(h)),this.startMicrophone()}}catch{}this.handleTextMessage(i.data)}else i.data instanceof Blob?this.handleAudioData(i.data):i.data instanceof ArrayBuffer&&this.handleAudioBuffer(i.data)},this.ws.onerror=i=>{console.error("[GlydeVoice] WebSocket error:",i),this.emit({type:"error",payload:i})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate}),await Promise.all([this.audioContext.audioWorklet.addModule("/audio-processor.js"),this.audioContext.audioWorklet.addModule("/audio-playback-processor.js")]),this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=e=>{const{type:t}=e.data;(t==="cleared"||t==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const o=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,o),this.emit({type:"transcript",payload:{text:t.content,role:o}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const o=t-t%2;if(o===0)return;const a=o===t?e:e.slice(0,o),s=new Int16Array(a),n=new Float32Array(s.length);for(let r=0;r<s.length;r++)n[r]=s[r]/32768;const l=this.resample24kTo48k(n);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const i=new Float32Array(l);this.playbackWorkletNode.port.postMessage({type:"audio",data:i},[i.buffer])}resample24kTo48k(e){const t=e.length*2,o=new Float32Array(t);for(let s=0;s<e.length-1;s++){const n=e[s],l=e[s+1];o[s*2]=n,o[s*2+1]=(n+l)/2}const a=e.length-1;return o[a*2]=e[a],o[a*2+1]=e[a],o}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
+(function(n,c){typeof exports=="object"&&typeof module<"u"?c(exports):typeof define=="function"&&define.amd?define(["exports"],c):(n=typeof globalThis<"u"?globalThis:n||self,c(n.GlydeVoice={}))})(this,(function(n){"use strict";const c=`
+class AudioCaptureProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+    this.bufferSize = 4096;
+    this.buffer = new Float32Array(this.bufferSize);
+    this.bufferIndex = 0;
+  }
+
+  process(inputs) {
+    const input = inputs[0];
+    if (!input || !input[0]) return true;
+
+    const samples = input[0];
+
+    for (let i = 0; i < samples.length; i++) {
+      this.buffer[this.bufferIndex++] = samples[i];
+
+      if (this.bufferIndex >= this.bufferSize) {
+        const pcm16 = new Int16Array(this.bufferSize);
+        for (let j = 0; j < this.bufferSize; j++) {
+          const s = Math.max(-1, Math.min(1, this.buffer[j]));
+          pcm16[j] = s < 0 ? s * 0x8000 : s * 0x7FFF;
+        }
+
+        this.port.postMessage(pcm16.buffer, [pcm16.buffer]);
+        this.bufferIndex = 0;
+      }
+    }
+
+    return true;
+  }
+}
+
+registerProcessor('audio-capture-processor', AudioCaptureProcessor);
+`,u=`
+class AudioPlaybackProcessor extends AudioWorkletProcessor {
+  constructor() {
+    super();
+
+    this.bufferSize = 48000 * 15;
+    this.buffer = new Float32Array(this.bufferSize);
+    this.writeIndex = 0;
+    this.readIndex = 0;
+    this.samplesAvailable = 0;
+    this.isPlaying = false;
+
+    this.port.onmessage = (event) => {
+      const { type, data } = event.data;
+
+      switch (type) {
+        case 'audio':
+          const audioData = data instanceof Float32Array ? data : new Float32Array(data);
+          this.writeAudio(audioData);
+          break;
+        case 'clear':
+          this.clearBuffer();
+          break;
+      }
+    };
+  }
+
+  writeAudio(samples) {
+    if (!samples || samples.length === 0) return;
+
+    const samplesToWrite = samples.length;
+
+    if (this.samplesAvailable + samplesToWrite > this.bufferSize) {
+      const overflow = (this.samplesAvailable + samplesToWrite) - this.bufferSize;
+      this.readIndex = (this.readIndex + overflow) % this.bufferSize;
+      this.samplesAvailable -= overflow;
+    }
+
+    for (let i = 0; i < samplesToWrite; i++) {
+      this.buffer[this.writeIndex] = samples[i];
+      this.writeIndex = (this.writeIndex + 1) % this.bufferSize;
+    }
+
+    this.samplesAvailable += samplesToWrite;
+    this.isPlaying = true;
+  }
+
+  clearBuffer() {
+    this.readIndex = 0;
+    this.writeIndex = 0;
+    this.samplesAvailable = 0;
+    this.isPlaying = false;
+    this.port.postMessage({ type: 'cleared' });
+  }
+
+  process(inputs, outputs) {
+    const output = outputs[0];
+    if (!output || !output[0]) return true;
+
+    const outputChannel = output[0];
+    const samplesToRead = outputChannel.length;
+
+    if (this.samplesAvailable >= samplesToRead) {
+      for (let i = 0; i < samplesToRead; i++) {
+        outputChannel[i] = this.buffer[this.readIndex];
+        this.readIndex = (this.readIndex + 1) % this.bufferSize;
+      }
+      this.samplesAvailable -= samplesToRead;
+    } else if (this.samplesAvailable > 0) {
+      let i = 0;
+      while (this.samplesAvailable > 0 && i < samplesToRead) {
+        outputChannel[i] = this.buffer[this.readIndex];
+        this.readIndex = (this.readIndex + 1) % this.bufferSize;
+        this.samplesAvailable--;
+        i++;
+      }
+      while (i < samplesToRead) {
+        outputChannel[i] = 0;
+        i++;
+      }
+
+      if (this.isPlaying) {
+        this.isPlaying = false;
+        this.port.postMessage({ type: 'bufferEmpty' });
+      }
+    } else {
+      for (let i = 0; i < samplesToRead; i++) {
+        outputChannel[i] = 0;
+      }
+      this.isPlaying = false;
+    }
+
+    return true;
+  }
+}
+
+registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
+`;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,i=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!i.ok){const o=await i.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:a}=await i.json();return a}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e={context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"};this.config.systemPrompt&&(e.system_prompt=this.config.systemPrompt),this.config.deepgramConfig&&(e.deepgram_config=this.config.deepgramConfig);const t=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify(e)});if(!t.ok){const s=await t.json();throw new Error(s.error?.message||s.message||"Failed to authenticate voice session")}const{data:i}=await t.json(),{token:a,agent_config:o,deepgram_config:r}=i,h=this.config.systemPrompt||o.instructions||this.serverConfig?.system_prompt||"You are a helpful AI assistant.";await this.initializeAudio();const d="wss://agent.deepgram.com/v1/agent/converse";this.ws=new WebSocket(d,["bearer",a]),this.ws.onopen=()=>{const s=this.config.deepgramConfig||r||this.serverConfig?.deepgram_config||{think:{provider:{type:"open_ai",model:"gpt-4o-mini"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",model:"nova-2",version:"latest"}}},p={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:s.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:s.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:s.think?.provider||{type:"open_ai",model:"gpt-4o-mini"},functions:s.think?.functions||[{name:"end_conversation",description:"End the conversation when stop phrases are detected.",parameters:{type:"object",properties:{item:{type:"string",description:"The phrase that triggered end of conversation"}},required:["item"]}}]},greeting:"Hi! I'm ready to speak with you. How can I help you today?"}};this.ws.send(JSON.stringify(p)),this.emit({type:"open",payload:{config:o,serverConfig:this.serverConfig}})};const l=h;this.ws.onmessage=s=>{if(typeof s.data=="string"){try{if(JSON.parse(s.data).type==="SettingsApplied"){const g={type:"UpdatePrompt",prompt:l};this.ws.send(JSON.stringify(g)),this.startMicrophone()}}catch{}this.handleTextMessage(s.data)}else s.data instanceof Blob?this.handleAudioData(s.data):s.data instanceof ArrayBuffer&&this.handleAudioBuffer(s.data)},this.ws.onerror=s=>{console.error("[GlydeVoice] WebSocket error:",s),this.emit({type:"error",payload:s})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(c),t=this.createWorkletBlobUrl(u);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=i=>{const{type:a}=i.data;(a==="cleared"||a==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const i=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,i),this.emit({type:"transcript",payload:{text:t.content,role:i}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const i=t-t%2;if(i===0)return;const a=i===t?e:e.slice(0,i),o=new Int16Array(a),r=new Float32Array(o.length);for(let l=0;l<o.length;l++)r[l]=o[l]/32768;const h=this.resample24kTo48k(r);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const d=new Float32Array(h);this.playbackWorkletNode.port.postMessage({type:"audio",data:d},[d.buffer])}resample24kTo48k(e){const t=e.length*2,i=new Float32Array(t);for(let o=0;o<e.length-1;o++){const r=e[o],h=e[o+1];i[o*2]=r,i[o*2+1]=(r+h)/2}const a=e.length-1;return i[a*2]=e[a],i[a*2+1]=e[a],i}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
   <div style="padding: 20px; border: 1px solid #ccc; border-radius: 8px; background: #fff;">
     <h3>Glyde Voice Agent</h3>
     <p>Status: Active</p>
     <p>Context: ${this.config.contextType}</p>
     <button onclick="this.closest('div').remove()">Close</button>
   </div>
-`)}}c.GlydeVoice=d,Object.defineProperty(c,Symbol.toStringTag,{value:"Module"})}));
+`)}}n.GlydeVoice=f,Object.defineProperty(n,Symbol.toStringTag,{value:"Module"})}));
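
Loaded via a plain script tag rather than a bundler, the UMD wrapper above assigns its export object to a GlydeVoice global, so the class sits one level down as GlydeVoice.GlydeVoice. A minimal sketch, assuming the bundle is already on the page and using invented values:

// window.GlydeVoice is the UMD export namespace; its GlydeVoice property is the class.
const { GlydeVoice: VoiceAgent } = (window as any).GlydeVoice;
const voice = new VoiceAgent({ publishableKey: "pk_live_...", contextType: "support" }); // illustrative values
void voice.start();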
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@glydeunity/voice-sdk",
-  "version": "1.1.0",
+  "version": "1.2.1",
   "description": "GLYDE Voice Agent SDK - AI-powered voice interactions for web applications",
   "type": "module",
   "main": "./dist/voice-sdk.umd.js",