@glydeunity/voice-sdk 1.0.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,500 @@
1
- import { G as r } from "./index-BbD4w_Sz.js";
1
// Inline source for the SDK's two AudioWorklet processors, stored as
// template-literal strings. They are converted to blob: URLs at runtime
// (see createWorkletBlobUrl) so audioWorklet.addModule() works even when
// the SDK is served from a different origin than the host page (no CORS).
//
// `h` — capture processor: accumulates 4096 mic samples per chunk, clamps
// each Float32 sample to [-1, 1], converts to 16-bit PCM, and posts the
// buffer to the main thread with a transfer (zero-copy).
const h = `
class AudioCaptureProcessor extends AudioWorkletProcessor {
constructor() {
super();
this.bufferSize = 4096;
this.buffer = new Float32Array(this.bufferSize);
this.bufferIndex = 0;
}

process(inputs) {
const input = inputs[0];
if (!input || !input[0]) return true;

const samples = input[0];

for (let i = 0; i < samples.length; i++) {
this.buffer[this.bufferIndex++] = samples[i];

if (this.bufferIndex >= this.bufferSize) {
const pcm16 = new Int16Array(this.bufferSize);
for (let j = 0; j < this.bufferSize; j++) {
const s = Math.max(-1, Math.min(1, this.buffer[j]));
pcm16[j] = s < 0 ? s * 0x8000 : s * 0x7FFF;
}

this.port.postMessage(pcm16.buffer, [pcm16.buffer]);
this.bufferIndex = 0;
}
}

return true;
}
}

registerProcessor('audio-capture-processor', AudioCaptureProcessor);
`,
// `p` — playback processor: a circular Float32 buffer sized for 15 seconds
// at 48 kHz. The main thread posts {type:'audio'} chunks in and {type:'clear'}
// to flush (interruption handling); the processor posts {type:'cleared'} and
// {type:'bufferEmpty'} back so the SDK can track agent-speaking state.
// On overflow the oldest samples are dropped by advancing readIndex.
p = `
class AudioPlaybackProcessor extends AudioWorkletProcessor {
constructor() {
super();

this.bufferSize = 48000 * 15;
this.buffer = new Float32Array(this.bufferSize);
this.writeIndex = 0;
this.readIndex = 0;
this.samplesAvailable = 0;
this.isPlaying = false;

this.port.onmessage = (event) => {
const { type, data } = event.data;

switch (type) {
case 'audio':
const audioData = data instanceof Float32Array ? data : new Float32Array(data);
this.writeAudio(audioData);
break;
case 'clear':
this.clearBuffer();
break;
}
};
}

writeAudio(samples) {
if (!samples || samples.length === 0) return;

const samplesToWrite = samples.length;

if (this.samplesAvailable + samplesToWrite > this.bufferSize) {
const overflow = (this.samplesAvailable + samplesToWrite) - this.bufferSize;
this.readIndex = (this.readIndex + overflow) % this.bufferSize;
this.samplesAvailable -= overflow;
}

for (let i = 0; i < samplesToWrite; i++) {
this.buffer[this.writeIndex] = samples[i];
this.writeIndex = (this.writeIndex + 1) % this.bufferSize;
}

this.samplesAvailable += samplesToWrite;
this.isPlaying = true;
}

clearBuffer() {
this.readIndex = 0;
this.writeIndex = 0;
this.samplesAvailable = 0;
this.isPlaying = false;
this.port.postMessage({ type: 'cleared' });
}

process(inputs, outputs) {
const output = outputs[0];
if (!output || !output[0]) return true;

const outputChannel = output[0];
const samplesToRead = outputChannel.length;

if (this.samplesAvailable >= samplesToRead) {
for (let i = 0; i < samplesToRead; i++) {
outputChannel[i] = this.buffer[this.readIndex];
this.readIndex = (this.readIndex + 1) % this.bufferSize;
}
this.samplesAvailable -= samplesToRead;
} else if (this.samplesAvailable > 0) {
let i = 0;
while (this.samplesAvailable > 0 && i < samplesToRead) {
outputChannel[i] = this.buffer[this.readIndex];
this.readIndex = (this.readIndex + 1) % this.bufferSize;
this.samplesAvailable--;
i++;
}
while (i < samplesToRead) {
outputChannel[i] = 0;
i++;
}

if (this.isPlaying) {
this.isPlaying = false;
this.port.postMessage({ type: 'bufferEmpty' });
}
} else {
for (let i = 0; i < samplesToRead; i++) {
outputChannel[i] = 0;
}
this.isPlaying = false;
}

return true;
}
}

registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
`;
134
/**
 * GlydeVoice — browser voice-agent client (exported as `GlydeVoice`).
 *
 * Wires the microphone to Deepgram's Voice Agent API over WebSocket:
 * configuration and a session token are fetched from the Unity backend,
 * audio capture/playback run on AudioWorklets (inline sources `h` and `p`),
 * and state changes are surfaced through `config.onEvent` / `config.onTranscript`.
 */
class u {
  config;
  unityUrl;
  active = false;
  serverConfig = null;
  // WebSocket and Audio
  ws = null;
  audioContext = null;
  mediaStream = null;
  captureWorkletNode = null;
  playbackWorkletNode = null;
  isMuted = false;
  // Audio settings
  outputSampleRate = 24e3; // Deepgram TTS output rate
  inputSampleRate = 48e3; // Microphone input rate
  // Agent state
  isAgentSpeaking = false;
  agentAudioDoneReceived = false;
  /**
   * Create a new GlydeVoice instance
   * @param config - Configuration options (auth, context, callbacks, container)
   */
  constructor(e) {
    this.config = e;
    this.unityUrl = e.unityBaseUrl || "https://api.glydeunity.com";
    if (!e.publishableKey && !e.apiKey && !e.authToken) {
      console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.");
    }
  }
  /**
   * Get authentication headers based on configured auth method
   * Supports publishableKey, apiKey, and JWT token (authToken)
   * @returns Headers object with appropriate authentication
   */
  getAuthHeaders() {
    const headers = {
      "Content-Type": "application/json"
    };
    if (this.config.publishableKey) headers["x-publishable-key"] = this.config.publishableKey;
    if (this.config.apiKey) headers["x-api-key"] = this.config.apiKey;
    if (this.config.authToken) headers.Authorization = `Bearer ${this.config.authToken}`;
    return headers;
  }
  /**
   * Fetch voice configuration from Unity API
   * @returns Voice configuration including system prompt, tools, and Deepgram settings
   * @throws Error when the backend responds with a non-OK status
   */
  async fetchConfig() {
    const base = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`;
    const url = this.config.contextId ? `${base}/${this.config.contextId}` : base;
    const res = await fetch(url, {
      method: "GET",
      headers: this.getAuthHeaders()
    });
    if (!res.ok) {
      const err = await res.json();
      throw new Error(err.error?.message || err.message || "Failed to fetch voice config");
    }
    const { data } = await res.json();
    return data;
  }
  /**
   * Initialize and start the voice session.
   * No-op when a session is already active. On any failure the instance is
   * returned to the inactive state and the error is re-thrown after being
   * emitted as an "error" event.
   */
  async start() {
    if (this.active) return;
    this.active = true;
    try {
      // Server config is only needed when no local systemPrompt override exists.
      if (!this.config.systemPrompt) {
        this.serverConfig = await this.fetchConfig();
        console.log("[GlydeVoice] Fetched config:", this.serverConfig);
      }
      const authRes = await fetch(`${this.unityUrl}/api/unity/voice/auth`, {
        method: "POST",
        headers: this.getAuthHeaders(),
        body: JSON.stringify({
          context_id: this.config.contextId,
          domain: typeof window !== "undefined" ? window.location.hostname : "localhost"
        })
      });
      if (!authRes.ok) {
        const err = await authRes.json();
        throw new Error(err.error?.message || err.message || "Failed to authenticate voice session");
      }
      const { data: authData } = await authRes.json();
      const { token, agent_config: agentConfig } = authData;
      // Prompt precedence: explicit config > server config > agent default > fallback.
      const prompt = this.config.systemPrompt || this.serverConfig?.system_prompt || agentConfig.instructions || "You are a helpful AI assistant.";
      await this.initializeAudio();
      const wsUrl = "wss://agent.deepgram.com/v1/agent/converse";
      // Deepgram expects the session token via the "bearer" subprotocol.
      this.ws = new WebSocket(wsUrl, ["bearer", token]);
      this.ws.onopen = () => {
        const dg = this.config.deepgramConfig || this.serverConfig?.deepgram_config || {
          think: { provider: { type: "open_ai", model: "gpt-4o-mini" } },
          speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
          listen: { provider: { type: "deepgram", model: "nova-2", version: "latest" } }
        };
        const settings = {
          type: "Settings",
          audio: {
            input: {
              encoding: "linear16",
              sample_rate: this.inputSampleRate
            },
            output: {
              encoding: "linear16",
              sample_rate: this.outputSampleRate,
              container: "none"
            }
          },
          agent: {
            language: "en",
            speak: dg.speak || {
              provider: { type: "deepgram", model: "aura-2-thalia-en" }
            },
            listen: dg.listen || {
              provider: { type: "deepgram", version: "v2", model: "flux-general-en" }
            },
            think: {
              provider: dg.think?.provider || { type: "open_ai", model: "gpt-4o-mini" },
              functions: dg.think?.functions || [
                {
                  name: "end_conversation",
                  description: "End the conversation when stop phrases are detected.",
                  parameters: {
                    type: "object",
                    properties: {
                      item: { type: "string", description: "The phrase that triggered end of conversation" }
                    },
                    required: ["item"]
                  }
                }
              ]
            },
            greeting: "Hi! I'm ready to speak with you. How can I help you today?"
          }
        };
        this.ws.send(JSON.stringify(settings));
        this.emit({ type: "open", payload: { config: agentConfig, serverConfig: this.serverConfig } });
      };
      this.ws.onmessage = (ev) => {
        if (typeof ev.data === "string") {
          try {
            // Once Deepgram acknowledges settings, push the prompt and open the mic.
            // FIX: guard this.ws — cleanup() can race this handler and null it.
            if (JSON.parse(ev.data).type === "SettingsApplied" && this.ws) {
              const update = {
                type: "UpdatePrompt",
                prompt
              };
              this.ws.send(JSON.stringify(update));
              this.startMicrophone();
            }
          } catch {
            // Non-JSON text frames simply fall through to the text handler.
          }
          this.handleTextMessage(ev.data);
        } else if (ev.data instanceof Blob) {
          this.handleAudioData(ev.data);
        } else if (ev.data instanceof ArrayBuffer) {
          this.handleAudioBuffer(ev.data);
        }
      };
      this.ws.onerror = (ev) => {
        console.error("[GlydeVoice] WebSocket error:", ev);
        this.emit({ type: "error", payload: ev });
      };
      this.ws.onclose = () => {
        // FIX: a server-initiated close previously left `active` true, so
        // isActive() lied until stop() was called. Reset it here.
        this.active = false;
        this.cleanup();
        this.emit({ type: "close" });
      };
      this.renderUI();
    } catch (e) {
      console.error("[GlydeVoice] Error starting session:", e);
      this.active = false;
      this.emit({ type: "error", payload: e });
      throw e;
    }
  }
  /**
   * Create a blob URL from inline JavaScript code for AudioWorklet modules.
   * This avoids CORS issues when the SDK is loaded from a different origin than the page.
   * @param code - The JavaScript code to convert to a blob URL
   * @returns A blob URL that can be used with audioWorklet.addModule()
   */
  createWorkletBlobUrl(code) {
    const blob = new Blob([code], { type: "application/javascript" });
    return URL.createObjectURL(blob);
  }
  /**
   * Initialize the audio system with both capture and playback worklets.
   * Uses inline blob URLs to avoid CORS issues when SDK is embedded in external apps.
   */
  async initializeAudio() {
    this.audioContext = new AudioContext({ sampleRate: this.inputSampleRate });
    const captureUrl = this.createWorkletBlobUrl(h);
    const playbackUrl = this.createWorkletBlobUrl(p);
    try {
      await Promise.all([
        this.audioContext.audioWorklet.addModule(captureUrl),
        this.audioContext.audioWorklet.addModule(playbackUrl)
      ]);
    } finally {
      // Blob URLs are only needed while addModule() loads them.
      URL.revokeObjectURL(captureUrl);
      URL.revokeObjectURL(playbackUrl);
    }
    this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor");
    this.playbackWorkletNode.connect(this.audioContext.destination);
    this.playbackWorkletNode.port.onmessage = (ev) => {
      const { type } = ev.data;
      // Both signals mean playback drained: flip agent-speaking state off.
      if (type === "cleared" || type === "bufferEmpty") {
        this.isAgentSpeaking = false;
        this.agentAudioDoneReceived = false;
        this.emit({ type: "agent_speaking", payload: false });
      }
    };
  }
  /**
   * Handle text messages from the Voice Agent
   */
  handleTextMessage(raw) {
    try {
      const msg = JSON.parse(raw);
      switch (msg.type) {
        case "Welcome":
          this.emit({ type: "ready" });
          break;
        case "SettingsApplied":
          // Already handled in the onmessage prompt-update path.
          break;
        case "UserStartedSpeaking":
          // Barge-in: drop any queued agent audio immediately.
          this.emit({ type: "user_speaking", payload: true });
          this.clearPlaybackBuffer();
          this.isAgentSpeaking = false;
          this.agentAudioDoneReceived = false;
          break;
        case "UserStoppedSpeaking":
          this.emit({ type: "user_speaking", payload: false });
          break;
        case "ConversationText":
          if (msg.content && msg.content.trim()) {
            const role = msg.role === "assistant" ? "agent" : "user";
            if (this.config.onTranscript) this.config.onTranscript(msg.content, role);
            this.emit({ type: "transcript", payload: { text: msg.content, role } });
            this.saveTranscript(msg.content, msg.role);
          }
          break;
        case "AgentStartedSpeaking":
          this.isAgentSpeaking = true;
          this.agentAudioDoneReceived = false;
          this.emit({ type: "agent_speaking", payload: true });
          break;
        case "AgentAudioDone":
          this.agentAudioDoneReceived = true;
          break;
        case "Error":
          console.error("[GlydeVoice] Agent error:", msg);
          this.emit({ type: "error", payload: msg });
          break;
      }
    } catch (err) {
      console.error("[GlydeVoice] Failed to parse message:", err);
    }
  }
  /**
   * Handle binary audio data (Blob) from agent TTS
   */
  async handleAudioData(blob) {
    const buf = await blob.arrayBuffer();
    this.handleAudioBuffer(buf);
  }
  /**
   * Handle binary audio buffer from agent TTS
   * Deepgram sends linear16 PCM at 24kHz, we need to resample to 48kHz for playback
   */
  handleAudioBuffer(buf) {
    if (!this.playbackWorkletNode || !this.audioContext) return;
    if (this.audioContext.state === "suspended") this.audioContext.resume();
    const byteLength = buf.byteLength;
    if (byteLength === 0) return;
    // Int16Array needs an even byte count; drop a trailing odd byte.
    const evenLength = byteLength - byteLength % 2;
    if (evenLength === 0) return;
    const aligned = evenLength === byteLength ? buf : buf.slice(0, evenLength);
    const pcm = new Int16Array(aligned);
    const floats = new Float32Array(pcm.length);
    for (let i = 0; i < pcm.length; i++) {
      floats[i] = pcm[i] / 32768;
    }
    const upsampled = this.resample24kTo48k(floats);
    // Audio arriving before an AgentStartedSpeaking event still flips state on.
    if (!this.isAgentSpeaking && !this.agentAudioDoneReceived) {
      this.isAgentSpeaking = true;
      this.emit({ type: "agent_speaking", payload: true });
    }
    // Copy before transfer so `upsampled` itself is never detached.
    const transferable = new Float32Array(upsampled);
    this.playbackWorkletNode.port.postMessage({
      type: "audio",
      data: transferable
    }, [transferable.buffer]);
  }
  /**
   * Resample audio from 24kHz to 48kHz using linear interpolation
   * @param input - mono Float32 samples at 24kHz
   * @returns Float32 samples at 48kHz (2x length)
   */
  resample24kTo48k(input) {
    const outLength = input.length * 2;
    const out = new Float32Array(outLength);
    for (let i = 0; i < input.length - 1; i++) {
      const cur = input[i];
      const next = input[i + 1];
      out[i * 2] = cur;
      out[i * 2 + 1] = (cur + next) / 2; // midpoint between neighbours
    }
    // Last sample has no right neighbour: duplicate it.
    const last = input.length - 1;
    out[last * 2] = input[last];
    out[last * 2 + 1] = input[last];
    return out;
  }
  /**
   * Clear the playback buffer (for interruption handling)
   */
  clearPlaybackBuffer() {
    if (this.playbackWorkletNode) {
      this.playbackWorkletNode.port.postMessage({ type: "clear" });
    }
  }
  /**
   * Start capturing microphone audio using AudioWorklet
   * @throws Error when the audio context is missing or getUserMedia fails
   */
  async startMicrophone() {
    if (!this.audioContext) {
      throw new Error("Audio context not initialized");
    }
    try {
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: 1,
          sampleRate: this.inputSampleRate,
          echoCancellation: true,
          noiseSuppression: true
        }
      });
      const source = this.audioContext.createMediaStreamSource(this.mediaStream);
      this.captureWorkletNode = new AudioWorkletNode(this.audioContext, "audio-capture-processor");
      this.captureWorkletNode.port.onmessage = (ev) => {
        // Drop frames unless the session is live, the socket is open, and we are unmuted.
        if (!this.active || !this.ws || this.ws.readyState !== WebSocket.OPEN || this.isMuted) return;
        this.ws.send(ev.data);
      };
      source.connect(this.captureWorkletNode);
      this.emit({ type: "microphone_ready" });
    } catch (err) {
      console.error("[GlydeVoice] Microphone error:", err);
      throw err;
    }
  }
  /**
   * Save transcript to Unity backend (best-effort; failures are ignored)
   */
  async saveTranscript(content, role) {
    if (!this.config.contextId || !content) return;
    try {
      await fetch(`${this.unityUrl}/api/unity/voice/transcript`, {
        method: "POST",
        headers: this.getAuthHeaders(),
        body: JSON.stringify({
          context_id: this.config.contextId,
          content,
          role: role === "assistant" ? "assistant" : "user"
        })
      });
    } catch {
      // Deliberate best-effort: transcript persistence must never break the call.
    }
  }
  /**
   * Toggle mute state
   * @param muted - Whether to mute the microphone
   */
  setMuted(muted) {
    this.isMuted = muted;
  }
  /**
   * Get current mute state
   */
  getMuted() {
    return this.isMuted;
  }
  /**
   * Check if the voice agent is currently active
   */
  isActive() {
    return this.active;
  }
  /**
   * Get the current server configuration
   */
  getServerConfig() {
    return this.serverConfig;
  }
  /**
   * Stop the voice session
   */
  stop() {
    this.active = false;
    this.cleanup();
  }
  /**
   * Cleanup resources (idempotent)
   */
  cleanup() {
    if (this.captureWorkletNode) {
      this.captureWorkletNode.disconnect();
      this.captureWorkletNode.port.close();
      this.captureWorkletNode = null;
    }
    if (this.playbackWorkletNode) {
      this.playbackWorkletNode.disconnect();
      this.playbackWorkletNode.port.close();
      this.playbackWorkletNode = null;
    }
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }
    if (this.audioContext) {
      // FIX: close() returns a promise that rejects if the context is already
      // closed — swallow it to avoid an unhandled rejection.
      this.audioContext.close().catch(() => {});
      this.audioContext = null;
    }
    if (this.ws) {
      // FIX: also close sockets still in the CONNECTING state; the original
      // only closed OPEN sockets and leaked in-flight handshakes on stop().
      if (this.ws.readyState === WebSocket.OPEN || this.ws.readyState === WebSocket.CONNECTING) {
        this.ws.close();
      }
      this.ws = null;
    }
  }
  /**
   * Emit event to callback
   */
  emit(event) {
    if (this.config.onEvent) this.config.onEvent(event);
  }
  /**
   * Render a simple UI widget (optional)
   */
  renderUI() {
    if (!this.config.container) return;
    const el = typeof this.config.container === "string"
      ? document.querySelector(this.config.container)
      : this.config.container;
    if (!el) return;
    // FIX: escape contextType before interpolating into innerHTML so a
    // hostile value cannot inject markup/script into the host page.
    const safeContext = String(this.config.contextType)
      .replace(/&/g, "&amp;")
      .replace(/</g, "&lt;")
      .replace(/>/g, "&gt;")
      .replace(/"/g, "&quot;");
    el.innerHTML = `
<div style="padding: 20px; border: 1px solid #ccc; border-radius: 8px; background: #fff;">
<h3>Glyde Voice Agent</h3>
<p>Status: Active</p>
<p>Context: ${safeContext}</p>
<button onclick="this.closest('div').remove()">Close</button>
</div>
`;
  }
}
2
498
  export {
3
- r as GlydeVoice
499
+ u as GlydeVoice
4
500
  };