@glydeunity/voice-sdk 1.1.0 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +9 -1
- package/dist/voice-sdk.es.js +188 -37
- package/dist/voice-sdk.umd.js +134 -2
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -89,7 +89,15 @@ export declare class GlydeVoice {
|
|
|
89
89
|
*/
|
|
90
90
|
start(): Promise<void>;
|
|
91
91
|
/**
|
|
92
|
-
*
|
|
92
|
+
* Create a blob URL from inline JavaScript code for AudioWorklet modules.
|
|
93
|
+
* This avoids CORS issues when the SDK is loaded from a different origin than the page.
|
|
94
|
+
* @param code - The JavaScript code to convert to a blob URL
|
|
95
|
+
* @returns A blob URL that can be used with audioWorklet.addModule()
|
|
96
|
+
*/
|
|
97
|
+
private createWorkletBlobUrl;
|
|
98
|
+
/**
|
|
99
|
+
* Initialize the audio system with both capture and playback worklets.
|
|
100
|
+
* Uses inline blob URLs to avoid CORS issues when SDK is embedded in external apps.
|
|
93
101
|
*/
|
|
94
102
|
private initializeAudio;
|
|
95
103
|
/**
|
package/dist/voice-sdk.es.js
CHANGED
|
@@ -1,4 +1,137 @@
|
|
|
1
|
-
|
|
1
|
+
const h = `
|
|
2
|
+
class AudioCaptureProcessor extends AudioWorkletProcessor {
|
|
3
|
+
constructor() {
|
|
4
|
+
super();
|
|
5
|
+
this.bufferSize = 4096;
|
|
6
|
+
this.buffer = new Float32Array(this.bufferSize);
|
|
7
|
+
this.bufferIndex = 0;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
process(inputs) {
|
|
11
|
+
const input = inputs[0];
|
|
12
|
+
if (!input || !input[0]) return true;
|
|
13
|
+
|
|
14
|
+
const samples = input[0];
|
|
15
|
+
|
|
16
|
+
for (let i = 0; i < samples.length; i++) {
|
|
17
|
+
this.buffer[this.bufferIndex++] = samples[i];
|
|
18
|
+
|
|
19
|
+
if (this.bufferIndex >= this.bufferSize) {
|
|
20
|
+
const pcm16 = new Int16Array(this.bufferSize);
|
|
21
|
+
for (let j = 0; j < this.bufferSize; j++) {
|
|
22
|
+
const s = Math.max(-1, Math.min(1, this.buffer[j]));
|
|
23
|
+
pcm16[j] = s < 0 ? s * 0x8000 : s * 0x7FFF;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
this.port.postMessage(pcm16.buffer, [pcm16.buffer]);
|
|
27
|
+
this.bufferIndex = 0;
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
return true;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
registerProcessor('audio-capture-processor', AudioCaptureProcessor);
|
|
36
|
+
`, p = `
|
|
37
|
+
class AudioPlaybackProcessor extends AudioWorkletProcessor {
|
|
38
|
+
constructor() {
|
|
39
|
+
super();
|
|
40
|
+
|
|
41
|
+
this.bufferSize = 48000 * 15;
|
|
42
|
+
this.buffer = new Float32Array(this.bufferSize);
|
|
43
|
+
this.writeIndex = 0;
|
|
44
|
+
this.readIndex = 0;
|
|
45
|
+
this.samplesAvailable = 0;
|
|
46
|
+
this.isPlaying = false;
|
|
47
|
+
|
|
48
|
+
this.port.onmessage = (event) => {
|
|
49
|
+
const { type, data } = event.data;
|
|
50
|
+
|
|
51
|
+
switch (type) {
|
|
52
|
+
case 'audio':
|
|
53
|
+
const audioData = data instanceof Float32Array ? data : new Float32Array(data);
|
|
54
|
+
this.writeAudio(audioData);
|
|
55
|
+
break;
|
|
56
|
+
case 'clear':
|
|
57
|
+
this.clearBuffer();
|
|
58
|
+
break;
|
|
59
|
+
}
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
writeAudio(samples) {
|
|
64
|
+
if (!samples || samples.length === 0) return;
|
|
65
|
+
|
|
66
|
+
const samplesToWrite = samples.length;
|
|
67
|
+
|
|
68
|
+
if (this.samplesAvailable + samplesToWrite > this.bufferSize) {
|
|
69
|
+
const overflow = (this.samplesAvailable + samplesToWrite) - this.bufferSize;
|
|
70
|
+
this.readIndex = (this.readIndex + overflow) % this.bufferSize;
|
|
71
|
+
this.samplesAvailable -= overflow;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
for (let i = 0; i < samplesToWrite; i++) {
|
|
75
|
+
this.buffer[this.writeIndex] = samples[i];
|
|
76
|
+
this.writeIndex = (this.writeIndex + 1) % this.bufferSize;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
this.samplesAvailable += samplesToWrite;
|
|
80
|
+
this.isPlaying = true;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
clearBuffer() {
|
|
84
|
+
this.readIndex = 0;
|
|
85
|
+
this.writeIndex = 0;
|
|
86
|
+
this.samplesAvailable = 0;
|
|
87
|
+
this.isPlaying = false;
|
|
88
|
+
this.port.postMessage({ type: 'cleared' });
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
process(inputs, outputs) {
|
|
92
|
+
const output = outputs[0];
|
|
93
|
+
if (!output || !output[0]) return true;
|
|
94
|
+
|
|
95
|
+
const outputChannel = output[0];
|
|
96
|
+
const samplesToRead = outputChannel.length;
|
|
97
|
+
|
|
98
|
+
if (this.samplesAvailable >= samplesToRead) {
|
|
99
|
+
for (let i = 0; i < samplesToRead; i++) {
|
|
100
|
+
outputChannel[i] = this.buffer[this.readIndex];
|
|
101
|
+
this.readIndex = (this.readIndex + 1) % this.bufferSize;
|
|
102
|
+
}
|
|
103
|
+
this.samplesAvailable -= samplesToRead;
|
|
104
|
+
} else if (this.samplesAvailable > 0) {
|
|
105
|
+
let i = 0;
|
|
106
|
+
while (this.samplesAvailable > 0 && i < samplesToRead) {
|
|
107
|
+
outputChannel[i] = this.buffer[this.readIndex];
|
|
108
|
+
this.readIndex = (this.readIndex + 1) % this.bufferSize;
|
|
109
|
+
this.samplesAvailable--;
|
|
110
|
+
i++;
|
|
111
|
+
}
|
|
112
|
+
while (i < samplesToRead) {
|
|
113
|
+
outputChannel[i] = 0;
|
|
114
|
+
i++;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
if (this.isPlaying) {
|
|
118
|
+
this.isPlaying = false;
|
|
119
|
+
this.port.postMessage({ type: 'bufferEmpty' });
|
|
120
|
+
}
|
|
121
|
+
} else {
|
|
122
|
+
for (let i = 0; i < samplesToRead; i++) {
|
|
123
|
+
outputChannel[i] = 0;
|
|
124
|
+
}
|
|
125
|
+
this.isPlaying = false;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return true;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
|
|
133
|
+
`;
|
|
134
|
+
class u {
|
|
2
135
|
config;
|
|
3
136
|
unityUrl;
|
|
4
137
|
active = !1;
|
|
@@ -41,16 +174,16 @@ class l {
|
|
|
41
174
|
* @returns Voice configuration including system prompt, tools, and Deepgram settings
|
|
42
175
|
*/
|
|
43
176
|
async fetchConfig() {
|
|
44
|
-
const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e,
|
|
177
|
+
const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, s = await fetch(t, {
|
|
45
178
|
method: "GET",
|
|
46
179
|
headers: this.getAuthHeaders()
|
|
47
180
|
});
|
|
48
|
-
if (!
|
|
49
|
-
const a = await
|
|
181
|
+
if (!s.ok) {
|
|
182
|
+
const a = await s.json();
|
|
50
183
|
throw new Error(a.error?.message || a.message || "Failed to fetch voice config");
|
|
51
184
|
}
|
|
52
|
-
const { data:
|
|
53
|
-
return
|
|
185
|
+
const { data: o } = await s.json();
|
|
186
|
+
return o;
|
|
54
187
|
}
|
|
55
188
|
/**
|
|
56
189
|
* Initialize and start the voice session
|
|
@@ -72,15 +205,15 @@ class l {
|
|
|
72
205
|
const i = await e.json();
|
|
73
206
|
throw new Error(i.error?.message || i.message || "Failed to authenticate voice session");
|
|
74
207
|
}
|
|
75
|
-
const { data: t } = await e.json(), { token:
|
|
208
|
+
const { data: t } = await e.json(), { token: s, agent_config: o } = t, a = this.config.systemPrompt || this.serverConfig?.system_prompt || o.instructions || "You are a helpful AI assistant.";
|
|
76
209
|
await this.initializeAudio();
|
|
77
|
-
const
|
|
78
|
-
this.ws = new WebSocket(
|
|
210
|
+
const r = "wss://agent.deepgram.com/v1/agent/converse";
|
|
211
|
+
this.ws = new WebSocket(r, ["bearer", s]), this.ws.onopen = () => {
|
|
79
212
|
const i = this.config.deepgramConfig || this.serverConfig?.deepgram_config || {
|
|
80
213
|
think: { provider: { type: "open_ai", model: "gpt-4o-mini" } },
|
|
81
214
|
speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
|
|
82
215
|
listen: { provider: { type: "deepgram", model: "nova-2", version: "latest" } }
|
|
83
|
-
},
|
|
216
|
+
}, n = {
|
|
84
217
|
type: "Settings",
|
|
85
218
|
audio: {
|
|
86
219
|
input: {
|
|
@@ -120,18 +253,18 @@ class l {
|
|
|
120
253
|
greeting: "Hi! I'm ready to speak with you. How can I help you today?"
|
|
121
254
|
}
|
|
122
255
|
};
|
|
123
|
-
this.ws.send(JSON.stringify(
|
|
256
|
+
this.ws.send(JSON.stringify(n)), this.emit({ type: "open", payload: { config: o, serverConfig: this.serverConfig } });
|
|
124
257
|
};
|
|
125
|
-
const
|
|
258
|
+
const l = a;
|
|
126
259
|
this.ws.onmessage = (i) => {
|
|
127
260
|
if (typeof i.data == "string") {
|
|
128
261
|
try {
|
|
129
262
|
if (JSON.parse(i.data).type === "SettingsApplied") {
|
|
130
|
-
const
|
|
263
|
+
const c = {
|
|
131
264
|
type: "UpdatePrompt",
|
|
132
|
-
prompt:
|
|
265
|
+
prompt: l
|
|
133
266
|
};
|
|
134
|
-
this.ws.send(JSON.stringify(
|
|
267
|
+
this.ws.send(JSON.stringify(c)), this.startMicrophone();
|
|
135
268
|
}
|
|
136
269
|
} catch {
|
|
137
270
|
}
|
|
@@ -148,15 +281,33 @@ class l {
|
|
|
148
281
|
}
|
|
149
282
|
}
|
|
150
283
|
/**
|
|
151
|
-
*
|
|
284
|
+
* Create a blob URL from inline JavaScript code for AudioWorklet modules.
|
|
285
|
+
* This avoids CORS issues when the SDK is loaded from a different origin than the page.
|
|
286
|
+
* @param code - The JavaScript code to convert to a blob URL
|
|
287
|
+
* @returns A blob URL that can be used with audioWorklet.addModule()
|
|
288
|
+
*/
|
|
289
|
+
createWorkletBlobUrl(e) {
|
|
290
|
+
const t = new Blob([e], { type: "application/javascript" });
|
|
291
|
+
return URL.createObjectURL(t);
|
|
292
|
+
}
|
|
293
|
+
/**
|
|
294
|
+
* Initialize the audio system with both capture and playback worklets.
|
|
295
|
+
* Uses inline blob URLs to avoid CORS issues when SDK is embedded in external apps.
|
|
152
296
|
*/
|
|
153
297
|
async initializeAudio() {
|
|
154
|
-
this.audioContext = new AudioContext({ sampleRate: this.inputSampleRate })
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
298
|
+
this.audioContext = new AudioContext({ sampleRate: this.inputSampleRate });
|
|
299
|
+
const e = this.createWorkletBlobUrl(h), t = this.createWorkletBlobUrl(p);
|
|
300
|
+
try {
|
|
301
|
+
await Promise.all([
|
|
302
|
+
this.audioContext.audioWorklet.addModule(e),
|
|
303
|
+
this.audioContext.audioWorklet.addModule(t)
|
|
304
|
+
]);
|
|
305
|
+
} finally {
|
|
306
|
+
URL.revokeObjectURL(e), URL.revokeObjectURL(t);
|
|
307
|
+
}
|
|
308
|
+
this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (s) => {
|
|
309
|
+
const { type: o } = s.data;
|
|
310
|
+
(o === "cleared" || o === "bufferEmpty") && (this.isAgentSpeaking = !1, this.agentAudioDoneReceived = !1, this.emit({ type: "agent_speaking", payload: !1 }));
|
|
160
311
|
};
|
|
161
312
|
}
|
|
162
313
|
/**
|
|
@@ -179,8 +330,8 @@ class l {
|
|
|
179
330
|
break;
|
|
180
331
|
case "ConversationText":
|
|
181
332
|
if (t.content && t.content.trim()) {
|
|
182
|
-
const
|
|
183
|
-
this.config.onTranscript && this.config.onTranscript(t.content,
|
|
333
|
+
const s = t.role === "assistant" ? "agent" : "user";
|
|
334
|
+
this.config.onTranscript && this.config.onTranscript(t.content, s), this.emit({ type: "transcript", payload: { text: t.content, role: s } }), this.saveTranscript(t.content, t.role);
|
|
184
335
|
}
|
|
185
336
|
break;
|
|
186
337
|
case "AgentStartedSpeaking":
|
|
@@ -213,14 +364,14 @@ class l {
|
|
|
213
364
|
this.audioContext.state === "suspended" && this.audioContext.resume();
|
|
214
365
|
const t = e.byteLength;
|
|
215
366
|
if (t === 0) return;
|
|
216
|
-
const
|
|
217
|
-
if (
|
|
218
|
-
const
|
|
219
|
-
for (let
|
|
220
|
-
n
|
|
221
|
-
const
|
|
367
|
+
const s = t - t % 2;
|
|
368
|
+
if (s === 0) return;
|
|
369
|
+
const o = s === t ? e : e.slice(0, s), a = new Int16Array(o), r = new Float32Array(a.length);
|
|
370
|
+
for (let n = 0; n < a.length; n++)
|
|
371
|
+
r[n] = a[n] / 32768;
|
|
372
|
+
const l = this.resample24kTo48k(r);
|
|
222
373
|
!this.isAgentSpeaking && !this.agentAudioDoneReceived && (this.isAgentSpeaking = !0, this.emit({ type: "agent_speaking", payload: !0 }));
|
|
223
|
-
const i = new Float32Array(
|
|
374
|
+
const i = new Float32Array(l);
|
|
224
375
|
this.playbackWorkletNode.port.postMessage({
|
|
225
376
|
type: "audio",
|
|
226
377
|
data: i
|
|
@@ -230,13 +381,13 @@ class l {
|
|
|
230
381
|
* Resample audio from 24kHz to 48kHz using linear interpolation
|
|
231
382
|
*/
|
|
232
383
|
resample24kTo48k(e) {
|
|
233
|
-
const t = e.length * 2,
|
|
384
|
+
const t = e.length * 2, s = new Float32Array(t);
|
|
234
385
|
for (let a = 0; a < e.length - 1; a++) {
|
|
235
|
-
const
|
|
236
|
-
|
|
386
|
+
const r = e[a], l = e[a + 1];
|
|
387
|
+
s[a * 2] = r, s[a * 2 + 1] = (r + l) / 2;
|
|
237
388
|
}
|
|
238
|
-
const
|
|
239
|
-
return o
|
|
389
|
+
const o = e.length - 1;
|
|
390
|
+
return s[o * 2] = e[o], s[o * 2 + 1] = e[o], s;
|
|
240
391
|
}
|
|
241
392
|
/**
|
|
242
393
|
* Clear the playback buffer (for interruption handling)
|
|
@@ -345,5 +496,5 @@ class l {
|
|
|
345
496
|
}
|
|
346
497
|
}
|
|
347
498
|
export {
|
|
348
|
-
|
|
499
|
+
u as GlydeVoice
|
|
349
500
|
};
|
package/dist/voice-sdk.umd.js
CHANGED
|
@@ -1,8 +1,140 @@
|
|
|
1
|
-
(function(c
|
|
1
|
+
(function(l,c){typeof exports=="object"&&typeof module<"u"?c(exports):typeof define=="function"&&define.amd?define(["exports"],c):(l=typeof globalThis<"u"?globalThis:l||self,c(l.GlydeVoice={}))})(this,(function(l){"use strict";const c=`
|
|
2
|
+
class AudioCaptureProcessor extends AudioWorkletProcessor {
|
|
3
|
+
constructor() {
|
|
4
|
+
super();
|
|
5
|
+
this.bufferSize = 4096;
|
|
6
|
+
this.buffer = new Float32Array(this.bufferSize);
|
|
7
|
+
this.bufferIndex = 0;
|
|
8
|
+
}
|
|
9
|
+
|
|
10
|
+
process(inputs) {
|
|
11
|
+
const input = inputs[0];
|
|
12
|
+
if (!input || !input[0]) return true;
|
|
13
|
+
|
|
14
|
+
const samples = input[0];
|
|
15
|
+
|
|
16
|
+
for (let i = 0; i < samples.length; i++) {
|
|
17
|
+
this.buffer[this.bufferIndex++] = samples[i];
|
|
18
|
+
|
|
19
|
+
if (this.bufferIndex >= this.bufferSize) {
|
|
20
|
+
const pcm16 = new Int16Array(this.bufferSize);
|
|
21
|
+
for (let j = 0; j < this.bufferSize; j++) {
|
|
22
|
+
const s = Math.max(-1, Math.min(1, this.buffer[j]));
|
|
23
|
+
pcm16[j] = s < 0 ? s * 0x8000 : s * 0x7FFF;
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
this.port.postMessage(pcm16.buffer, [pcm16.buffer]);
|
|
27
|
+
this.bufferIndex = 0;
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
return true;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
registerProcessor('audio-capture-processor', AudioCaptureProcessor);
|
|
36
|
+
`,d=`
|
|
37
|
+
class AudioPlaybackProcessor extends AudioWorkletProcessor {
|
|
38
|
+
constructor() {
|
|
39
|
+
super();
|
|
40
|
+
|
|
41
|
+
this.bufferSize = 48000 * 15;
|
|
42
|
+
this.buffer = new Float32Array(this.bufferSize);
|
|
43
|
+
this.writeIndex = 0;
|
|
44
|
+
this.readIndex = 0;
|
|
45
|
+
this.samplesAvailable = 0;
|
|
46
|
+
this.isPlaying = false;
|
|
47
|
+
|
|
48
|
+
this.port.onmessage = (event) => {
|
|
49
|
+
const { type, data } = event.data;
|
|
50
|
+
|
|
51
|
+
switch (type) {
|
|
52
|
+
case 'audio':
|
|
53
|
+
const audioData = data instanceof Float32Array ? data : new Float32Array(data);
|
|
54
|
+
this.writeAudio(audioData);
|
|
55
|
+
break;
|
|
56
|
+
case 'clear':
|
|
57
|
+
this.clearBuffer();
|
|
58
|
+
break;
|
|
59
|
+
}
|
|
60
|
+
};
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
writeAudio(samples) {
|
|
64
|
+
if (!samples || samples.length === 0) return;
|
|
65
|
+
|
|
66
|
+
const samplesToWrite = samples.length;
|
|
67
|
+
|
|
68
|
+
if (this.samplesAvailable + samplesToWrite > this.bufferSize) {
|
|
69
|
+
const overflow = (this.samplesAvailable + samplesToWrite) - this.bufferSize;
|
|
70
|
+
this.readIndex = (this.readIndex + overflow) % this.bufferSize;
|
|
71
|
+
this.samplesAvailable -= overflow;
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
for (let i = 0; i < samplesToWrite; i++) {
|
|
75
|
+
this.buffer[this.writeIndex] = samples[i];
|
|
76
|
+
this.writeIndex = (this.writeIndex + 1) % this.bufferSize;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
this.samplesAvailable += samplesToWrite;
|
|
80
|
+
this.isPlaying = true;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
clearBuffer() {
|
|
84
|
+
this.readIndex = 0;
|
|
85
|
+
this.writeIndex = 0;
|
|
86
|
+
this.samplesAvailable = 0;
|
|
87
|
+
this.isPlaying = false;
|
|
88
|
+
this.port.postMessage({ type: 'cleared' });
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
process(inputs, outputs) {
|
|
92
|
+
const output = outputs[0];
|
|
93
|
+
if (!output || !output[0]) return true;
|
|
94
|
+
|
|
95
|
+
const outputChannel = output[0];
|
|
96
|
+
const samplesToRead = outputChannel.length;
|
|
97
|
+
|
|
98
|
+
if (this.samplesAvailable >= samplesToRead) {
|
|
99
|
+
for (let i = 0; i < samplesToRead; i++) {
|
|
100
|
+
outputChannel[i] = this.buffer[this.readIndex];
|
|
101
|
+
this.readIndex = (this.readIndex + 1) % this.bufferSize;
|
|
102
|
+
}
|
|
103
|
+
this.samplesAvailable -= samplesToRead;
|
|
104
|
+
} else if (this.samplesAvailable > 0) {
|
|
105
|
+
let i = 0;
|
|
106
|
+
while (this.samplesAvailable > 0 && i < samplesToRead) {
|
|
107
|
+
outputChannel[i] = this.buffer[this.readIndex];
|
|
108
|
+
this.readIndex = (this.readIndex + 1) % this.bufferSize;
|
|
109
|
+
this.samplesAvailable--;
|
|
110
|
+
i++;
|
|
111
|
+
}
|
|
112
|
+
while (i < samplesToRead) {
|
|
113
|
+
outputChannel[i] = 0;
|
|
114
|
+
i++;
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
if (this.isPlaying) {
|
|
118
|
+
this.isPlaying = false;
|
|
119
|
+
this.port.postMessage({ type: 'bufferEmpty' });
|
|
120
|
+
}
|
|
121
|
+
} else {
|
|
122
|
+
for (let i = 0; i < samplesToRead; i++) {
|
|
123
|
+
outputChannel[i] = 0;
|
|
124
|
+
}
|
|
125
|
+
this.isPlaying = false;
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return true;
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
|
|
133
|
+
`;class p{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,s=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!s.ok){const o=await s.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:a}=await s.json();return a}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"})});if(!e.ok){const i=await e.json();throw new Error(i.error?.message||i.message||"Failed to authenticate voice session")}const{data:t}=await e.json(),{token:s,agent_config:a}=t,o=this.config.systemPrompt||this.serverConfig?.system_prompt||a.instructions||"You are a helpful AI assistant.";await this.initializeAudio();const r="wss://agent.deepgram.com/v1/agent/converse";this.ws=new WebSocket(r,["bearer",s]),this.ws.onopen=()=>{const i=this.config.deepgramConfig||this.serverConfig?.deepgram_config||{think:{provider:{type:"open_ai",model:"gpt-4o-mini"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",model:"nova-2",version:"latest"}}},n={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:i.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:i.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:i.think?.provider||{type:"open_ai",model:"gpt-4o-mini"},functions:i.think?.functions||[{name:"end_conversation",description:"End the conversation when stop phrases are detected.",parameters:{type:"object",properties:{item:{type:"string",description:"The phrase that triggered end of conversation"}},required:["item"]}}]},greeting:"Hi! I'm ready to speak with you. How can I help you today?"}};this.ws.send(JSON.stringify(n)),this.emit({type:"open",payload:{config:a,serverConfig:this.serverConfig}})};const h=o;this.ws.onmessage=i=>{if(typeof i.data=="string"){try{if(JSON.parse(i.data).type==="SettingsApplied"){const u={type:"UpdatePrompt",prompt:h};this.ws.send(JSON.stringify(u)),this.startMicrophone()}}catch{}this.handleTextMessage(i.data)}else i.data instanceof Blob?this.handleAudioData(i.data):i.data instanceof ArrayBuffer&&this.handleAudioBuffer(i.data)},this.ws.onerror=i=>{console.error("[GlydeVoice] WebSocket error:",i),this.emit({type:"error",payload:i})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(c),t=this.createWorkletBlobUrl(d);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=s=>{const{type:a}=s.data;(a==="cleared"||a==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const s=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,s),this.emit({type:"transcript",payload:{text:t.content,role:s}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const s=t-t%2;if(s===0)return;const a=s===t?e:e.slice(0,s),o=new Int16Array(a),r=new Float32Array(o.length);for(let n=0;n<o.length;n++)r[n]=o[n]/32768;const h=this.resample24kTo48k(r);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const i=new Float32Array(h);this.playbackWorkletNode.port.postMessage({type:"audio",data:i},[i.buffer])}resample24kTo48k(e){const t=e.length*2,s=new Float32Array(t);for(let o=0;o<e.length-1;o++){const r=e[o],h=e[o+1];s[o*2]=r,s[o*2+1]=(r+h)/2}const a=e.length-1;return s[a*2]=e[a],s[a*2+1]=e[a],s}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
|
|
2
134
|
<div style="padding: 20px; border: 1px solid #ccc; border-radius: 8px; background: #fff;">
|
|
3
135
|
<h3>Glyde Voice Agent</h3>
|
|
4
136
|
<p>Status: Active</p>
|
|
5
137
|
<p>Context: ${this.config.contextType}</p>
|
|
6
138
|
<button onclick="this.closest('div').remove()">Close</button>
|
|
7
139
|
</div>
|
|
8
|
-
`)}}
|
|
140
|
+
`)}}l.GlydeVoice=p,Object.defineProperty(l,Symbol.toStringTag,{value:"Module"})}));
|