@glydeunity/voice-sdk 1.3.4 → 1.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +36 -7
- package/dist/voice-sdk.es.js +77 -65
- package/dist/voice-sdk.umd.js +2 -2
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
@@ -47,22 +47,48 @@ export declare interface DeepgramAgentConfig {
  * Function call request from Deepgram Voice Agent
  * @see https://developers.deepgram.com/docs/voice-agent-function-call-request
  */
+/**
+ * Individual function call within a FunctionCallRequest
+ * @see https://developers.deepgram.com/docs/voice-agent-function-call-request
+ */
+export declare interface FunctionCall {
+    /** Unique identifier for the function call */
+    id: string;
+    /** Name of the function to execute */
+    name: string;
+    /** JSON string containing function arguments */
+    arguments: string;
+    /** If true, client must execute and respond; if false, server handles it */
+    client_side?: boolean;
+}
+
+/**
+ * Function call request message from Deepgram Voice Agent
+ * Contains an array of functions to be executed
+ * @see https://developers.deepgram.com/docs/voice-agent-function-call-request
+ */
 export declare interface FunctionCallRequest {
     type: 'FunctionCallRequest';
-    function_name: string;
-    function_call_id: string;
-    input: Record<string, unknown>;
-    client_side?: boolean;
+    /** Array of function calls to execute */
+    functions: FunctionCall[];
 }

 /**
  * Function call response to send back to Deepgram
  * @see https://developers.deepgram.com/docs/voice-agent-function-call-response
  */
+/**
+ * Function call response to send back to Deepgram Voice Agent
+ * @see https://developers.deepgram.com/docs/voice-agent-function-call-response
+ */
 export declare interface FunctionCallResponse {
     type: 'FunctionCallResponse';
-    function_call_id: string;
-    output: string;
+    /** Unique identifier of the original function call */
+    id: string;
+    /** Name of the function that was executed */
+    name: string;
+    /** Text summary or JSON result of the function's output */
+    content: string;
 }

 /**
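
The shape change above is the breaking part of this release: FunctionCallRequest now carries a functions array of FunctionCall objects (each with a JSON-string arguments field) instead of a single flat call, and FunctionCallResponse replaces function_call_id/output with id/name/content. A minimal sketch of the new shape, with a single call pulled from the array and answered; the id, name, and argument values are illustrative, not from the SDK:

import type { FunctionCallRequest, FunctionCallResponse } from '@glydeunity/voice-sdk';

// Illustrative request as Deepgram now sends it: an array of calls,
// each with a JSON-string `arguments` field.
const request: FunctionCallRequest = {
  type: 'FunctionCallRequest',
  functions: [
    { id: 'fc_123', name: 'lookup_job', arguments: '{"job_id":42}', client_side: true },
  ],
};

const call = request.functions[0];
const response: FunctionCallResponse = {
  type: 'FunctionCallResponse',
  id: call.id,        // echoes FunctionCall.id (was function_call_id)
  name: call.name,
  content: JSON.stringify({ ok: true }), // string result (was `output`)
};
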
@@ -195,7 +221,10 @@ export declare class GlydeVoice {
      * Handle a function call request from Deepgram Voice Agent
      * Routes function execution through the Unity voice function endpoint for proper authentication
      *
-     *
+     * Deepgram sends an array of functions in each request, so we process each one
+     * and send individual responses back.
+     *
+     * @param request - The function call request from Deepgram containing functions array
      * @see https://developers.deepgram.com/docs/voice-agents-function-calling
      */
     private handleFunctionCallRequest;
package/dist/voice-sdk.es.js
CHANGED
@@ -176,15 +176,15 @@ class y {
    * @returns Voice configuration including system prompt, tools, and Deepgram settings
    */
   async fetchConfig() {
-    const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e,
+    const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, s = await fetch(t, {
       method: "GET",
       headers: this.getAuthHeaders()
     });
-    if (!
-    const
-    throw new Error(
+    if (!s.ok) {
+      const o = await s.json();
+      throw new Error(o.error?.message || o.message || "Failed to fetch voice config");
     }
-    const { data: i } = await
+    const { data: i } = await s.json();
     return i;
   }
   /**
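
De-minified, the fetchConfig change reads roughly as below. The variable names are mine; the endpoint path and the error fallback chain (error?.message, then message, then a generic string) come straight from the diff:

// Readable sketch of the minified fetchConfig above (names illustrative).
async function fetchVoiceConfig(
  unityUrl: string,
  contextType: string,
  contextId?: string,
  headers: Record<string, string> = {},
) {
  const base = `${unityUrl}/api/unity/voice/config/${contextType}`;
  const url = contextId ? `${base}/${contextId}` : base;
  const res = await fetch(url, { method: 'GET', headers });
  if (!res.ok) {
    const body = await res.json();
    throw new Error(body.error?.message || body.message || 'Failed to fetch voice config');
  }
  const { data } = await res.json();
  return data;
}
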
@@ -206,26 +206,26 @@ class y {
       body: JSON.stringify(e)
     });
     if (!t.ok) {
-      const
-      throw new Error(
+      const a = await t.json();
+      throw new Error(a.error?.message || a.message || "Failed to authenticate voice session");
     }
-    const { data:
+    const { data: s } = await t.json(), { token: i, agent_config: o, deepgram_config: n } = s;
     this.setSessionContext({
-      clientUuid:
+      clientUuid: o?.client_uuid,
       contextId: this.config.contextId,
       contextType: this.config.contextType,
-      currentJobUuid:
+      currentJobUuid: o?.job_uuid
     });
-    const c = this.config.systemPrompt ||
+    const c = this.config.systemPrompt || o.instructions || this.serverConfig?.system_prompt || "You are a helpful AI assistant.";
     await this.initializeAudio();
     let l = "wss://agent.deepgram.com/v1/agent/converse";
     const r = this.config.deepgramConfig || n || this.serverConfig?.deepgram_config;
     if (r?.tags && r.tags.length > 0) {
-      const
-      r.tags.forEach((h) =>
+      const a = new URLSearchParams();
+      r.tags.forEach((h) => a.append("tag", h)), l += `?${a.toString()}`;
     }
     this.ws = new WebSocket(l, ["bearer", i]), this.ws.onopen = () => {
-      const
+      const a = r || {
         think: { provider: { type: "open_ai", model: "gpt-4.1-nano" } },
         speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
         listen: { provider: { type: "deepgram", version: "v2", model: "flux-general-en" } }
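
Two details in this hunk are easy to miss: optional Deepgram tags are appended as repeated tag query parameters on the agent URL, and the session token is passed in the WebSocket subprotocol list, since browsers cannot set an Authorization header on a WebSocket. A sketch with assumed inputs:

// Sketch of the connection wiring above; `token` and `tags` are assumed inputs.
function openAgentSocket(token: string, tags?: string[]): WebSocket {
  let url = 'wss://agent.deepgram.com/v1/agent/converse';
  if (tags && tags.length > 0) {
    const params = new URLSearchParams();
    tags.forEach((t) => params.append('tag', t)); // repeated ?tag=...&tag=... entries
    url += `?${params.toString()}`;
  }
  // The bearer token rides in the subprotocol list, as in the diff.
  return new WebSocket(url, ['bearer', token]);
}
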
@@ -244,39 +244,39 @@ class y {
       },
       agent: {
         language: "en",
-        speak:
+        speak: a.speak || {
           provider: { type: "deepgram", model: "aura-2-thalia-en" }
         },
-        listen:
+        listen: a.listen || {
           provider: { type: "deepgram", version: "v2", model: "flux-general-en" }
         },
         think: {
-          provider:
+          provider: a.think?.provider || { type: "open_ai", model: "gpt-4.1-nano" },
           // Functions come from server config - no client-side defaults
-          ...
+          ...a.think?.functions && { functions: a.think.functions }
         },
         greeting: "Hi! I'm excited you chose to speak with me. Are you ready to start?"
       }
     };
-
+    a.tags && a.tags.length > 0 && (h.tags = a.tags), this.ws.send(JSON.stringify(h)), this.emit({ type: "open", payload: { config: o, serverConfig: this.serverConfig } });
   };
-  const
-  this.ws.onmessage = (
-  if (typeof
+  const u = c;
+  this.ws.onmessage = (a) => {
+    if (typeof a.data == "string") {
       try {
-        if (JSON.parse(
-        const
+        if (JSON.parse(a.data).type === "SettingsApplied") {
+          const d = {
             type: "UpdatePrompt",
-            prompt:
+            prompt: u
           };
-          this.ws.send(JSON.stringify(
+          this.ws.send(JSON.stringify(d)), this.startMicrophone();
         }
       } catch {
       }
-      this.handleTextMessage(
-    } else
-  }, this.ws.onerror = (
-    console.error("[GlydeVoice] WebSocket error:",
+      this.handleTextMessage(a.data);
+    } else a.data instanceof Blob ? this.handleAudioData(a.data) : a.data instanceof ArrayBuffer && this.handleAudioBuffer(a.data);
+  }, this.ws.onerror = (a) => {
+    console.error("[GlydeVoice] WebSocket error:", a), this.emit({ type: "error", payload: a });
   }, this.ws.onclose = () => {
     this.cleanup(), this.emit({ type: "close" });
   }, this.renderUI();
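
The Settings message now falls back per field rather than wholesale: speak, listen, and think.provider each prefer the resolved Deepgram config, and think functions are spread in only when the server supplied them. Expanded from the minified hunk above; `cfg` stands for the resolved config (`a` in the minified code) and is typed loosely here:

// `cfg` is the resolved Deepgram config from the auth response or client options.
declare const cfg: any;

const settings = {
  type: 'Settings',
  audio: {
    input: { encoding: 'linear16', sample_rate: 48000 },                      // inputSampleRate
    output: { encoding: 'linear16', sample_rate: 24000, container: 'none' },  // outputSampleRate
  },
  agent: {
    language: 'en',
    speak: cfg.speak || { provider: { type: 'deepgram', model: 'aura-2-thalia-en' } },
    listen: cfg.listen || { provider: { type: 'deepgram', version: 'v2', model: 'flux-general-en' } },
    think: {
      provider: cfg.think?.provider || { type: 'open_ai', model: 'gpt-4.1-nano' },
      // Functions come from server config - no client-side defaults
      ...(cfg.think?.functions && { functions: cfg.think.functions }),
    },
    greeting: "Hi! I'm excited you chose to speak with me. Are you ready to start?",
  },
};
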
@@ -310,8 +310,8 @@ class y {
     } finally {
       URL.revokeObjectURL(e), URL.revokeObjectURL(t);
     }
-    this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (
-    const { type: i } =
+    this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (s) => {
+      const { type: i } = s.data;
       (i === "cleared" || i === "bufferEmpty") && (this.isAgentSpeaking = !1, this.agentAudioDoneReceived = !1, this.emit({ type: "agent_speaking", payload: !1 }));
     };
   }
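
The playback worklet's message port doubles as a small control channel: the main thread posts { type: 'audio' } chunks and { type: 'clear' } on interruption, and the processor answers with 'cleared' / 'bufferEmpty', which is what resets the agent-speaking flags above. A sketch of both directions, with the processor internals omitted:

// Port protocol visible in the diff; the processor name matches the registered worklet.
declare const audioContext: AudioContext;
const node = new AudioWorkletNode(audioContext, 'audio-playback-processor');
node.connect(audioContext.destination);

// Worklet -> main thread: buffer-state notifications end the agent_speaking state.
node.port.onmessage = (ev: MessageEvent) => {
  const { type } = ev.data;
  if (type === 'cleared' || type === 'bufferEmpty') {
    // agent audio finished or was interrupted
  }
};

// Main thread -> worklet: enqueue samples (transferring the buffer), or flush on barge-in.
const samples = new Float32Array(480);
node.port.postMessage({ type: 'audio', data: samples }, [samples.buffer]);
node.port.postMessage({ type: 'clear' });
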
@@ -335,8 +335,8 @@ class y {
       break;
     case "ConversationText":
       if (t.content && t.content.trim()) {
-        const
-        this.config.onTranscript && this.config.onTranscript(t.content,
+        const s = t.role === "assistant" ? "agent" : "user";
+        this.config.onTranscript && this.config.onTranscript(t.content, s), this.emit({ type: "transcript", payload: { text: t.content, role: s } }), this.saveTranscript(t.content, t.role);
       }
       break;
     case "AgentStartedSpeaking":
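
ConversationText now fans out to three sinks: the onTranscript callback, a transcript event, and saveTranscript persistence, with Deepgram's assistant role mapped to agent for consumers. From the consumer side; the contextType value and key below are placeholders:

import { GlydeVoice } from '@glydeunity/voice-sdk';

const voice = new GlydeVoice({
  contextType: 'candidate',        // placeholder context type
  publishableKey: 'pk_live_...',   // placeholder key
  onTranscript: (text, role) => {
    // role is 'agent' (mapped from Deepgram's 'assistant') or 'user'
    console.log(`[${role}] ${text}`);
  },
  onEvent: (event) => {
    if (event.type === 'transcript') console.log(event.payload);
  },
});
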
@@ -372,11 +372,11 @@ class y {
     this.audioContext.state === "suspended" && this.audioContext.resume();
     const t = e.byteLength;
     if (t === 0) return;
-    const
-    if (
-    const i =
-    for (let r = 0; r <
-    n[r] =
+    const s = t - t % 2;
+    if (s === 0) return;
+    const i = s === t ? e : e.slice(0, s), o = new Int16Array(i), n = new Float32Array(o.length);
+    for (let r = 0; r < o.length; r++)
+      n[r] = o[r] / 32768;
     const c = this.resample24kTo48k(n);
     !this.isAgentSpeaking && !this.agentAudioDoneReceived && (this.isAgentSpeaking = !0, this.emit({ type: "agent_speaking", payload: !0 }));
     const l = new Float32Array(c);
@@ -389,13 +389,13 @@ class y {
    * Resample audio from 24kHz to 48kHz using linear interpolation
    */
   resample24kTo48k(e) {
-    const t = e.length * 2,
-    for (let
-    const n = e[
-    o
+    const t = e.length * 2, s = new Float32Array(t);
+    for (let o = 0; o < e.length - 1; o++) {
+      const n = e[o], c = e[o + 1];
+      s[o * 2] = n, s[o * 2 + 1] = (n + c) / 2;
     }
     const i = e.length - 1;
-    return
+    return s[i * 2] = e[i], s[i * 2 + 1] = e[i], s;
   }
   /**
    * Clear the playback buffer (for interruption handling)
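
Agent audio arrives as 16-bit linear PCM at 24 kHz; the fixed playback path trims any odd trailing byte before taking the Int16 view, normalizes by 1/32768, then doubles the sample rate by inserting the midpoint between neighboring samples and duplicating the final one. A readable equivalent of the two hunks above; names are mine:

// Readable sketch of handleAudioBuffer + resample24kTo48k.
function pcm16At24kToFloat48k(buf: ArrayBuffer): Float32Array | null {
  const even = buf.byteLength - (buf.byteLength % 2); // Int16Array needs whole 2-byte samples
  if (even === 0) return null;
  const pcm = new Int16Array(even === buf.byteLength ? buf : buf.slice(0, even));
  const mono = new Float32Array(pcm.length);
  for (let i = 0; i < pcm.length; i++) mono[i] = pcm[i] / 32768; // normalize to [-1, 1)
  const out = new Float32Array(mono.length * 2);
  for (let i = 0; i < mono.length - 1; i++) {
    out[i * 2] = mono[i];
    out[i * 2 + 1] = (mono[i] + mono[i + 1]) / 2; // linear-interpolation midpoint
  }
  const last = mono.length - 1;
  out[last * 2] = mono[last];
  out[last * 2 + 1] = mono[last]; // no successor; repeat the final sample
  return out;
}
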
@@ -506,26 +506,38 @@ class y {
    * Handle a function call request from Deepgram Voice Agent
    * Routes function execution through the Unity voice function endpoint for proper authentication
    *
-   *
+   * Deepgram sends an array of functions in each request, so we process each one
+   * and send individual responses back.
+   *
+   * @param request - The function call request from Deepgram containing functions array
    * @see https://developers.deepgram.com/docs/voice-agents-function-calling
    */
   async handleFunctionCallRequest(e) {
-
-
-
-
-
-
-
-
-
+    for (const t of e.functions) {
+      console.log("[GlydeVoice] Function call request:", t.name, t.arguments);
+      let s = {};
+      try {
+        s = t.arguments ? JSON.parse(t.arguments) : {};
+      } catch (n) {
+        console.warn("[GlydeVoice] Failed to parse function arguments:", n);
+      }
+      let i;
+      try {
+        t.name === "end_conversation" ? i = await this.handleEndConversation(s) : i = await this.executeVoiceFunction(t.name, t.id, s);
+      } catch (n) {
+        console.error("[GlydeVoice] Function call error:", n), i = JSON.stringify({
+          error: "Function execution failed",
+          details: n instanceof Error ? n.message : String(n)
+        });
+      }
+      const o = {
+        type: "FunctionCallResponse",
+        id: t.id,
+        name: t.name,
+        content: i
+      };
+      this.ws && this.ws.readyState === WebSocket.OPEN ? (this.ws.send(JSON.stringify(o)), console.log("[GlydeVoice] Function response sent:", t.name)) : console.error("[GlydeVoice] Cannot send function response - WebSocket not open");
     }
-    const o = {
-      type: "FunctionCallResponse",
-      function_call_id: e.function_call_id,
-      output: t
-    };
-    this.ws && this.ws.readyState === WebSocket.OPEN ? (this.ws.send(JSON.stringify(o)), console.log("[GlydeVoice] Function response sent:", e.function_name)) : console.error("[GlydeVoice] Cannot send function response - WebSocket not open");
   }
   /**
    * Execute a voice function through the Unity API with proper authentication
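
De-minified, the rewritten handler loops over request.functions, parses each call's JSON arguments defensively, routes end_conversation locally and everything else through the Unity endpoint, and answers every call individually with the new id/name/content shape. A readable sketch; the declared helpers stand in for the class members visible in the diff:

import type { FunctionCallRequest, FunctionCallResponse } from '@glydeunity/voice-sdk';

// Class members seen in the diff, declared here so the sketch stands alone.
declare const ws: WebSocket | null;
declare function handleEndConversation(input: Record<string, unknown>): Promise<string>;
declare function executeVoiceFunction(name: string, callId: string, input: Record<string, unknown>): Promise<string>;

async function handleFunctionCallRequest(request: FunctionCallRequest): Promise<void> {
  for (const call of request.functions) {
    let input: Record<string, unknown> = {};
    try {
      input = call.arguments ? JSON.parse(call.arguments) : {};
    } catch (err) {
      console.warn('[GlydeVoice] Failed to parse function arguments:', err);
    }

    let content: string;
    try {
      content = call.name === 'end_conversation'
        ? await handleEndConversation(input)
        : await executeVoiceFunction(call.name, call.id, input);
    } catch (err) {
      // Errors become a structured payload so the agent can recover verbally.
      content = JSON.stringify({
        error: 'Function execution failed',
        details: err instanceof Error ? err.message : String(err),
      });
    }

    const response: FunctionCallResponse = {
      type: 'FunctionCallResponse',
      id: call.id,     // echo the call id for correlation
      name: call.name,
      content,
    };
    if (ws && ws.readyState === WebSocket.OPEN) ws.send(JSON.stringify(response));
  }
}
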
@@ -537,7 +549,7 @@ class y {
    * @param input - Function input parameters
    * @returns JSON string with function result
    */
-  async executeVoiceFunction(e, t,
+  async executeVoiceFunction(e, t, s) {
     console.log("[GlydeVoice] Executing voice function via Unity API:", e);
     try {
       const i = await fetch(`${this.unityUrl}/api/unity/voice/function`, {
@@ -546,7 +558,7 @@ class y {
         body: JSON.stringify({
           function_name: e,
           function_call_id: t,
-          input:
+          input: s,
           context: {
             context_id: this.sessionContext.contextId,
             context_type: this.sessionContext.contextType,
@@ -558,9 +570,9 @@ class y {
         const n = await i.json().catch(() => ({}));
         throw new Error(n.error?.message || `Function call failed: ${i.status}`);
       }
-      const
-      if (
-      return typeof
+      const o = await i.json();
+      if (o.success && o.data?.output)
+        return typeof o.data.output == "string" ? o.data.output : JSON.stringify(o.data.output);
       throw new Error("Invalid response from voice function endpoint");
     } catch (i) {
       return console.error("[GlydeVoice] Voice function error:", i), JSON.stringify({
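
For reference, executeVoiceFunction POSTs the body below to ${unityUrl}/api/unity/voice/function and unwraps { success, data: { output } }, stringifying non-string outputs. The request fields are taken from the diff; the response envelope is inferred from the success-path checks and is an assumption:

// Request body fields taken from the diff.
interface VoiceFunctionRequest {
  function_name: string;
  function_call_id: string;
  input: Record<string, unknown>;
  context: {
    context_id?: string;
    context_type?: string;
    current_job_uuid?: string;
  };
}

// Envelope inferred from `o.success && o.data?.output` and `n.error?.message`.
interface VoiceFunctionResponse {
  success: boolean;
  data?: { output?: string | Record<string, unknown> };
  error?: { message?: string };
}

function unwrapOutput(res: VoiceFunctionResponse): string {
  if (res.success && res.data?.output) {
    return typeof res.data.output === 'string'
      ? res.data.output
      : JSON.stringify(res.data.output);
  }
  throw new Error('Invalid response from voice function endpoint');
}
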
package/dist/voice-sdk.umd.js
CHANGED
@@ -130,11 +130,11 @@ class AudioPlaybackProcessor extends AudioWorkletProcessor {
 }

 registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
-`;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;sessionContext={};constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,
+`;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;sessionContext={};constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,s=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!s.ok){const o=await s.json();throw new Error(o.error?.message||o.message||"Failed to fetch voice config")}const{data:i}=await s.json();return i}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e={context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"};this.config.systemPrompt&&(e.system_prompt=this.config.systemPrompt),this.config.deepgramConfig&&(e.deepgram_config=this.config.deepgramConfig);const t=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify(e)});if(!t.ok){const a=await t.json();throw new Error(a.error?.message||a.message||"Failed to authenticate voice session")}const{data:s}=await t.json(),{token:i,agent_config:o,deepgram_config:n}=s;this.setSessionContext({clientUuid:o?.client_uuid,contextId:this.config.contextId,contextType:this.config.contextType,currentJobUuid:o?.job_uuid});const d=this.config.systemPrompt||o.instructions||this.serverConfig?.system_prompt||"You are a helpful AI assistant.";await this.initializeAudio();let u="wss://agent.deepgram.com/v1/agent/converse";const r=this.config.deepgramConfig||n||this.serverConfig?.deepgram_config;if(r?.tags&&r.tags.length>0){const a=new URLSearchParams;r.tags.forEach(h=>a.append("tag",h)),u+=`?${a.toString()}`}this.ws=new WebSocket(u,["bearer",i]),this.ws.onopen=()=>{const a=r||{think:{provider:{type:"open_ai",model:"gpt-4.1-nano"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}}},h={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:a.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:a.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:a.think?.provider||{type:"open_ai",model:"gpt-4.1-nano"},...a.think?.functions&&{functions:a.think.functions}},greeting:"Hi! I'm excited you chose to speak with me. Are you ready to start?"}};a.tags&&a.tags.length>0&&(h.tags=a.tags),this.ws.send(JSON.stringify(h)),this.emit({type:"open",payload:{config:o,serverConfig:this.serverConfig}})};const g=d;this.ws.onmessage=a=>{if(typeof a.data=="string"){try{if(JSON.parse(a.data).type==="SettingsApplied"){const y={type:"UpdatePrompt",prompt:g};this.ws.send(JSON.stringify(y)),this.startMicrophone()}}catch{}this.handleTextMessage(a.data)}else a.data instanceof Blob?this.handleAudioData(a.data):a.data instanceof ArrayBuffer&&this.handleAudioBuffer(a.data)},this.ws.onerror=a=>{console.error("[GlydeVoice] WebSocket error:",a),this.emit({type:"error",payload:a})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(l),t=this.createWorkletBlobUrl(p);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=s=>{const{type:i}=s.data;(i==="cleared"||i==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const s=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,s),this.emit({type:"transcript",payload:{text:t.content,role:s}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break;case"FunctionCallRequest":this.handleFunctionCallRequest(t);break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const s=t-t%2;if(s===0)return;const i=s===t?e:e.slice(0,s),o=new Int16Array(i),n=new Float32Array(o.length);for(let r=0;r<o.length;r++)n[r]=o[r]/32768;const d=this.resample24kTo48k(n);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const u=new Float32Array(d);this.playbackWorkletNode.port.postMessage({type:"audio",data:u},[u.buffer])}resample24kTo48k(e){const t=e.length*2,s=new Float32Array(t);for(let o=0;o<e.length-1;o++){const n=e[o],d=e[o+1];s[o*2]=n,s[o*2+1]=(n+d)/2}const i=e.length-1;return s[i*2]=e[i],s[i*2+1]=e[i],s}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
   <div style="padding: 20px; border: 1px solid #ccc; border-radius: 8px; background: #fff;">
     <h3>Glyde Voice Agent</h3>
     <p>Status: Active</p>
     <p>Context: ${this.config.contextType}</p>
     <button onclick="this.closest('div').remove()">Close</button>
   </div>
-`)}async handleFunctionCallRequest(e){console.log("[GlydeVoice] Function call request:",
+`)}async handleFunctionCallRequest(e){for(const t of e.functions){console.log("[GlydeVoice] Function call request:",t.name,t.arguments);let s={};try{s=t.arguments?JSON.parse(t.arguments):{}}catch(n){console.warn("[GlydeVoice] Failed to parse function arguments:",n)}let i;try{t.name==="end_conversation"?i=await this.handleEndConversation(s):i=await this.executeVoiceFunction(t.name,t.id,s)}catch(n){console.error("[GlydeVoice] Function call error:",n),i=JSON.stringify({error:"Function execution failed",details:n instanceof Error?n.message:String(n)})}const o={type:"FunctionCallResponse",id:t.id,name:t.name,content:i};this.ws&&this.ws.readyState===WebSocket.OPEN?(this.ws.send(JSON.stringify(o)),console.log("[GlydeVoice] Function response sent:",t.name)):console.error("[GlydeVoice] Cannot send function response - WebSocket not open")}}async executeVoiceFunction(e,t,s){console.log("[GlydeVoice] Executing voice function via Unity API:",e);try{const i=await fetch(`${this.unityUrl}/api/unity/voice/function`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({function_name:e,function_call_id:t,input:s,context:{context_id:this.sessionContext.contextId,context_type:this.sessionContext.contextType,current_job_uuid:this.sessionContext.currentJobUuid}})});if(!i.ok){const n=await i.json().catch(()=>({}));throw new Error(n.error?.message||`Function call failed: ${i.status}`)}const o=await i.json();if(o.success&&o.data?.output)return typeof o.data.output=="string"?o.data.output:JSON.stringify(o.data.output);throw new Error("Invalid response from voice function endpoint")}catch(i){return console.error("[GlydeVoice] Voice function error:",i),JSON.stringify({success:!1,error:i instanceof Error?i.message:"Function execution failed",fallback_message:"I apologize, but I'm having trouble with that request right now. Is there something else I can help you with?"})}}async handleEndConversation(e){const t=e.item||"user request";return console.log(`[GlydeVoice] End conversation triggered by: ${t}`),setTimeout(()=>{this.stop()},2e3),JSON.stringify({success:!0,message:"Conversation ending. Say goodbye to the user.",trigger_phrase:t})}setSessionContext(e){this.sessionContext={...this.sessionContext,...e},console.log("[GlydeVoice] Session context updated:",{hasContextId:!!e.contextId,contextType:e.contextType,hasJobUuid:!!e.currentJobUuid})}}c.GlydeVoice=f,Object.defineProperty(c,Symbol.toStringTag,{value:"Module"})}));