@glydeunity/voice-sdk 1.3.2 → 1.3.3
This diff shows the contents of publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
- package/dist/index.d.ts +77 -0
- package/dist/voice-sdk.es.js +166 -175
- package/dist/voice-sdk.umd.js +4 -76
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
@@ -43,6 +43,28 @@ export declare interface DeepgramAgentConfig {
     };
 }
 
+/**
+ * Function call request from Deepgram Voice Agent
+ * @see https://developers.deepgram.com/docs/voice-agent-function-call-request
+ */
+export declare interface FunctionCallRequest {
+    type: 'FunctionCallRequest';
+    function_name: string;
+    function_call_id: string;
+    input: Record<string, unknown>;
+    client_side?: boolean;
+}
+
+/**
+ * Function call response to send back to Deepgram
+ * @see https://developers.deepgram.com/docs/voice-agent-function-call-response
+ */
+export declare interface FunctionCallResponse {
+    type: 'FunctionCallResponse';
+    function_call_id: string;
+    output: string;
+}
+
 /**
  * GlydeVoice - Voice Agent Client
  *
@@ -74,6 +96,7 @@ export declare class GlydeVoice {
     private readonly inputSampleRate;
     private isAgentSpeaking;
     private agentAudioDoneReceived;
+    private sessionContext;
     /**
      * Create a new GlydeVoice instance
      * @param config - Configuration options
@@ -168,6 +191,46 @@ export declare class GlydeVoice {
      * Render a simple UI widget (optional)
      */
     private renderUI;
+    /**
+     * Handle a function call request from Deepgram Voice Agent
+     * Routes function execution through the Unity voice function endpoint for proper authentication
+     *
+     * @param request - The function call request from Deepgram
+     * @see https://developers.deepgram.com/docs/voice-agents-function-calling
+     */
+    private handleFunctionCallRequest;
+    /**
+     * Execute a voice function through the Unity API with proper authentication
+     * Uses the dedicated /api/unity/voice/function endpoint which handles
+     * publishable key authentication and data controls
+     *
+     * @param functionName - Name of the function to execute
+     * @param functionCallId - Unique ID for tracking
+     * @param input - Function input parameters
+     * @returns JSON string with function result
+     */
+    private executeVoiceFunction;
+    /**
+     * Handle the end_conversation function locally
+     * Gracefully ends the voice session (no server call needed)
+     *
+     * @param input - Function parameters including the trigger phrase
+     * @returns Acknowledgment string for the agent
+     */
+    private handleEndConversation;
+    /**
+     * Set session context for function calls
+     * Called internally after authentication to provide job context for function routing
+     * Context is passed to the backend voice function endpoint for proper data controls
+     *
+     * @param context - Session context with contextId, contextType, and job details
+     */
+    setSessionContext(context: {
+        clientUuid?: string;
+        contextId?: string;
+        contextType?: VoiceContextType;
+        currentJobUuid?: string;
+    }): void;
 }
 
 /**
@@ -198,6 +261,20 @@ export declare interface GlydeVoiceConfig {
     deepgramConfig?: DeepgramAgentConfig;
 }
 
+/**
+ * Job opportunity result from search
+ */
+export declare interface JobOpportunity {
+    job_uuid: string;
+    title: string;
+    location?: string;
+    department?: string;
+    salary_range?: string;
+    employment_type?: string;
+    description_snippet?: string;
+    match_score?: number;
+}
+
 /**
  * MCP Tool definition for voice agent
  */
package/dist/voice-sdk.es.js
CHANGED
@@ -1,4 +1,4 @@
-const 
+const p = `
 class AudioCaptureProcessor extends AudioWorkletProcessor {
   constructor() {
     super();
@@ -151,6 +151,8 @@ class y {
   // Agent state
   isAgentSpeaking = !1;
   agentAudioDoneReceived = !1;
+  // Session context for function calls (passed to backend for data controls)
+  sessionContext = {};
   /**
    * Create a new GlydeVoice instance
    * @param config - Configuration options
@@ -174,16 +176,16 @@ class y {
    * @returns Voice configuration including system prompt, tools, and Deepgram settings
    */
   async fetchConfig() {
-    const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e,
+    const e = `${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`, t = this.config.contextId ? `${e}/${this.config.contextId}` : e, o = await fetch(t, {
      method: "GET",
      headers: this.getAuthHeaders()
    });
-    if (!
-    const 
-    throw new Error(
+    if (!o.ok) {
+      const a = await o.json();
+      throw new Error(a.error?.message || a.message || "Failed to fetch voice config");
    }
-    const { data: 
-    return 
+    const { data: i } = await o.json();
+    return i;
  }
  /**
   * Initialize and start the voice session
@@ -204,85 +206,30 @@ class y {
      body: JSON.stringify(e)
    });
    if (!t.ok) {
-      const 
-      throw new Error(
+      const s = await t.json();
+      throw new Error(s.error?.message || s.message || "Failed to authenticate voice session");
    }
-    const { data: 
+    const { data: o } = await t.json(), { token: i, agent_config: a, deepgram_config: n } = o;
+    this.setSessionContext({
+      clientUuid: a?.client_uuid,
+      contextId: this.config.contextId,
+      contextType: this.config.contextType,
+      currentJobUuid: a?.job_uuid
+    });
+    const c = this.config.systemPrompt || a.instructions || this.serverConfig?.system_prompt || "You are a helpful AI assistant.";
    await this.initializeAudio();
-    let 
+    let l = "wss://agent.deepgram.com/v1/agent/converse";
    const r = this.config.deepgramConfig || n || this.serverConfig?.deepgram_config;
    if (r?.tags && r.tags.length > 0) {
-      const 
-      r.tags.forEach((
+      const s = new URLSearchParams();
+      r.tags.forEach((h) => s.append("tag", h)), l += `?${s.toString()}`;
    }
-    this.ws = new WebSocket(
-    const 
-      think: {
-        provider: { type: "open_ai", model: "gpt-4.1-mini" },
-        functions: [
-          {
-            name: "end_conversation",
-            description: `You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
-
-Here is a list of phrases to listen for but not restricted to:
--stop
--shut up
--go away
--turn off
--stop listening
-
-Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
-
-When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
-
-End the conversation immediately if:
-1. The user's input exactly matches any phrase in the list.
-2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
-3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,
-            parameters: {
-              type: "object",
-              properties: {
-                item: { type: "string", description: "The phrase or text that triggered the end of conversation" }
-              },
-              required: ["item"]
-            }
-          },
-          {
-            name: "other_opportunities",
-            description: `You are an AI assistant that monitors conversations to identify whether the candidate should be informed about other job opportunities.
-
-If the candidate appears to be a poor fit for any of the must have requirements, gently suggest that there are other job opportunities with the company and you could inform about other roles if they are interested. If they are not interested, you should continue with the conversation about the current role.
-
-Here is a list of phrases to listen for but not restricted to:
--other opportunities
--other jobs
--other roles
--other job opportunities
--other job roles
--other job opportunities
-
-When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos. Additionally monitor for input that suggests the candidate does not meet the criteria for the current role or if they'd like to know about urgent or immediate opportunities.
-
-Suggest other opportunities if:
-1. The user's input exactly matches any phrase in the list.
-2. The user's input is a close variation of any phrase in the list (e.g., "please other opportunities" instead of "other opportunities").
-3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.
-4. The user's input clearly expresses a desire to know about urgent or immediate opportunities.
-
-If the candidate is interested in other opportunities, you should call a GLYDE Unity MCP tool to identify other job openings.`,
-            parameters: {
-              type: "object",
-              properties: {
-                item: { type: "string", description: "The phrase or text that triggered the suggestion of other opportunities" }
-              },
-              required: ["item"]
-            }
-          }
-        ]
-      },
+    this.ws = new WebSocket(l, ["bearer", i]), this.ws.onopen = () => {
+      const s = r || {
+        think: { provider: { type: "open_ai", model: "gpt-5-nano" } },
        speak: { provider: { type: "deepgram", model: "aura-2-thalia-en" } },
        listen: { provider: { type: "deepgram", version: "v2", model: "flux-general-en" } }
-      },
+      }, h = {
        type: "Settings",
        audio: {
          input: {
@@ -297,97 +244,39 @@ If the candidate is interested in other opportunities, you should call a GLYDE U
        },
        agent: {
          language: "en",
-          speak: 
+          speak: s.speak || {
            provider: { type: "deepgram", model: "aura-2-thalia-en" }
          },
-          listen: 
+          listen: s.listen || {
            provider: { type: "deepgram", version: "v2", model: "flux-general-en" }
          },
          think: {
-            provider: 
-
-
-              name: "end_conversation",
-              description: `You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
-
-Here is a list of phrases to listen for but not restricted to:
--stop
--shut up
--go away
--turn off
--stop listening
-
-Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
-
-When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
-
-End the conversation immediately if:
-1. The user's input exactly matches any phrase in the list.
-2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
-3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,
-              parameters: {
-                type: "object",
-                properties: {
-                  item: { type: "string", description: "The phrase or text that triggered the end of conversation" }
-                },
-                required: ["item"]
-              }
-            },
-            {
-              name: "other_opportunities",
-              description: `You are an AI assistant that monitors conversations to identify whether the candidate should be informed about other job opportunities.
-
-If the candidate appears to be a poor fit for any of the must have requirements, gently suggest that there are other job opportunities with the company and you could inform about other roles if they are interested. If they are not interested, you should continue with the conversation about the current role.
-
-Here is a list of phrases to listen for but not restricted to:
--other opportunities
--other jobs
--other roles
--other job opportunities
--other job roles
--other job opportunities
-
-When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos. Additionally monitor for input that suggests the candidate does not meet the criteria for the current role or if they'd like to know about urgent or immediate opportunities.
-
-Suggest other opportunities if:
-1. The user's input exactly matches any phrase in the list.
-2. The user's input is a close variation of any phrase in the list (e.g., "please other opportunities" instead of "other opportunities").
-3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.
-4. The user's input clearly expresses a desire to know about urgent or immediate opportunities.
-
-If the candidate is interested in other opportunities, you should call a GLYDE Unity MCP tool to identify other job openings.`,
-              parameters: {
-                type: "object",
-                properties: {
-                  item: { type: "string", description: "The phrase or text that triggered the suggestion of other opportunities" }
-                },
-                required: ["item"]
-              }
-            }
-            ]
+            provider: s.think?.provider || { type: "open_ai", model: "gpt-5-nano" },
+            // Functions come from server config - no client-side defaults
+            ...s.think?.functions && { functions: s.think.functions }
          },
          greeting: "Hi! I'm excited you chose to speak with me. Are you ready to start?"
        }
      };
-
+      s.tags && s.tags.length > 0 && (h.tags = s.tags), this.ws.send(JSON.stringify(h)), this.emit({ type: "open", payload: { config: a, serverConfig: this.serverConfig } });
    };
-    const 
-    this.ws.onmessage = (
-      if (typeof 
+    const d = c;
+    this.ws.onmessage = (s) => {
+      if (typeof s.data == "string") {
        try {
-          if (JSON.parse(
-            const 
+          if (JSON.parse(s.data).type === "SettingsApplied") {
+            const u = {
              type: "UpdatePrompt",
-              prompt: 
+              prompt: d
            };
-            this.ws.send(JSON.stringify(
+            this.ws.send(JSON.stringify(u)), this.startMicrophone();
          }
        } catch {
        }
-        this.handleTextMessage(
-      } else 
-    }, this.ws.onerror = (
-      console.error("[GlydeVoice] WebSocket error:", 
+        this.handleTextMessage(s.data);
+      } else s.data instanceof Blob ? this.handleAudioData(s.data) : s.data instanceof ArrayBuffer && this.handleAudioBuffer(s.data);
+    }, this.ws.onerror = (s) => {
+      console.error("[GlydeVoice] WebSocket error:", s), this.emit({ type: "error", payload: s });
    }, this.ws.onclose = () => {
      this.cleanup(), this.emit({ type: "close" });
    }, this.renderUI();
@@ -412,7 +301,7 @@ If the candidate is interested in other opportunities, you should call a GLYDE U
   */
  async initializeAudio() {
    this.audioContext = new AudioContext({ sampleRate: this.inputSampleRate });
-    const e = this.createWorkletBlobUrl(
+    const e = this.createWorkletBlobUrl(p), t = this.createWorkletBlobUrl(f);
    try {
      await Promise.all([
        this.audioContext.audioWorklet.addModule(e),
@@ -421,9 +310,9 @@ If the candidate is interested in other opportunities, you should call a GLYDE U
    } finally {
      URL.revokeObjectURL(e), URL.revokeObjectURL(t);
    }
-    this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (
-      const { type: 
-      (
+    this.playbackWorkletNode = new AudioWorkletNode(this.audioContext, "audio-playback-processor"), this.playbackWorkletNode.connect(this.audioContext.destination), this.playbackWorkletNode.port.onmessage = (o) => {
+      const { type: i } = o.data;
+      (i === "cleared" || i === "bufferEmpty") && (this.isAgentSpeaking = !1, this.agentAudioDoneReceived = !1, this.emit({ type: "agent_speaking", payload: !1 }));
    };
  }
  /**
@@ -446,8 +335,8 @@ If the candidate is interested in other opportunities, you should call a GLYDE U
          break;
        case "ConversationText":
          if (t.content && t.content.trim()) {
-            const 
-            this.config.onTranscript && this.config.onTranscript(t.content, 
+            const o = t.role === "assistant" ? "agent" : "user";
+            this.config.onTranscript && this.config.onTranscript(t.content, o), this.emit({ type: "transcript", payload: { text: t.content, role: o } }), this.saveTranscript(t.content, t.role);
          }
          break;
        case "AgentStartedSpeaking":
@@ -459,6 +348,9 @@ If the candidate is interested in other opportunities, you should call a GLYDE U
        case "Error":
          console.error("[GlydeVoice] Agent error:", t), this.emit({ type: "error", payload: t });
          break;
+        case "FunctionCallRequest":
+          this.handleFunctionCallRequest(t);
+          break;
      }
    } catch (t) {
      console.error("[GlydeVoice] Failed to parse message:", t);
@@ -480,30 +372,30 @@ If the candidate is interested in other opportunities, you should call a GLYDE U
    this.audioContext.state === "suspended" && this.audioContext.resume();
    const t = e.byteLength;
    if (t === 0) return;
-    const 
-    if (
-    const 
-    for (let r = 0; r < 
-      n[r] = 
-    const 
+    const o = t - t % 2;
+    if (o === 0) return;
+    const i = o === t ? e : e.slice(0, o), a = new Int16Array(i), n = new Float32Array(a.length);
+    for (let r = 0; r < a.length; r++)
+      n[r] = a[r] / 32768;
+    const c = this.resample24kTo48k(n);
    !this.isAgentSpeaking && !this.agentAudioDoneReceived && (this.isAgentSpeaking = !0, this.emit({ type: "agent_speaking", payload: !0 }));
-    const 
+    const l = new Float32Array(c);
    this.playbackWorkletNode.port.postMessage({
      type: "audio",
-      data: 
-    }, [
+      data: l
+    }, [l.buffer]);
  }
  /**
   * Resample audio from 24kHz to 48kHz using linear interpolation
   */
  resample24kTo48k(e) {
-    const t = e.length * 2, 
-    for (let 
-      const n = e[
-
+    const t = e.length * 2, o = new Float32Array(t);
+    for (let a = 0; a < e.length - 1; a++) {
+      const n = e[a], c = e[a + 1];
+      o[a * 2] = n, o[a * 2 + 1] = (n + c) / 2;
    }
-    const 
-    return 
+    const i = e.length - 1;
+    return o[i * 2] = e[i], o[i * 2 + 1] = e[i], o;
  }
  /**
   * Clear the playback buffer (for interruption handling)
@@ -610,6 +502,105 @@ If the candidate is interested in other opportunities, you should call a GLYDE U
      </div>
    `);
  }
+  /**
+   * Handle a function call request from Deepgram Voice Agent
+   * Routes function execution through the Unity voice function endpoint for proper authentication
+   *
+   * @param request - The function call request from Deepgram
+   * @see https://developers.deepgram.com/docs/voice-agents-function-calling
+   */
+  async handleFunctionCallRequest(e) {
+    console.log("[GlydeVoice] Function call request:", e.function_name, e.input);
+    let t;
+    try {
+      e.function_name === "end_conversation" ? t = await this.handleEndConversation(e.input) : t = await this.executeVoiceFunction(e.function_name, e.function_call_id, e.input);
+    } catch (i) {
+      console.error("[GlydeVoice] Function call error:", i), t = JSON.stringify({
+        error: "Function execution failed",
+        details: i instanceof Error ? i.message : String(i)
+      });
+    }
+    const o = {
+      type: "FunctionCallResponse",
+      function_call_id: e.function_call_id,
+      output: t
+    };
+    this.ws && this.ws.readyState === WebSocket.OPEN ? (this.ws.send(JSON.stringify(o)), console.log("[GlydeVoice] Function response sent:", e.function_name)) : console.error("[GlydeVoice] Cannot send function response - WebSocket not open");
+  }
+  /**
+   * Execute a voice function through the Unity API with proper authentication
+   * Uses the dedicated /api/unity/voice/function endpoint which handles
+   * publishable key authentication and data controls
+   *
+   * @param functionName - Name of the function to execute
+   * @param functionCallId - Unique ID for tracking
+   * @param input - Function input parameters
+   * @returns JSON string with function result
+   */
+  async executeVoiceFunction(e, t, o) {
+    console.log("[GlydeVoice] Executing voice function via Unity API:", e);
+    try {
+      const i = await fetch(`${this.unityUrl}/api/unity/voice/function`, {
+        method: "POST",
+        headers: this.getAuthHeaders(),
+        body: JSON.stringify({
+          function_name: e,
+          function_call_id: t,
+          input: o,
+          context: {
+            context_id: this.sessionContext.contextId,
+            context_type: this.sessionContext.contextType,
+            current_job_uuid: this.sessionContext.currentJobUuid
+          }
+        })
+      });
+      if (!i.ok) {
+        const n = await i.json().catch(() => ({}));
+        throw new Error(n.error?.message || `Function call failed: ${i.status}`);
+      }
+      const a = await i.json();
+      if (a.success && a.data?.output)
+        return typeof a.data.output == "string" ? a.data.output : JSON.stringify(a.data.output);
+      throw new Error("Invalid response from voice function endpoint");
+    } catch (i) {
+      return console.error("[GlydeVoice] Voice function error:", i), JSON.stringify({
+        success: !1,
+        error: i instanceof Error ? i.message : "Function execution failed",
+        fallback_message: "I apologize, but I'm having trouble with that request right now. Is there something else I can help you with?"
+      });
+    }
+  }
+  /**
+   * Handle the end_conversation function locally
+   * Gracefully ends the voice session (no server call needed)
+   *
+   * @param input - Function parameters including the trigger phrase
+   * @returns Acknowledgment string for the agent
+   */
+  async handleEndConversation(e) {
+    const t = e.item || "user request";
+    return console.log(`[GlydeVoice] End conversation triggered by: ${t}`), setTimeout(() => {
+      this.stop();
+    }, 2e3), JSON.stringify({
+      success: !0,
+      message: "Conversation ending. Say goodbye to the user.",
+      trigger_phrase: t
+    });
+  }
+  /**
+   * Set session context for function calls
+   * Called internally after authentication to provide job context for function routing
+   * Context is passed to the backend voice function endpoint for proper data controls
+   *
+   * @param context - Session context with contextId, contextType, and job details
+   */
+  setSessionContext(e) {
+    this.sessionContext = { ...this.sessionContext, ...e }, console.log("[GlydeVoice] Session context updated:", {
+      hasContextId: !!e.contextId,
+      contextType: e.contextType,
+      hasJobUuid: !!e.currentJobUuid
+    });
+  }
 }
 export {
  y as GlydeVoice
package/dist/voice-sdk.umd.js
CHANGED
@@ -1,4 +1,4 @@
-(function(
+(function(c,l){typeof exports=="object"&&typeof module<"u"?l(exports):typeof define=="function"&&define.amd?define(["exports"],l):(c=typeof globalThis<"u"?globalThis:c||self,l(c.GlydeVoice={}))})(this,(function(c){"use strict";const l=`
 class AudioCaptureProcessor extends AudioWorkletProcessor {
   constructor() {
     super();
@@ -33,7 +33,7 @@ class AudioCaptureProcessor extends AudioWorkletProcessor {
 }
 
 registerProcessor('audio-capture-processor', AudioCaptureProcessor);
-`,
+`,p=`
 class AudioPlaybackProcessor extends AudioWorkletProcessor {
   constructor() {
     super();
@@ -130,83 +130,11 @@ class AudioPlaybackProcessor extends AudioWorkletProcessor {
 }
 
 registerProcessor('audio-playback-processor', AudioPlaybackProcessor);
-`;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,
-
-Here is a list of phrases to listen for but not restricted to:
--stop
--shut up
--go away
--turn off
--stop listening
-
-Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
-
-When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
-
-End the conversation immediately if:
-1. The user's input exactly matches any phrase in the list.
-2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
-3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,parameters:{type:"object",properties:{item:{type:"string",description:"The phrase or text that triggered the end of conversation"}},required:["item"]}},{name:"other_opportunities",description:`You are an AI assistant that monitors conversations to identify whether the candidate should be informed about other job opportunities.
-
-If the candidate appears to be a poor fit for any of the must have requirements, gently suggest that there are other job opportunities with the company and you could inform about other roles if they are interested. If they are not interested, you should continue with the conversation about the current role.
-
-Here is a list of phrases to listen for but not restricted to:
--other opportunities
--other jobs
--other roles
--other job opportunities
--other job roles
--other job opportunities
-
-When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos. Additionally monitor for input that suggests the candidate does not meet the criteria for the current role or if they'd like to know about urgent or immediate opportunities.
-
-Suggest other opportunities if:
-1. The user's input exactly matches any phrase in the list.
-2. The user's input is a close variation of any phrase in the list (e.g., "please other opportunities" instead of "other opportunities").
-3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.
-4. The user's input clearly expresses a desire to know about urgent or immediate opportunities.
-
-If the candidate is interested in other opportunities, you should call a GLYDE Unity MCP tool to identify other job openings.`,parameters:{type:"object",properties:{item:{type:"string",description:"The phrase or text that triggered the suggestion of other opportunities"}},required:["item"]}}]},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}}},d={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:i.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:i.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:i.think?.provider||{type:"open_ai",model:"gpt-4.1-mini"},functions:i.think?.functions||[{name:"end_conversation",description:`You are an AI assistant that monitors conversations and ends them when specific stop phrases are detected.
-
-Here is a list of phrases to listen for but not restricted to:
--stop
--shut up
--go away
--turn off
--stop listening
-
-Before ending the conversation, always say a brief, polite goodbye such as "Goodbye!", "Take care!", or "Have a great day!".
-
-When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos.
-
-End the conversation immediately if:
-1. The user's input exactly matches any phrase in the list.
-2. The user's input is a close variation of any phrase in the list (e.g., "please shut up" instead of "shut up").
-3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.`,parameters:{type:"object",properties:{item:{type:"string",description:"The phrase or text that triggered the end of conversation"}},required:["item"]}},{name:"other_opportunities",description:`You are an AI assistant that monitors conversations to identify whether the candidate should be informed about other job opportunities.
-
-If the candidate appears to be a poor fit for any of the must have requirements, gently suggest that there are other job opportunities with the company and you could inform about other roles if they are interested. If they are not interested, you should continue with the conversation about the current role.
-
-Here is a list of phrases to listen for but not restricted to:
--other opportunities
--other jobs
--other roles
--other job opportunities
--other job roles
--other job opportunities
-
-When monitoring the conversation, pay close attention to any input that matches or closely resembles the phrases listed above. The matching should be case-insensitive and allow for minor variations or typos. Additionally monitor for input that suggests the candidate does not meet the criteria for the current role or if they'd like to know about urgent or immediate opportunities.
-
-Suggest other opportunities if:
-1. The user's input exactly matches any phrase in the list.
-2. The user's input is a close variation of any phrase in the list (e.g., "please other opportunities" instead of "other opportunities").
-3. The user's input clearly expresses a desire to end the conversation, even if it doesn't use the exact phrases listed.
-4. The user's input clearly expresses a desire to know about urgent or immediate opportunities.
-
-If the candidate is interested in other opportunities, you should call a GLYDE Unity MCP tool to identify other job openings.`,parameters:{type:"object",properties:{item:{type:"string",description:"The phrase or text that triggered the suggestion of other opportunities"}},required:["item"]}}]},greeting:"Hi! I'm excited you chose to speak with me. Are you ready to start?"}};i.tags&&i.tags.length>0&&(d.tags=i.tags),this.ws.send(JSON.stringify(d)),this.emit({type:"open",payload:{config:o,serverConfig:this.serverConfig}})};const g=p;this.ws.onmessage=i=>{if(typeof i.data=="string"){try{if(JSON.parse(i.data).type==="SettingsApplied"){const y={type:"UpdatePrompt",prompt:g};this.ws.send(JSON.stringify(y)),this.startMicrophone()}}catch{}this.handleTextMessage(i.data)}else i.data instanceof Blob?this.handleAudioData(i.data):i.data instanceof ArrayBuffer&&this.handleAudioBuffer(i.data)},this.ws.onerror=i=>{console.error("[GlydeVoice] WebSocket error:",i),this.emit({type:"error",payload:i})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(l),t=this.createWorkletBlobUrl(u);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=s=>{const{type:a}=s.data;(a==="cleared"||a==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const s=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,s),this.emit({type:"transcript",payload:{text:t.content,role:s}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const s=t-t%2;if(s===0)return;const a=s===t?e:e.slice(0,s),o=new Int16Array(a),n=new Float32Array(o.length);for(let r=0;r<o.length;r++)n[r]=o[r]/32768;const p=this.resample24kTo48k(n);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const c=new Float32Array(p);this.playbackWorkletNode.port.postMessage({type:"audio",data:c},[c.buffer])}resample24kTo48k(e){const t=e.length*2,s=new Float32Array(t);for(let o=0;o<e.length-1;o++){const n=e[o],p=e[o+1];s[o*2]=n,s[o*2+1]=(n+p)/2}const a=e.length-1;return s[a*2]=e[a],s[a*2+1]=e[a],s}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
+`;class f{config;unityUrl;active=!1;serverConfig=null;ws=null;audioContext=null;mediaStream=null;captureWorkletNode=null;playbackWorkletNode=null;isMuted=!1;outputSampleRate=24e3;inputSampleRate=48e3;isAgentSpeaking=!1;agentAudioDoneReceived=!1;sessionContext={};constructor(e){this.config=e,this.unityUrl=e.unityBaseUrl||"https://api.glydeunity.com",!e.publishableKey&&!e.apiKey&&!e.authToken&&console.warn("[GlydeVoice] No authentication method provided. One of publishableKey, apiKey, or authToken is required.")}getAuthHeaders(){const e={"Content-Type":"application/json"};return this.config.publishableKey&&(e["x-publishable-key"]=this.config.publishableKey),this.config.apiKey&&(e["x-api-key"]=this.config.apiKey),this.config.authToken&&(e.Authorization=`Bearer ${this.config.authToken}`),e}async fetchConfig(){const e=`${this.unityUrl}/api/unity/voice/config/${this.config.contextType}`,t=this.config.contextId?`${e}/${this.config.contextId}`:e,o=await fetch(t,{method:"GET",headers:this.getAuthHeaders()});if(!o.ok){const a=await o.json();throw new Error(a.error?.message||a.message||"Failed to fetch voice config")}const{data:i}=await o.json();return i}async start(){if(!this.active){this.active=!0;try{this.config.systemPrompt||(this.serverConfig=await this.fetchConfig(),console.log("[GlydeVoice] Fetched config:",this.serverConfig));const e={context_id:this.config.contextId,domain:typeof window<"u"?window.location.hostname:"localhost"};this.config.systemPrompt&&(e.system_prompt=this.config.systemPrompt),this.config.deepgramConfig&&(e.deepgram_config=this.config.deepgramConfig);const t=await fetch(`${this.unityUrl}/api/unity/voice/auth`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify(e)});if(!t.ok){const s=await t.json();throw new Error(s.error?.message||s.message||"Failed to authenticate voice session")}const{data:o}=await t.json(),{token:i,agent_config:a,deepgram_config:n}=o;this.setSessionContext({clientUuid:a?.client_uuid,contextId:this.config.contextId,contextType:this.config.contextType,currentJobUuid:a?.job_uuid});const d=this.config.systemPrompt||a.instructions||this.serverConfig?.system_prompt||"You are a helpful AI assistant.";await this.initializeAudio();let h="wss://agent.deepgram.com/v1/agent/converse";const r=this.config.deepgramConfig||n||this.serverConfig?.deepgram_config;if(r?.tags&&r.tags.length>0){const s=new URLSearchParams;r.tags.forEach(u=>s.append("tag",u)),h+=`?${s.toString()}`}this.ws=new WebSocket(h,["bearer",i]),this.ws.onopen=()=>{const s=r||{think:{provider:{type:"open_ai",model:"gpt-5-nano"}},speak:{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}}},u={type:"Settings",audio:{input:{encoding:"linear16",sample_rate:this.inputSampleRate},output:{encoding:"linear16",sample_rate:this.outputSampleRate,container:"none"}},agent:{language:"en",speak:s.speak||{provider:{type:"deepgram",model:"aura-2-thalia-en"}},listen:s.listen||{provider:{type:"deepgram",version:"v2",model:"flux-general-en"}},think:{provider:s.think?.provider||{type:"open_ai",model:"gpt-5-nano"},...s.think?.functions&&{functions:s.think.functions}},greeting:"Hi! I'm excited you chose to speak with me. Are you ready to start?"}};s.tags&&s.tags.length>0&&(u.tags=s.tags),this.ws.send(JSON.stringify(u)),this.emit({type:"open",payload:{config:a,serverConfig:this.serverConfig}})};const g=d;this.ws.onmessage=s=>{if(typeof s.data=="string"){try{if(JSON.parse(s.data).type==="SettingsApplied"){const y={type:"UpdatePrompt",prompt:g};this.ws.send(JSON.stringify(y)),this.startMicrophone()}}catch{}this.handleTextMessage(s.data)}else s.data instanceof Blob?this.handleAudioData(s.data):s.data instanceof ArrayBuffer&&this.handleAudioBuffer(s.data)},this.ws.onerror=s=>{console.error("[GlydeVoice] WebSocket error:",s),this.emit({type:"error",payload:s})},this.ws.onclose=()=>{this.cleanup(),this.emit({type:"close"})},this.renderUI()}catch(e){throw console.error("[GlydeVoice] Error starting session:",e),this.active=!1,this.emit({type:"error",payload:e}),e}}}createWorkletBlobUrl(e){const t=new Blob([e],{type:"application/javascript"});return URL.createObjectURL(t)}async initializeAudio(){this.audioContext=new AudioContext({sampleRate:this.inputSampleRate});const e=this.createWorkletBlobUrl(l),t=this.createWorkletBlobUrl(p);try{await Promise.all([this.audioContext.audioWorklet.addModule(e),this.audioContext.audioWorklet.addModule(t)])}finally{URL.revokeObjectURL(e),URL.revokeObjectURL(t)}this.playbackWorkletNode=new AudioWorkletNode(this.audioContext,"audio-playback-processor"),this.playbackWorkletNode.connect(this.audioContext.destination),this.playbackWorkletNode.port.onmessage=o=>{const{type:i}=o.data;(i==="cleared"||i==="bufferEmpty")&&(this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!1}))}}handleTextMessage(e){try{const t=JSON.parse(e);switch(t.type){case"Welcome":this.emit({type:"ready"});break;case"SettingsApplied":break;case"UserStartedSpeaking":this.emit({type:"user_speaking",payload:!0}),this.clearPlaybackBuffer(),this.isAgentSpeaking=!1,this.agentAudioDoneReceived=!1;break;case"UserStoppedSpeaking":this.emit({type:"user_speaking",payload:!1});break;case"ConversationText":if(t.content&&t.content.trim()){const o=t.role==="assistant"?"agent":"user";this.config.onTranscript&&this.config.onTranscript(t.content,o),this.emit({type:"transcript",payload:{text:t.content,role:o}}),this.saveTranscript(t.content,t.role)}break;case"AgentStartedSpeaking":this.isAgentSpeaking=!0,this.agentAudioDoneReceived=!1,this.emit({type:"agent_speaking",payload:!0});break;case"AgentAudioDone":this.agentAudioDoneReceived=!0;break;case"Error":console.error("[GlydeVoice] Agent error:",t),this.emit({type:"error",payload:t});break;case"FunctionCallRequest":this.handleFunctionCallRequest(t);break}}catch(t){console.error("[GlydeVoice] Failed to parse message:",t)}}async handleAudioData(e){const t=await e.arrayBuffer();this.handleAudioBuffer(t)}handleAudioBuffer(e){if(!this.playbackWorkletNode||!this.audioContext)return;this.audioContext.state==="suspended"&&this.audioContext.resume();const t=e.byteLength;if(t===0)return;const o=t-t%2;if(o===0)return;const i=o===t?e:e.slice(0,o),a=new Int16Array(i),n=new Float32Array(a.length);for(let r=0;r<a.length;r++)n[r]=a[r]/32768;const d=this.resample24kTo48k(n);!this.isAgentSpeaking&&!this.agentAudioDoneReceived&&(this.isAgentSpeaking=!0,this.emit({type:"agent_speaking",payload:!0}));const h=new Float32Array(d);this.playbackWorkletNode.port.postMessage({type:"audio",data:h},[h.buffer])}resample24kTo48k(e){const t=e.length*2,o=new Float32Array(t);for(let a=0;a<e.length-1;a++){const n=e[a],d=e[a+1];o[a*2]=n,o[a*2+1]=(n+d)/2}const i=e.length-1;return o[i*2]=e[i],o[i*2+1]=e[i],o}clearPlaybackBuffer(){this.playbackWorkletNode&&this.playbackWorkletNode.port.postMessage({type:"clear"})}async startMicrophone(){if(!this.audioContext)throw new Error("Audio context not initialized");try{this.mediaStream=await navigator.mediaDevices.getUserMedia({audio:{channelCount:1,sampleRate:this.inputSampleRate,echoCancellation:!0,noiseSuppression:!0}});const e=this.audioContext.createMediaStreamSource(this.mediaStream);this.captureWorkletNode=new AudioWorkletNode(this.audioContext,"audio-capture-processor"),this.captureWorkletNode.port.onmessage=t=>{!this.active||!this.ws||this.ws.readyState!==WebSocket.OPEN||this.isMuted||this.ws.send(t.data)},e.connect(this.captureWorkletNode),this.emit({type:"microphone_ready"})}catch(e){throw console.error("[GlydeVoice] Microphone error:",e),e}}async saveTranscript(e,t){if(!(!this.config.contextId||!e))try{await fetch(`${this.unityUrl}/api/unity/voice/transcript`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({context_id:this.config.contextId,content:e,role:t==="assistant"?"assistant":"user"})})}catch{}}setMuted(e){this.isMuted=e}getMuted(){return this.isMuted}isActive(){return this.active}getServerConfig(){return this.serverConfig}stop(){this.active=!1,this.cleanup()}cleanup(){this.captureWorkletNode&&(this.captureWorkletNode.disconnect(),this.captureWorkletNode.port.close(),this.captureWorkletNode=null),this.playbackWorkletNode&&(this.playbackWorkletNode.disconnect(),this.playbackWorkletNode.port.close(),this.playbackWorkletNode=null),this.mediaStream&&(this.mediaStream.getTracks().forEach(e=>e.stop()),this.mediaStream=null),this.audioContext&&(this.audioContext.close(),this.audioContext=null),this.ws&&(this.ws.readyState===WebSocket.OPEN&&this.ws.close(),this.ws=null)}emit(e){this.config.onEvent&&this.config.onEvent(e)}renderUI(){if(!this.config.container)return;const e=typeof this.config.container=="string"?document.querySelector(this.config.container):this.config.container;e&&(e.innerHTML=`
 <div style="padding: 20px; border: 1px solid #ccc; border-radius: 8px; background: #fff;">
   <h3>Glyde Voice Agent</h3>
   <p>Status: Active</p>
   <p>Context: ${this.config.contextType}</p>
   <button onclick="this.closest('div').remove()">Close</button>
 </div>
-`)}}
+`)}async handleFunctionCallRequest(e){console.log("[GlydeVoice] Function call request:",e.function_name,e.input);let t;try{e.function_name==="end_conversation"?t=await this.handleEndConversation(e.input):t=await this.executeVoiceFunction(e.function_name,e.function_call_id,e.input)}catch(i){console.error("[GlydeVoice] Function call error:",i),t=JSON.stringify({error:"Function execution failed",details:i instanceof Error?i.message:String(i)})}const o={type:"FunctionCallResponse",function_call_id:e.function_call_id,output:t};this.ws&&this.ws.readyState===WebSocket.OPEN?(this.ws.send(JSON.stringify(o)),console.log("[GlydeVoice] Function response sent:",e.function_name)):console.error("[GlydeVoice] Cannot send function response - WebSocket not open")}async executeVoiceFunction(e,t,o){console.log("[GlydeVoice] Executing voice function via Unity API:",e);try{const i=await fetch(`${this.unityUrl}/api/unity/voice/function`,{method:"POST",headers:this.getAuthHeaders(),body:JSON.stringify({function_name:e,function_call_id:t,input:o,context:{context_id:this.sessionContext.contextId,context_type:this.sessionContext.contextType,current_job_uuid:this.sessionContext.currentJobUuid}})});if(!i.ok){const n=await i.json().catch(()=>({}));throw new Error(n.error?.message||`Function call failed: ${i.status}`)}const a=await i.json();if(a.success&&a.data?.output)return typeof a.data.output=="string"?a.data.output:JSON.stringify(a.data.output);throw new Error("Invalid response from voice function endpoint")}catch(i){return console.error("[GlydeVoice] Voice function error:",i),JSON.stringify({success:!1,error:i instanceof Error?i.message:"Function execution failed",fallback_message:"I apologize, but I'm having trouble with that request right now. Is there something else I can help you with?"})}}async handleEndConversation(e){const t=e.item||"user request";return console.log(`[GlydeVoice] End conversation triggered by: ${t}`),setTimeout(()=>{this.stop()},2e3),JSON.stringify({success:!0,message:"Conversation ending. Say goodbye to the user.",trigger_phrase:t})}setSessionContext(e){this.sessionContext={...this.sessionContext,...e},console.log("[GlydeVoice] Session context updated:",{hasContextId:!!e.contextId,contextType:e.contextType,hasJobUuid:!!e.currentJobUuid})}}c.GlydeVoice=f,Object.defineProperty(c,Symbol.toStringTag,{value:"Module"})}));