@lokutor/sdk 1.1.9 → 1.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +48 -51
- package/dist/index.d.ts +48 -51
- package/dist/index.js +446 -445
- package/dist/index.mjs +446 -444
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -26,7 +26,6 @@ __export(index_exports, {
|
|
|
26
26
|
Language: () => Language,
|
|
27
27
|
StreamResampler: () => StreamResampler,
|
|
28
28
|
TTSClient: () => TTSClient,
|
|
29
|
-
VoiceAgent: () => VoiceAgent,
|
|
30
29
|
VoiceAgentClient: () => VoiceAgentClient,
|
|
31
30
|
VoiceStyle: () => VoiceStyle,
|
|
32
31
|
applyLowPassFilter: () => applyLowPassFilter,
|
|
@@ -79,289 +78,6 @@ var DEFAULT_URLS = {
|
|
|
79
78
|
TTS: "wss://api.lokutor.com/ws/tts"
|
|
80
79
|
};
|
|
81
80
|
|
|
82
|
-
// src/client.ts
|
|
83
|
-
function base64ToUint8Array(base64) {
|
|
84
|
-
const binaryString = atob(base64);
|
|
85
|
-
const bytes = new Uint8Array(binaryString.length);
|
|
86
|
-
for (let i = 0; i < binaryString.length; i++) {
|
|
87
|
-
bytes[i] = binaryString.charCodeAt(i);
|
|
88
|
-
}
|
|
89
|
-
return bytes;
|
|
90
|
-
}
|
|
91
|
-
var VoiceAgentClient = class {
|
|
92
|
-
ws = null;
|
|
93
|
-
apiKey;
|
|
94
|
-
prompt;
|
|
95
|
-
voice;
|
|
96
|
-
language;
|
|
97
|
-
// Callbacks
|
|
98
|
-
onTranscription;
|
|
99
|
-
onResponse;
|
|
100
|
-
onAudioCallback;
|
|
101
|
-
onVisemesCallback;
|
|
102
|
-
onStatus;
|
|
103
|
-
onError;
|
|
104
|
-
isConnected = false;
|
|
105
|
-
messages = [];
|
|
106
|
-
visemeListeners = [];
|
|
107
|
-
wantVisemes = false;
|
|
108
|
-
serverUrl = null;
|
|
109
|
-
constructor(config) {
|
|
110
|
-
this.apiKey = config.apiKey;
|
|
111
|
-
this.prompt = config.prompt;
|
|
112
|
-
this.voice = config.voice || "F1" /* F1 */;
|
|
113
|
-
this.language = config.language || "en" /* ENGLISH */;
|
|
114
|
-
this.serverUrl = config.serverUrl || null;
|
|
115
|
-
this.onTranscription = config.onTranscription;
|
|
116
|
-
this.onResponse = config.onResponse;
|
|
117
|
-
this.onAudioCallback = config.onAudio;
|
|
118
|
-
this.onVisemesCallback = config.onVisemes;
|
|
119
|
-
this.onStatus = config.onStatus;
|
|
120
|
-
this.onError = config.onError;
|
|
121
|
-
this.wantVisemes = config.visemes || false;
|
|
122
|
-
}
|
|
123
|
-
/**
|
|
124
|
-
* Connect to the Lokutor Voice Agent server
|
|
125
|
-
*/
|
|
126
|
-
async connect() {
|
|
127
|
-
return new Promise((resolve, reject) => {
|
|
128
|
-
try {
|
|
129
|
-
let url = this.serverUrl || DEFAULT_URLS.VOICE_AGENT;
|
|
130
|
-
if (this.apiKey) {
|
|
131
|
-
const separator = url.includes("?") ? "&" : "?";
|
|
132
|
-
url += `${separator}api_key=${this.apiKey}`;
|
|
133
|
-
}
|
|
134
|
-
console.log(`\u{1F517} Connecting to ${url}...`);
|
|
135
|
-
this.ws = new WebSocket(url);
|
|
136
|
-
this.ws.binaryType = "arraybuffer";
|
|
137
|
-
this.ws.onopen = () => {
|
|
138
|
-
this.isConnected = true;
|
|
139
|
-
console.log("\u2705 Connected to voice agent!");
|
|
140
|
-
this.sendConfig();
|
|
141
|
-
resolve(true);
|
|
142
|
-
};
|
|
143
|
-
this.ws.onmessage = async (event) => {
|
|
144
|
-
if (event.data instanceof ArrayBuffer) {
|
|
145
|
-
this.handleBinaryMessage(new Uint8Array(event.data));
|
|
146
|
-
} else {
|
|
147
|
-
this.handleTextMessage(event.data.toString());
|
|
148
|
-
}
|
|
149
|
-
};
|
|
150
|
-
this.ws.onerror = (err) => {
|
|
151
|
-
console.error("\u274C WebSocket error:", err);
|
|
152
|
-
if (this.onError) this.onError(err);
|
|
153
|
-
if (!this.isConnected) reject(err);
|
|
154
|
-
};
|
|
155
|
-
this.ws.onclose = () => {
|
|
156
|
-
this.isConnected = false;
|
|
157
|
-
console.log("Disconnected");
|
|
158
|
-
};
|
|
159
|
-
} catch (err) {
|
|
160
|
-
if (this.onError) this.onError(err);
|
|
161
|
-
reject(err);
|
|
162
|
-
}
|
|
163
|
-
});
|
|
164
|
-
}
|
|
165
|
-
/**
|
|
166
|
-
* Send initial configuration to the server
|
|
167
|
-
*/
|
|
168
|
-
sendConfig() {
|
|
169
|
-
if (!this.ws || !this.isConnected) return;
|
|
170
|
-
this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
|
|
171
|
-
this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
|
|
172
|
-
this.ws.send(JSON.stringify({ type: "language", data: this.language }));
|
|
173
|
-
this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
|
|
174
|
-
console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
|
|
175
|
-
}
|
|
176
|
-
/**
|
|
177
|
-
* Send raw PCM audio data to the server
|
|
178
|
-
* @param audioData Int16 PCM audio buffer
|
|
179
|
-
*/
|
|
180
|
-
sendAudio(audioData) {
|
|
181
|
-
if (this.ws && this.isConnected) {
|
|
182
|
-
this.ws.send(audioData);
|
|
183
|
-
}
|
|
184
|
-
}
|
|
185
|
-
/**
|
|
186
|
-
* Handle incoming binary data (audio response)
|
|
187
|
-
*/
|
|
188
|
-
handleBinaryMessage(data) {
|
|
189
|
-
this.emit("audio", data);
|
|
190
|
-
}
|
|
191
|
-
/**
|
|
192
|
-
* Handle incoming text messages (metadata/transcriptions)
|
|
193
|
-
*/
|
|
194
|
-
handleTextMessage(text) {
|
|
195
|
-
try {
|
|
196
|
-
const msg = JSON.parse(text);
|
|
197
|
-
switch (msg.type) {
|
|
198
|
-
case "audio":
|
|
199
|
-
if (msg.data) {
|
|
200
|
-
const buffer = base64ToUint8Array(msg.data);
|
|
201
|
-
this.handleBinaryMessage(buffer);
|
|
202
|
-
}
|
|
203
|
-
break;
|
|
204
|
-
case "transcript":
|
|
205
|
-
const role = msg.role === "user" ? "user" : "agent";
|
|
206
|
-
this.messages.push({
|
|
207
|
-
role,
|
|
208
|
-
text: msg.data,
|
|
209
|
-
timestamp: Date.now()
|
|
210
|
-
});
|
|
211
|
-
if (msg.role === "user") {
|
|
212
|
-
if (this.onTranscription) this.onTranscription(msg.data);
|
|
213
|
-
console.log(`\u{1F4AC} You: ${msg.data}`);
|
|
214
|
-
} else {
|
|
215
|
-
if (this.onResponse) this.onResponse(msg.data);
|
|
216
|
-
console.log(`\u{1F916} Agent: ${msg.data}`);
|
|
217
|
-
}
|
|
218
|
-
break;
|
|
219
|
-
case "status":
|
|
220
|
-
if (this.onStatus) this.onStatus(msg.data);
|
|
221
|
-
const icons = {
|
|
222
|
-
"interrupted": "\u26A1",
|
|
223
|
-
"thinking": "\u{1F9E0}",
|
|
224
|
-
"speaking": "\u{1F50A}",
|
|
225
|
-
"listening": "\u{1F442}"
|
|
226
|
-
};
|
|
227
|
-
console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
|
|
228
|
-
break;
|
|
229
|
-
case "visemes":
|
|
230
|
-
if (Array.isArray(msg.data) && msg.data.length > 0) {
|
|
231
|
-
this.emit("visemes", msg.data);
|
|
232
|
-
}
|
|
233
|
-
break;
|
|
234
|
-
case "error":
|
|
235
|
-
if (this.onError) this.onError(msg.data);
|
|
236
|
-
console.error(`\u274C Server error: ${msg.data}`);
|
|
237
|
-
break;
|
|
238
|
-
}
|
|
239
|
-
} catch (e) {
|
|
240
|
-
}
|
|
241
|
-
}
|
|
242
|
-
audioListeners = [];
|
|
243
|
-
emit(event, data) {
|
|
244
|
-
if (event === "audio") {
|
|
245
|
-
if (this.onAudioCallback) this.onAudioCallback(data);
|
|
246
|
-
this.audioListeners.forEach((l) => l(data));
|
|
247
|
-
} else if (event === "visemes") {
|
|
248
|
-
if (this.onVisemesCallback) this.onVisemesCallback(data);
|
|
249
|
-
this.visemeListeners.forEach((l) => l(data));
|
|
250
|
-
}
|
|
251
|
-
}
|
|
252
|
-
onAudio(callback) {
|
|
253
|
-
this.audioListeners.push(callback);
|
|
254
|
-
}
|
|
255
|
-
onVisemes(callback) {
|
|
256
|
-
this.visemeListeners.push(callback);
|
|
257
|
-
}
|
|
258
|
-
/**
|
|
259
|
-
* Disconnect from the server
|
|
260
|
-
*/
|
|
261
|
-
disconnect() {
|
|
262
|
-
if (this.ws) {
|
|
263
|
-
this.ws.close();
|
|
264
|
-
this.ws = null;
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
|
-
/**
|
|
268
|
-
* Update the system prompt mid-conversation
|
|
269
|
-
*/
|
|
270
|
-
updatePrompt(newPrompt) {
|
|
271
|
-
this.prompt = newPrompt;
|
|
272
|
-
if (this.ws && this.isConnected) {
|
|
273
|
-
try {
|
|
274
|
-
this.ws.send(JSON.stringify({ type: "prompt", data: newPrompt }));
|
|
275
|
-
console.log(`\u2699\uFE0F Updated prompt: ${newPrompt.substring(0, 50)}...`);
|
|
276
|
-
} catch (error) {
|
|
277
|
-
console.error("Error updating prompt:", error);
|
|
278
|
-
}
|
|
279
|
-
} else {
|
|
280
|
-
console.warn("Not connected - prompt will be updated on next connection");
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
/**
|
|
284
|
-
* Get full conversation transcript
|
|
285
|
-
*/
|
|
286
|
-
getTranscript() {
|
|
287
|
-
return this.messages.slice();
|
|
288
|
-
}
|
|
289
|
-
/**
|
|
290
|
-
* Get conversation as formatted text
|
|
291
|
-
*/
|
|
292
|
-
getTranscriptText() {
|
|
293
|
-
return this.messages.map((msg) => `${msg.role === "user" ? "You" : "Agent"}: ${msg.text}`).join("\n");
|
|
294
|
-
}
|
|
295
|
-
};
|
|
296
|
-
var TTSClient = class {
|
|
297
|
-
apiKey;
|
|
298
|
-
constructor(config) {
|
|
299
|
-
this.apiKey = config.apiKey;
|
|
300
|
-
}
|
|
301
|
-
/**
|
|
302
|
-
* Synthesize text to speech
|
|
303
|
-
*
|
|
304
|
-
* This opens a temporary WebSocket connection, sends the request,
|
|
305
|
-
* and streams back the audio.
|
|
306
|
-
*/
|
|
307
|
-
synthesize(options) {
|
|
308
|
-
return new Promise((resolve, reject) => {
|
|
309
|
-
try {
|
|
310
|
-
let url = DEFAULT_URLS.TTS;
|
|
311
|
-
if (this.apiKey) {
|
|
312
|
-
const separator = url.includes("?") ? "&" : "?";
|
|
313
|
-
url += `${separator}api_key=${this.apiKey}`;
|
|
314
|
-
}
|
|
315
|
-
const ws = new WebSocket(url);
|
|
316
|
-
ws.binaryType = "arraybuffer";
|
|
317
|
-
ws.onopen = () => {
|
|
318
|
-
const req = {
|
|
319
|
-
text: options.text,
|
|
320
|
-
voice: options.voice || "F1" /* F1 */,
|
|
321
|
-
lang: options.language || "en" /* ENGLISH */,
|
|
322
|
-
speed: options.speed || 1.05,
|
|
323
|
-
steps: options.steps || 24,
|
|
324
|
-
visemes: options.visemes || false
|
|
325
|
-
};
|
|
326
|
-
ws.send(JSON.stringify(req));
|
|
327
|
-
};
|
|
328
|
-
ws.onmessage = async (event) => {
|
|
329
|
-
if (event.data instanceof ArrayBuffer) {
|
|
330
|
-
if (options.onAudio) options.onAudio(new Uint8Array(event.data));
|
|
331
|
-
} else {
|
|
332
|
-
try {
|
|
333
|
-
const msg = JSON.parse(event.data.toString());
|
|
334
|
-
if (Array.isArray(msg) && options.onVisemes) {
|
|
335
|
-
options.onVisemes(msg);
|
|
336
|
-
}
|
|
337
|
-
} catch (e) {
|
|
338
|
-
}
|
|
339
|
-
}
|
|
340
|
-
};
|
|
341
|
-
ws.onerror = (err) => {
|
|
342
|
-
if (options.onError) options.onError(err);
|
|
343
|
-
reject(err);
|
|
344
|
-
};
|
|
345
|
-
ws.onclose = () => {
|
|
346
|
-
resolve();
|
|
347
|
-
};
|
|
348
|
-
} catch (err) {
|
|
349
|
-
if (options.onError) options.onError(err);
|
|
350
|
-
reject(err);
|
|
351
|
-
}
|
|
352
|
-
});
|
|
353
|
-
}
|
|
354
|
-
};
|
|
355
|
-
async function simpleConversation(config) {
|
|
356
|
-
const client = new VoiceAgentClient(config);
|
|
357
|
-
await client.connect();
|
|
358
|
-
return client;
|
|
359
|
-
}
|
|
360
|
-
async function simpleTTS(options) {
|
|
361
|
-
const client = new TTSClient({ apiKey: options.apiKey });
|
|
362
|
-
return client.synthesize(options);
|
|
363
|
-
}
|
|
364
|
-
|
|
365
81
|
// src/audio-utils.ts
|
|
366
82
|
function pcm16ToFloat32(int16Data) {
|
|
367
83
|
const float32 = new Float32Array(int16Data.length);
|
|
@@ -501,11 +217,11 @@ var BrowserAudioManager = class {
|
|
|
501
217
|
scriptProcessor = null;
|
|
502
218
|
analyserNode = null;
|
|
503
219
|
mediaStream = null;
|
|
220
|
+
resampler = null;
|
|
504
221
|
// Playback scheduling
|
|
505
222
|
nextPlaybackTime = 0;
|
|
506
223
|
activeSources = [];
|
|
507
|
-
|
|
508
|
-
audioClockOffset = null;
|
|
224
|
+
playbackQueue = [];
|
|
509
225
|
// Configuration
|
|
510
226
|
inputSampleRate;
|
|
511
227
|
outputSampleRate;
|
|
@@ -518,7 +234,6 @@ var BrowserAudioManager = class {
|
|
|
518
234
|
// Audio processing state
|
|
519
235
|
isMuted = false;
|
|
520
236
|
isListening = false;
|
|
521
|
-
resampler = null;
|
|
522
237
|
constructor(config = {}) {
|
|
523
238
|
this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
|
|
524
239
|
this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
|
|
@@ -542,6 +257,7 @@ var BrowserAudioManager = class {
|
|
|
542
257
|
}
|
|
543
258
|
if (this.audioContext.state === "suspended") {
|
|
544
259
|
await this.audioContext.resume();
|
|
260
|
+
console.log("\u{1F442} AudioContext resumed");
|
|
545
261
|
}
|
|
546
262
|
if (analyserConfig?.enabled !== false) {
|
|
547
263
|
this.analyserNode = this.audioContext.createAnalyser();
|
|
@@ -555,7 +271,6 @@ var BrowserAudioManager = class {
|
|
|
555
271
|
if (!this.audioContext) {
|
|
556
272
|
await this.init();
|
|
557
273
|
}
|
|
558
|
-
this.resampler = new StreamResampler(this.audioContext.sampleRate, this.inputSampleRate);
|
|
559
274
|
try {
|
|
560
275
|
this.onAudioInput = onAudioInput;
|
|
561
276
|
this.isListening = true;
|
|
@@ -571,13 +286,21 @@ var BrowserAudioManager = class {
|
|
|
571
286
|
this.scriptProcessor = this.audioContext.createScriptProcessor(
|
|
572
287
|
bufferSize,
|
|
573
288
|
1,
|
|
289
|
+
// input channels
|
|
574
290
|
1
|
|
291
|
+
// output channels
|
|
575
292
|
);
|
|
576
293
|
this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
|
|
577
294
|
this.scriptProcessor.connect(this.audioContext.destination);
|
|
578
295
|
if (this.analyserNode) {
|
|
579
296
|
this.mediaStreamAudioSourceNode.connect(this.analyserNode);
|
|
580
297
|
}
|
|
298
|
+
const hardwareRate = this.audioContext.sampleRate;
|
|
299
|
+
if (hardwareRate !== this.inputSampleRate) {
|
|
300
|
+
this.resampler = new StreamResampler(hardwareRate, this.inputSampleRate);
|
|
301
|
+
} else {
|
|
302
|
+
this.resampler = null;
|
|
303
|
+
}
|
|
581
304
|
this.scriptProcessor.onaudioprocess = (event) => {
|
|
582
305
|
this._processAudioInput(event);
|
|
583
306
|
};
|
|
@@ -588,19 +311,36 @@ var BrowserAudioManager = class {
|
|
|
588
311
|
throw err;
|
|
589
312
|
}
|
|
590
313
|
}
|
|
314
|
+
/**
|
|
315
|
+
* Internal method to process microphone audio data
|
|
316
|
+
*/
|
|
591
317
|
_processAudioInput(event) {
|
|
592
|
-
if (!this.onAudioInput || !this.audioContext || !this.isListening
|
|
593
|
-
|
|
594
|
-
event.
|
|
595
|
-
const
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
318
|
+
if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
|
|
319
|
+
if (this.isMuted) return;
|
|
320
|
+
const inputBuffer = event.inputBuffer;
|
|
321
|
+
const inputData = inputBuffer.getChannelData(0);
|
|
322
|
+
const outputBuffer = event.outputBuffer;
|
|
323
|
+
for (let i = 0; i < outputBuffer.getChannelData(0).length; i++) {
|
|
324
|
+
outputBuffer.getChannelData(0)[i] = 0;
|
|
325
|
+
}
|
|
326
|
+
let processedData = new Float32Array(inputData);
|
|
327
|
+
if (this.resampler) {
|
|
328
|
+
processedData = this.resampler.process(processedData);
|
|
599
329
|
}
|
|
330
|
+
if (processedData.length === 0) return;
|
|
331
|
+
const int16Data = float32ToPcm16(processedData);
|
|
332
|
+
const uint8Data = new Uint8Array(
|
|
333
|
+
int16Data.buffer,
|
|
334
|
+
int16Data.byteOffset,
|
|
335
|
+
int16Data.byteLength
|
|
336
|
+
);
|
|
337
|
+
this.onAudioInput(uint8Data);
|
|
600
338
|
}
|
|
339
|
+
/**
|
|
340
|
+
* Stop capturing microphone input
|
|
341
|
+
*/
|
|
601
342
|
stopMicrophone() {
|
|
602
343
|
this.isListening = false;
|
|
603
|
-
this.resampler = null;
|
|
604
344
|
if (this.mediaStream) {
|
|
605
345
|
this.mediaStream.getTracks().forEach((track) => track.stop());
|
|
606
346
|
this.mediaStream = null;
|
|
@@ -613,12 +353,17 @@ var BrowserAudioManager = class {
|
|
|
613
353
|
this.mediaStreamAudioSourceNode.disconnect();
|
|
614
354
|
this.mediaStreamAudioSourceNode = null;
|
|
615
355
|
}
|
|
356
|
+
console.log("\u{1F3A4} Microphone stopped");
|
|
616
357
|
}
|
|
617
358
|
/**
|
|
618
359
|
* Play back audio received from the server
|
|
360
|
+
* @param pcm16Data Int16 PCM audio data at SPEAKER_SAMPLE_RATE
|
|
619
361
|
*/
|
|
620
362
|
playAudio(pcm16Data) {
|
|
621
|
-
if (!this.audioContext)
|
|
363
|
+
if (!this.audioContext) {
|
|
364
|
+
console.warn("AudioContext not initialized");
|
|
365
|
+
return;
|
|
366
|
+
}
|
|
622
367
|
const int16Array = new Int16Array(
|
|
623
368
|
pcm16Data.buffer,
|
|
624
369
|
pcm16Data.byteOffset,
|
|
@@ -633,17 +378,18 @@ var BrowserAudioManager = class {
|
|
|
633
378
|
audioBuffer.getChannelData(0).set(float32Data);
|
|
634
379
|
this._schedulePlayback(audioBuffer);
|
|
635
380
|
}
|
|
381
|
+
/**
|
|
382
|
+
* Internal method to schedule and play audio with sample-accurate timing
|
|
383
|
+
*/
|
|
636
384
|
_schedulePlayback(audioBuffer) {
|
|
637
385
|
if (!this.audioContext) return;
|
|
638
386
|
const currentTime = this.audioContext.currentTime;
|
|
639
387
|
const duration = audioBuffer.length / this.outputSampleRate;
|
|
640
388
|
const startTime = Math.max(
|
|
641
389
|
currentTime + 0.01,
|
|
390
|
+
// Minimum 10ms delay
|
|
642
391
|
this.nextPlaybackTime
|
|
643
392
|
);
|
|
644
|
-
if (this.audioClockOffset === null) {
|
|
645
|
-
this.audioClockOffset = startTime;
|
|
646
|
-
}
|
|
647
393
|
this.nextPlaybackTime = startTime + duration;
|
|
648
394
|
const source = this.audioContext.createBufferSource();
|
|
649
395
|
source.buffer = audioBuffer;
|
|
@@ -658,185 +404,441 @@ var BrowserAudioManager = class {
|
|
|
658
404
|
if (index > -1) {
|
|
659
405
|
this.activeSources.splice(index, 1);
|
|
660
406
|
}
|
|
661
|
-
};
|
|
407
|
+
};
|
|
408
|
+
}
|
|
409
|
+
/**
|
|
410
|
+
* Stop all currently playing audio and clear the queue
|
|
411
|
+
*/
|
|
412
|
+
stopPlayback() {
|
|
413
|
+
this.activeSources.forEach((source) => {
|
|
414
|
+
try {
|
|
415
|
+
source.stop();
|
|
416
|
+
} catch (e) {
|
|
417
|
+
}
|
|
418
|
+
});
|
|
419
|
+
this.activeSources = [];
|
|
420
|
+
this.playbackQueue = [];
|
|
421
|
+
this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
|
|
422
|
+
console.log("\u{1F507} Playback stopped");
|
|
423
|
+
}
|
|
424
|
+
/**
|
|
425
|
+
* Toggle mute state
|
|
426
|
+
*/
|
|
427
|
+
setMuted(muted) {
|
|
428
|
+
this.isMuted = muted;
|
|
429
|
+
}
|
|
430
|
+
/**
|
|
431
|
+
* Get current mute state
|
|
432
|
+
*/
|
|
433
|
+
isMicMuted() {
|
|
434
|
+
return this.isMuted;
|
|
435
|
+
}
|
|
436
|
+
/**
|
|
437
|
+
* Get current amplitude from analyser (for visualization)
|
|
438
|
+
* Returns value between 0 and 1
|
|
439
|
+
*/
|
|
440
|
+
getAmplitude() {
|
|
441
|
+
if (!this.analyserNode) return 0;
|
|
442
|
+
const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
|
|
443
|
+
this.analyserNode.getByteTimeDomainData(dataArray);
|
|
444
|
+
const rms = calculateRMS(dataArray);
|
|
445
|
+
return Math.min(rms * 10, 1);
|
|
446
|
+
}
|
|
447
|
+
/**
|
|
448
|
+
* Get frequency data from analyser for visualization
|
|
449
|
+
*/
|
|
450
|
+
getFrequencyData() {
|
|
451
|
+
if (!this.analyserNode) {
|
|
452
|
+
return new Uint8Array(0);
|
|
453
|
+
}
|
|
454
|
+
const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
|
|
455
|
+
this.analyserNode.getByteFrequencyData(dataArray);
|
|
456
|
+
return dataArray;
|
|
457
|
+
}
|
|
458
|
+
/**
|
|
459
|
+
* Get time-domain data from analyser for waveform visualization
|
|
460
|
+
*/
|
|
461
|
+
getWaveformData() {
|
|
462
|
+
if (!this.analyserNode) {
|
|
463
|
+
return new Uint8Array(0);
|
|
464
|
+
}
|
|
465
|
+
const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
|
|
466
|
+
this.analyserNode.getByteTimeDomainData(dataArray);
|
|
467
|
+
return dataArray;
|
|
468
|
+
}
|
|
469
|
+
/**
|
|
470
|
+
* Cleanup and close AudioContext
|
|
471
|
+
*/
|
|
472
|
+
cleanup() {
|
|
473
|
+
this.stopMicrophone();
|
|
474
|
+
this.stopPlayback();
|
|
475
|
+
if (this.analyserNode) {
|
|
476
|
+
this.analyserNode.disconnect();
|
|
477
|
+
this.analyserNode = null;
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
/**
|
|
481
|
+
* Get current audio context state
|
|
482
|
+
*/
|
|
483
|
+
getState() {
|
|
484
|
+
return this.audioContext?.state ?? null;
|
|
485
|
+
}
|
|
486
|
+
/**
|
|
487
|
+
* Check if microphone is currently listening
|
|
488
|
+
*/
|
|
489
|
+
isRecording() {
|
|
490
|
+
return this.isListening;
|
|
491
|
+
}
|
|
492
|
+
};
|
|
493
|
+
|
|
494
|
+
// src/client.ts
|
|
495
|
+
function base64ToUint8Array(base64) {
|
|
496
|
+
const binaryString = atob(base64);
|
|
497
|
+
const bytes = new Uint8Array(binaryString.length);
|
|
498
|
+
for (let i = 0; i < binaryString.length; i++) {
|
|
499
|
+
bytes[i] = binaryString.charCodeAt(i);
|
|
500
|
+
}
|
|
501
|
+
return bytes;
|
|
502
|
+
}
|
|
503
|
+
var VoiceAgentClient = class {
|
|
504
|
+
ws = null;
|
|
505
|
+
apiKey;
|
|
506
|
+
prompt;
|
|
507
|
+
voice;
|
|
508
|
+
language;
|
|
509
|
+
// Callbacks
|
|
510
|
+
onTranscription;
|
|
511
|
+
onResponse;
|
|
512
|
+
onAudioCallback;
|
|
513
|
+
onVisemesCallback;
|
|
514
|
+
onStatus;
|
|
515
|
+
onError;
|
|
516
|
+
isConnected = false;
|
|
517
|
+
messages = [];
|
|
518
|
+
visemeListeners = [];
|
|
519
|
+
wantVisemes = false;
|
|
520
|
+
audioManager = null;
|
|
521
|
+
enableAudio = false;
|
|
522
|
+
// Connection resilience
|
|
523
|
+
isUserDisconnect = false;
|
|
524
|
+
reconnecting = false;
|
|
525
|
+
reconnectAttempts = 0;
|
|
526
|
+
maxReconnectAttempts = 5;
|
|
527
|
+
constructor(config) {
|
|
528
|
+
this.apiKey = config.apiKey;
|
|
529
|
+
this.prompt = config.prompt;
|
|
530
|
+
this.voice = config.voice || "F1" /* F1 */;
|
|
531
|
+
this.language = config.language || "en" /* ENGLISH */;
|
|
532
|
+
this.onTranscription = config.onTranscription;
|
|
533
|
+
this.onResponse = config.onResponse;
|
|
534
|
+
this.onAudioCallback = config.onAudio;
|
|
535
|
+
this.onVisemesCallback = config.onVisemes;
|
|
536
|
+
this.onStatus = config.onStatus;
|
|
537
|
+
this.onError = config.onError;
|
|
538
|
+
this.wantVisemes = config.visemes || false;
|
|
539
|
+
this.enableAudio = config.enableAudio ?? false;
|
|
540
|
+
}
|
|
541
|
+
/**
|
|
542
|
+
* Connect to the Lokutor Voice Agent server
|
|
543
|
+
*/
|
|
544
|
+
async connect() {
|
|
545
|
+
this.isUserDisconnect = false;
|
|
546
|
+
if (this.enableAudio) {
|
|
547
|
+
if (!this.audioManager) {
|
|
548
|
+
this.audioManager = new BrowserAudioManager();
|
|
549
|
+
}
|
|
550
|
+
await this.audioManager.init();
|
|
551
|
+
}
|
|
552
|
+
return new Promise((resolve, reject) => {
|
|
553
|
+
try {
|
|
554
|
+
let url = DEFAULT_URLS.VOICE_AGENT;
|
|
555
|
+
if (this.apiKey) {
|
|
556
|
+
const separator = url.includes("?") ? "&" : "?";
|
|
557
|
+
url += `${separator}api_key=${this.apiKey}`;
|
|
558
|
+
}
|
|
559
|
+
console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
|
|
560
|
+
this.ws = new WebSocket(url);
|
|
561
|
+
this.ws.binaryType = "arraybuffer";
|
|
562
|
+
this.ws.onopen = async () => {
|
|
563
|
+
this.isConnected = true;
|
|
564
|
+
this.reconnectAttempts = 0;
|
|
565
|
+
this.reconnecting = false;
|
|
566
|
+
console.log("\u2705 Connected to voice agent!");
|
|
567
|
+
this.sendConfig();
|
|
568
|
+
if (this.audioManager) {
|
|
569
|
+
await this.audioManager.startMicrophone((data) => {
|
|
570
|
+
if (this.isConnected) {
|
|
571
|
+
this.sendAudio(data);
|
|
572
|
+
}
|
|
573
|
+
});
|
|
574
|
+
}
|
|
575
|
+
resolve(true);
|
|
576
|
+
};
|
|
577
|
+
this.ws.onmessage = async (event) => {
|
|
578
|
+
if (event.data instanceof ArrayBuffer) {
|
|
579
|
+
this.handleBinaryMessage(new Uint8Array(event.data));
|
|
580
|
+
} else {
|
|
581
|
+
this.handleTextMessage(event.data.toString());
|
|
582
|
+
}
|
|
583
|
+
};
|
|
584
|
+
this.ws.onerror = (err) => {
|
|
585
|
+
console.error("\u274C WebSocket error:", err);
|
|
586
|
+
if (this.onError) this.onError(err);
|
|
587
|
+
if (!this.isConnected) reject(err);
|
|
588
|
+
};
|
|
589
|
+
this.ws.onclose = () => {
|
|
590
|
+
this.isConnected = false;
|
|
591
|
+
if (!this.isUserDisconnect && this.reconnectAttempts < this.maxReconnectAttempts) {
|
|
592
|
+
this.reconnecting = true;
|
|
593
|
+
this.reconnectAttempts++;
|
|
594
|
+
const backoffDelay = Math.min(1e3 * Math.pow(2, this.reconnectAttempts), 1e4);
|
|
595
|
+
console.warn(`Connection lost. Reconnecting in ${backoffDelay}ms (attempt ${this.reconnectAttempts}/${this.maxReconnectAttempts})`);
|
|
596
|
+
if (this.onStatus) this.onStatus("reconnecting");
|
|
597
|
+
setTimeout(() => {
|
|
598
|
+
this.connect().catch((e) => console.error("Reconnect failed", e));
|
|
599
|
+
}, backoffDelay);
|
|
600
|
+
} else {
|
|
601
|
+
console.log("Disconnected");
|
|
602
|
+
if (this.onStatus) this.onStatus("disconnected");
|
|
603
|
+
}
|
|
604
|
+
};
|
|
605
|
+
} catch (err) {
|
|
606
|
+
if (this.onError) this.onError(err);
|
|
607
|
+
reject(err);
|
|
608
|
+
}
|
|
609
|
+
});
|
|
662
610
|
}
|
|
663
611
|
/**
|
|
664
|
-
*
|
|
665
|
-
* Total stream time (in ms) = (audioContext.currentTime - audioClockOffset) * 1000
|
|
612
|
+
* Send initial configuration to the server
|
|
666
613
|
*/
|
|
667
|
-
|
|
668
|
-
|
|
614
|
+
sendConfig() {
|
|
615
|
+
if (!this.ws || !this.isConnected) return;
|
|
616
|
+
this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
|
|
617
|
+
this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
|
|
618
|
+
this.ws.send(JSON.stringify({ type: "language", data: this.language }));
|
|
619
|
+
this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
|
|
620
|
+
console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
|
|
669
621
|
}
|
|
670
622
|
/**
|
|
671
|
-
*
|
|
623
|
+
* Send raw PCM audio data to the server
|
|
624
|
+
* @param audioData Int16 PCM audio buffer
|
|
672
625
|
*/
|
|
673
|
-
|
|
674
|
-
this.
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
this.activeSources.forEach((source) => {
|
|
678
|
-
try {
|
|
679
|
-
source.stop();
|
|
680
|
-
} catch (e) {
|
|
681
|
-
}
|
|
682
|
-
});
|
|
683
|
-
this.activeSources = [];
|
|
684
|
-
this.nextPlaybackTime = 0;
|
|
685
|
-
this.resetAudioClock();
|
|
626
|
+
sendAudio(audioData) {
|
|
627
|
+
if (this.ws && this.ws.readyState === WebSocket.OPEN && this.isConnected) {
|
|
628
|
+
this.ws.send(audioData);
|
|
629
|
+
}
|
|
686
630
|
}
|
|
687
|
-
|
|
688
|
-
|
|
631
|
+
/**
|
|
632
|
+
* Handle incoming binary data (audio response)
|
|
633
|
+
*/
|
|
634
|
+
handleBinaryMessage(data) {
|
|
635
|
+
if (this.audioManager) {
|
|
636
|
+
this.audioManager.playAudio(data);
|
|
637
|
+
}
|
|
638
|
+
this.emit("audio", data);
|
|
689
639
|
}
|
|
690
|
-
|
|
691
|
-
|
|
640
|
+
/**
|
|
641
|
+
* Handle incoming text messages (metadata/transcriptions)
|
|
642
|
+
*/
|
|
643
|
+
handleTextMessage(text) {
|
|
644
|
+
try {
|
|
645
|
+
const msg = JSON.parse(text);
|
|
646
|
+
switch (msg.type) {
|
|
647
|
+
case "audio":
|
|
648
|
+
if (msg.data) {
|
|
649
|
+
const buffer = base64ToUint8Array(msg.data);
|
|
650
|
+
this.handleBinaryMessage(buffer);
|
|
651
|
+
}
|
|
652
|
+
break;
|
|
653
|
+
case "transcript":
|
|
654
|
+
const role = msg.role === "user" ? "user" : "agent";
|
|
655
|
+
this.messages.push({
|
|
656
|
+
role,
|
|
657
|
+
text: msg.data,
|
|
658
|
+
timestamp: Date.now()
|
|
659
|
+
});
|
|
660
|
+
if (msg.role === "user") {
|
|
661
|
+
if (this.onTranscription) this.onTranscription(msg.data);
|
|
662
|
+
console.log(`\u{1F4AC} You: ${msg.data}`);
|
|
663
|
+
} else {
|
|
664
|
+
if (this.onResponse) this.onResponse(msg.data);
|
|
665
|
+
console.log(`\u{1F916} Agent: ${msg.data}`);
|
|
666
|
+
}
|
|
667
|
+
break;
|
|
668
|
+
case "status":
|
|
669
|
+
if (msg.data === "interrupted" && this.audioManager) {
|
|
670
|
+
this.audioManager.stopPlayback();
|
|
671
|
+
}
|
|
672
|
+
if (this.onStatus) this.onStatus(msg.data);
|
|
673
|
+
const icons = {
|
|
674
|
+
"interrupted": "\u26A1",
|
|
675
|
+
"thinking": "\u{1F9E0}",
|
|
676
|
+
"speaking": "\u{1F50A}",
|
|
677
|
+
"listening": "\u{1F442}"
|
|
678
|
+
};
|
|
679
|
+
console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
|
|
680
|
+
break;
|
|
681
|
+
case "visemes":
|
|
682
|
+
if (Array.isArray(msg.data) && msg.data.length > 0) {
|
|
683
|
+
this.emit("visemes", msg.data);
|
|
684
|
+
}
|
|
685
|
+
break;
|
|
686
|
+
case "error":
|
|
687
|
+
if (this.onError) this.onError(msg.data);
|
|
688
|
+
console.error(`\u274C Server error: ${msg.data}`);
|
|
689
|
+
break;
|
|
690
|
+
}
|
|
691
|
+
} catch (e) {
|
|
692
|
+
}
|
|
692
693
|
}
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
694
|
+
audioListeners = [];
|
|
695
|
+
emit(event, data) {
|
|
696
|
+
if (event === "audio") {
|
|
697
|
+
if (this.onAudioCallback) this.onAudioCallback(data);
|
|
698
|
+
this.audioListeners.forEach((l) => l(data));
|
|
699
|
+
} else if (event === "visemes") {
|
|
700
|
+
if (this.onVisemesCallback) this.onVisemesCallback(data);
|
|
701
|
+
this.visemeListeners.forEach((l) => l(data));
|
|
702
|
+
}
|
|
699
703
|
}
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
|
|
703
|
-
this.analyserNode.getByteFrequencyData(dataArray);
|
|
704
|
-
return dataArray;
|
|
704
|
+
onAudio(callback) {
|
|
705
|
+
this.audioListeners.push(callback);
|
|
705
706
|
}
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
|
|
709
|
-
this.analyserNode.getByteTimeDomainData(dataArray);
|
|
710
|
-
return dataArray;
|
|
707
|
+
onVisemes(callback) {
|
|
708
|
+
this.visemeListeners.push(callback);
|
|
711
709
|
}
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
710
|
+
/**
|
|
711
|
+
* Disconnect from the server
|
|
712
|
+
*/
|
|
713
|
+
disconnect() {
|
|
714
|
+
this.isUserDisconnect = true;
|
|
715
|
+
if (this.ws) {
|
|
716
|
+
this.ws.close();
|
|
717
|
+
this.ws = null;
|
|
718
718
|
}
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
};
|
|
724
|
-
|
|
725
|
-
// src/voice-agent.ts
|
|
726
|
-
var VoiceAgent = class {
|
|
727
|
-
client;
|
|
728
|
-
audioManager;
|
|
729
|
-
options;
|
|
730
|
-
isConnected = false;
|
|
731
|
-
visemeQueue = [];
|
|
732
|
-
constructor(options) {
|
|
733
|
-
this.options = options;
|
|
734
|
-
this.client = new VoiceAgentClient({
|
|
735
|
-
apiKey: options.apiKey,
|
|
736
|
-
prompt: options.prompt || "You are a helpful and friendly AI assistant.",
|
|
737
|
-
voice: options.voice || "F1" /* F1 */,
|
|
738
|
-
language: options.language || "en" /* ENGLISH */,
|
|
739
|
-
visemes: options.visemes ?? true,
|
|
740
|
-
serverUrl: options.serverUrl,
|
|
741
|
-
onTranscription: (text) => {
|
|
742
|
-
if (options.onTranscription) options.onTranscription(text, true);
|
|
743
|
-
},
|
|
744
|
-
onResponse: (text) => {
|
|
745
|
-
if (options.onTranscription) options.onTranscription(text, false);
|
|
746
|
-
},
|
|
747
|
-
onAudio: (data) => {
|
|
748
|
-
this.audioManager.playAudio(data);
|
|
749
|
-
},
|
|
750
|
-
onVisemes: (visemes) => {
|
|
751
|
-
this.visemeQueue.push(...visemes);
|
|
752
|
-
if (options.onVisemes) options.onVisemes(visemes);
|
|
753
|
-
},
|
|
754
|
-
onStatus: (status) => {
|
|
755
|
-
if (options.onStatusChange) options.onStatusChange(status);
|
|
756
|
-
if (status === "interrupted" || status === "thinking") {
|
|
757
|
-
this.audioManager.stopPlayback();
|
|
758
|
-
this.visemeQueue = [];
|
|
759
|
-
}
|
|
760
|
-
},
|
|
761
|
-
onError: (err) => {
|
|
762
|
-
if (options.onError) options.onError(err);
|
|
763
|
-
}
|
|
764
|
-
});
|
|
765
|
-
this.audioManager = new BrowserAudioManager({
|
|
766
|
-
autoGainControl: true,
|
|
767
|
-
echoCancellation: true,
|
|
768
|
-
noiseSuppression: true
|
|
769
|
-
});
|
|
719
|
+
if (this.audioManager) {
|
|
720
|
+
this.audioManager.cleanup();
|
|
721
|
+
}
|
|
722
|
+
this.isConnected = false;
|
|
770
723
|
}
|
|
771
724
|
/**
|
|
772
|
-
*
|
|
773
|
-
*
|
|
774
|
-
* to satisfy browser AudioContext requirements.
|
|
725
|
+
* Toggles the microphone mute state (if managed by client)
|
|
726
|
+
* returns the new mute state
|
|
775
727
|
*/
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
this.isConnected = true;
|
|
782
|
-
await this.audioManager.startMicrophone((pcm16Data) => {
|
|
783
|
-
if (this.isConnected) {
|
|
784
|
-
this.client.sendAudio(pcm16Data);
|
|
785
|
-
}
|
|
786
|
-
});
|
|
787
|
-
return true;
|
|
788
|
-
} catch (err) {
|
|
789
|
-
if (this.options.onError) this.options.onError(err);
|
|
790
|
-
return false;
|
|
728
|
+
toggleMute() {
|
|
729
|
+
if (this.audioManager) {
|
|
730
|
+
const isMuted = this.audioManager.isMicMuted();
|
|
731
|
+
this.audioManager.setMuted(!isMuted);
|
|
732
|
+
return !isMuted;
|
|
791
733
|
}
|
|
734
|
+
return false;
|
|
792
735
|
}
|
|
793
736
|
/**
|
|
794
|
-
*
|
|
795
|
-
* Useful for voice activity visualization.
|
|
796
|
-
* @returns value between 0 and 1
|
|
737
|
+
* Gets the microphone volume amplitude 0-1 (if managed by client)
|
|
797
738
|
*/
|
|
798
739
|
getAmplitude() {
|
|
799
|
-
|
|
740
|
+
if (this.audioManager) {
|
|
741
|
+
return this.audioManager.getAmplitude();
|
|
742
|
+
}
|
|
743
|
+
return 0;
|
|
800
744
|
}
|
|
801
745
|
/**
|
|
802
|
-
*
|
|
746
|
+
* Update the system prompt mid-conversation
|
|
803
747
|
*/
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
this.
|
|
807
|
-
|
|
748
|
+
updatePrompt(newPrompt) {
|
|
749
|
+
this.prompt = newPrompt;
|
|
750
|
+
if (this.ws && this.isConnected) {
|
|
751
|
+
try {
|
|
752
|
+
this.ws.send(JSON.stringify({ type: "prompt", data: newPrompt }));
|
|
753
|
+
console.log(`\u2699\uFE0F Updated prompt: ${newPrompt.substring(0, 50)}...`);
|
|
754
|
+
} catch (error) {
|
|
755
|
+
console.error("Error updating prompt:", error);
|
|
756
|
+
}
|
|
757
|
+
} else {
|
|
758
|
+
console.warn("Not connected - prompt will be updated on next connection");
|
|
759
|
+
}
|
|
808
760
|
}
|
|
809
761
|
/**
|
|
810
|
-
*
|
|
811
|
-
* at the current playback frame. Use this in a requestAnimationFrame loop.
|
|
762
|
+
* Get full conversation transcript
|
|
812
763
|
*/
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
const audioCtx = this.audioManager.getAudioContext();
|
|
816
|
-
if (offset === null || !audioCtx) return [];
|
|
817
|
-
const streamTime = (audioCtx.currentTime - offset) * 1e3;
|
|
818
|
-
const currentBatch = [];
|
|
819
|
-
while (this.visemeQueue.length > 0 && this.visemeQueue[0].t * 1e3 <= streamTime) {
|
|
820
|
-
currentBatch.push(this.visemeQueue.shift());
|
|
821
|
-
}
|
|
822
|
-
return currentBatch;
|
|
764
|
+
getTranscript() {
|
|
765
|
+
return this.messages.slice();
|
|
823
766
|
}
|
|
824
767
|
/**
|
|
825
|
-
*
|
|
768
|
+
* Get conversation as formatted text
|
|
826
769
|
*/
|
|
827
|
-
|
|
828
|
-
this.
|
|
770
|
+
getTranscriptText() {
|
|
771
|
+
return this.messages.map((msg) => `${msg.role === "user" ? "You" : "Agent"}: ${msg.text}`).join("\n");
|
|
772
|
+
}
|
|
773
|
+
};
|
|
774
|
+
var TTSClient = class {
  apiKey;
  /**
   * @param {object} config - Client configuration.
   * @param {string} [config.apiKey] - When set, sent to the server as the
   *   `api_key` query parameter of the WebSocket URL.
   */
  constructor(config) {
    this.apiKey = config.apiKey;
  }
  /**
   * Synthesize text to speech.
   *
   * Opens a temporary WebSocket connection to `DEFAULT_URLS.TTS`, sends one
   * JSON request as soon as the socket opens, and streams the results back
   * through the callbacks on `options`:
   * - binary frames are passed to `options.onAudio` as a `Uint8Array`;
   * - text frames that parse to a JSON array are passed to `options.onVisemes`;
   * - any other text frame is ignored.
   *
   * @param {object} options - Request fields and callbacks.
   * @param {string} options.text - Text to synthesize.
   * @param {string} [options.voice] - Voice identifier; defaults to `"F1"`.
   * @param {string} [options.language] - Sent as `lang`; defaults to `"en"`.
   * @param {number} [options.speed] - Defaults to `1.05`.
   * @param {number} [options.steps] - Defaults to `24`.
   * @param {boolean} [options.visemes] - Defaults to `false`.
   * @param {Function} [options.onAudio] - Called with each binary chunk.
   * @param {Function} [options.onVisemes] - Called with each parsed JSON array.
   * @param {Function} [options.onError] - Called with socket or setup errors.
   * @returns {Promise<void>} Resolves when the socket closes; rejects on a
   *   socket error or when the connection cannot be set up.
   */
  synthesize(options) {
    return new Promise((resolve, reject) => {
      try {
        let url = DEFAULT_URLS.TTS;
        if (this.apiKey) {
          const separator = url.includes("?") ? "&" : "?";
          // Percent-encode the key: characters such as "&", "#", "+", "=" or
          // whitespace would otherwise truncate or corrupt the query string.
          url += `${separator}api_key=${encodeURIComponent(this.apiKey)}`;
        }
        const ws = new WebSocket(url);
        ws.binaryType = "arraybuffer";
        ws.onopen = () => {
          const req = {
            text: options.text,
            voice: options.voice || "F1" /* F1 */,
            lang: options.language || "en" /* ENGLISH */,
            speed: options.speed || 1.05,
            steps: options.steps || 24,
            visemes: options.visemes || false
          };
          ws.send(JSON.stringify(req));
        };
        // Plain (non-async) handler: nothing in here awaits.
        ws.onmessage = (event) => {
          if (event.data instanceof ArrayBuffer) {
            if (options.onAudio) options.onAudio(new Uint8Array(event.data));
            return;
          }
          try {
            const msg = JSON.parse(event.data.toString());
            if (Array.isArray(msg) && options.onVisemes) {
              options.onVisemes(msg);
            }
          } catch (e) {
            // Best-effort by design: text frames that are not valid JSON are
            // skipped so a stray message does not abort the audio stream.
          }
        };
        ws.onerror = (err) => {
          if (options.onError) options.onError(err);
          reject(err);
        };
        ws.onclose = () => {
          // No-op if the promise was already rejected by onerror.
          resolve();
        };
      } catch (err) {
        if (options.onError) options.onError(err);
        reject(err);
      }
    });
  }
};
|
|
833
|
+
/**
 * Convenience helper: builds a VoiceAgentClient from `config`, waits for
 * `connect()` to finish, and hands the client back.
 */
async function simpleConversation(config) {
  const agent = new VoiceAgentClient(config);
  // Wait for connect() to settle before returning the client.
  await agent.connect();
  return agent;
}
|
|
838
|
+
/**
 * Convenience helper: creates a one-off TTSClient from `options.apiKey` and
 * returns the result of `synthesize(options)`.
 */
async function simpleTTS(options) {
  const { apiKey } = options;
  // The same options object is forwarded untouched to synthesize().
  return new TTSClient({ apiKey }).synthesize(options);
}
|
|
840
842
|
// Annotate the CommonJS export names for ESM import in node:
|
|
841
843
|
0 && (module.exports = {
|
|
842
844
|
AUDIO_CONFIG,
|
|
@@ -845,7 +847,6 @@ var VoiceAgent = class {
|
|
|
845
847
|
Language,
|
|
846
848
|
StreamResampler,
|
|
847
849
|
TTSClient,
|
|
848
|
-
VoiceAgent,
|
|
849
850
|
VoiceAgentClient,
|
|
850
851
|
VoiceStyle,
|
|
851
852
|
applyLowPassFilter,
|