@lokutor/sdk 1.1.9 → 1.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +48 -51
- package/dist/index.d.ts +48 -51
- package/dist/index.js +446 -445
- package/dist/index.mjs +446 -444
- package/package.json +1 -1
package/dist/index.mjs
CHANGED
|
@@ -34,289 +34,6 @@ var DEFAULT_URLS = {
|
|
|
34
34
|
TTS: "wss://api.lokutor.com/ws/tts"
|
|
35
35
|
};
|
|
36
36
|
|
|
37
|
-
// src/client.ts
|
|
38
|
-
function base64ToUint8Array(base64) {
|
|
39
|
-
const binaryString = atob(base64);
|
|
40
|
-
const bytes = new Uint8Array(binaryString.length);
|
|
41
|
-
for (let i = 0; i < binaryString.length; i++) {
|
|
42
|
-
bytes[i] = binaryString.charCodeAt(i);
|
|
43
|
-
}
|
|
44
|
-
return bytes;
|
|
45
|
-
}
|
|
46
|
-
var VoiceAgentClient = class {
|
|
47
|
-
ws = null;
|
|
48
|
-
apiKey;
|
|
49
|
-
prompt;
|
|
50
|
-
voice;
|
|
51
|
-
language;
|
|
52
|
-
// Callbacks
|
|
53
|
-
onTranscription;
|
|
54
|
-
onResponse;
|
|
55
|
-
onAudioCallback;
|
|
56
|
-
onVisemesCallback;
|
|
57
|
-
onStatus;
|
|
58
|
-
onError;
|
|
59
|
-
isConnected = false;
|
|
60
|
-
messages = [];
|
|
61
|
-
visemeListeners = [];
|
|
62
|
-
wantVisemes = false;
|
|
63
|
-
serverUrl = null;
|
|
64
|
-
constructor(config) {
|
|
65
|
-
this.apiKey = config.apiKey;
|
|
66
|
-
this.prompt = config.prompt;
|
|
67
|
-
this.voice = config.voice || "F1" /* F1 */;
|
|
68
|
-
this.language = config.language || "en" /* ENGLISH */;
|
|
69
|
-
this.serverUrl = config.serverUrl || null;
|
|
70
|
-
this.onTranscription = config.onTranscription;
|
|
71
|
-
this.onResponse = config.onResponse;
|
|
72
|
-
this.onAudioCallback = config.onAudio;
|
|
73
|
-
this.onVisemesCallback = config.onVisemes;
|
|
74
|
-
this.onStatus = config.onStatus;
|
|
75
|
-
this.onError = config.onError;
|
|
76
|
-
this.wantVisemes = config.visemes || false;
|
|
77
|
-
}
|
|
78
|
-
/**
|
|
79
|
-
* Connect to the Lokutor Voice Agent server
|
|
80
|
-
*/
|
|
81
|
-
async connect() {
|
|
82
|
-
return new Promise((resolve, reject) => {
|
|
83
|
-
try {
|
|
84
|
-
let url = this.serverUrl || DEFAULT_URLS.VOICE_AGENT;
|
|
85
|
-
if (this.apiKey) {
|
|
86
|
-
const separator = url.includes("?") ? "&" : "?";
|
|
87
|
-
url += `${separator}api_key=${this.apiKey}`;
|
|
88
|
-
}
|
|
89
|
-
console.log(`\u{1F517} Connecting to ${url}...`);
|
|
90
|
-
this.ws = new WebSocket(url);
|
|
91
|
-
this.ws.binaryType = "arraybuffer";
|
|
92
|
-
this.ws.onopen = () => {
|
|
93
|
-
this.isConnected = true;
|
|
94
|
-
console.log("\u2705 Connected to voice agent!");
|
|
95
|
-
this.sendConfig();
|
|
96
|
-
resolve(true);
|
|
97
|
-
};
|
|
98
|
-
this.ws.onmessage = async (event) => {
|
|
99
|
-
if (event.data instanceof ArrayBuffer) {
|
|
100
|
-
this.handleBinaryMessage(new Uint8Array(event.data));
|
|
101
|
-
} else {
|
|
102
|
-
this.handleTextMessage(event.data.toString());
|
|
103
|
-
}
|
|
104
|
-
};
|
|
105
|
-
this.ws.onerror = (err) => {
|
|
106
|
-
console.error("\u274C WebSocket error:", err);
|
|
107
|
-
if (this.onError) this.onError(err);
|
|
108
|
-
if (!this.isConnected) reject(err);
|
|
109
|
-
};
|
|
110
|
-
this.ws.onclose = () => {
|
|
111
|
-
this.isConnected = false;
|
|
112
|
-
console.log("Disconnected");
|
|
113
|
-
};
|
|
114
|
-
} catch (err) {
|
|
115
|
-
if (this.onError) this.onError(err);
|
|
116
|
-
reject(err);
|
|
117
|
-
}
|
|
118
|
-
});
|
|
119
|
-
}
|
|
120
|
-
/**
|
|
121
|
-
* Send initial configuration to the server
|
|
122
|
-
*/
|
|
123
|
-
sendConfig() {
|
|
124
|
-
if (!this.ws || !this.isConnected) return;
|
|
125
|
-
this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
|
|
126
|
-
this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
|
|
127
|
-
this.ws.send(JSON.stringify({ type: "language", data: this.language }));
|
|
128
|
-
this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
|
|
129
|
-
console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
|
|
130
|
-
}
|
|
131
|
-
/**
|
|
132
|
-
* Send raw PCM audio data to the server
|
|
133
|
-
* @param audioData Int16 PCM audio buffer
|
|
134
|
-
*/
|
|
135
|
-
sendAudio(audioData) {
|
|
136
|
-
if (this.ws && this.isConnected) {
|
|
137
|
-
this.ws.send(audioData);
|
|
138
|
-
}
|
|
139
|
-
}
|
|
140
|
-
/**
|
|
141
|
-
* Handle incoming binary data (audio response)
|
|
142
|
-
*/
|
|
143
|
-
handleBinaryMessage(data) {
|
|
144
|
-
this.emit("audio", data);
|
|
145
|
-
}
|
|
146
|
-
/**
|
|
147
|
-
* Handle incoming text messages (metadata/transcriptions)
|
|
148
|
-
*/
|
|
149
|
-
handleTextMessage(text) {
|
|
150
|
-
try {
|
|
151
|
-
const msg = JSON.parse(text);
|
|
152
|
-
switch (msg.type) {
|
|
153
|
-
case "audio":
|
|
154
|
-
if (msg.data) {
|
|
155
|
-
const buffer = base64ToUint8Array(msg.data);
|
|
156
|
-
this.handleBinaryMessage(buffer);
|
|
157
|
-
}
|
|
158
|
-
break;
|
|
159
|
-
case "transcript":
|
|
160
|
-
const role = msg.role === "user" ? "user" : "agent";
|
|
161
|
-
this.messages.push({
|
|
162
|
-
role,
|
|
163
|
-
text: msg.data,
|
|
164
|
-
timestamp: Date.now()
|
|
165
|
-
});
|
|
166
|
-
if (msg.role === "user") {
|
|
167
|
-
if (this.onTranscription) this.onTranscription(msg.data);
|
|
168
|
-
console.log(`\u{1F4AC} You: ${msg.data}`);
|
|
169
|
-
} else {
|
|
170
|
-
if (this.onResponse) this.onResponse(msg.data);
|
|
171
|
-
console.log(`\u{1F916} Agent: ${msg.data}`);
|
|
172
|
-
}
|
|
173
|
-
break;
|
|
174
|
-
case "status":
|
|
175
|
-
if (this.onStatus) this.onStatus(msg.data);
|
|
176
|
-
const icons = {
|
|
177
|
-
"interrupted": "\u26A1",
|
|
178
|
-
"thinking": "\u{1F9E0}",
|
|
179
|
-
"speaking": "\u{1F50A}",
|
|
180
|
-
"listening": "\u{1F442}"
|
|
181
|
-
};
|
|
182
|
-
console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
|
|
183
|
-
break;
|
|
184
|
-
case "visemes":
|
|
185
|
-
if (Array.isArray(msg.data) && msg.data.length > 0) {
|
|
186
|
-
this.emit("visemes", msg.data);
|
|
187
|
-
}
|
|
188
|
-
break;
|
|
189
|
-
case "error":
|
|
190
|
-
if (this.onError) this.onError(msg.data);
|
|
191
|
-
console.error(`\u274C Server error: ${msg.data}`);
|
|
192
|
-
break;
|
|
193
|
-
}
|
|
194
|
-
} catch (e) {
|
|
195
|
-
}
|
|
196
|
-
}
|
|
197
|
-
audioListeners = [];
|
|
198
|
-
emit(event, data) {
|
|
199
|
-
if (event === "audio") {
|
|
200
|
-
if (this.onAudioCallback) this.onAudioCallback(data);
|
|
201
|
-
this.audioListeners.forEach((l) => l(data));
|
|
202
|
-
} else if (event === "visemes") {
|
|
203
|
-
if (this.onVisemesCallback) this.onVisemesCallback(data);
|
|
204
|
-
this.visemeListeners.forEach((l) => l(data));
|
|
205
|
-
}
|
|
206
|
-
}
|
|
207
|
-
onAudio(callback) {
|
|
208
|
-
this.audioListeners.push(callback);
|
|
209
|
-
}
|
|
210
|
-
onVisemes(callback) {
|
|
211
|
-
this.visemeListeners.push(callback);
|
|
212
|
-
}
|
|
213
|
-
/**
|
|
214
|
-
* Disconnect from the server
|
|
215
|
-
*/
|
|
216
|
-
disconnect() {
|
|
217
|
-
if (this.ws) {
|
|
218
|
-
this.ws.close();
|
|
219
|
-
this.ws = null;
|
|
220
|
-
}
|
|
221
|
-
}
|
|
222
|
-
/**
|
|
223
|
-
* Update the system prompt mid-conversation
|
|
224
|
-
*/
|
|
225
|
-
updatePrompt(newPrompt) {
|
|
226
|
-
this.prompt = newPrompt;
|
|
227
|
-
if (this.ws && this.isConnected) {
|
|
228
|
-
try {
|
|
229
|
-
this.ws.send(JSON.stringify({ type: "prompt", data: newPrompt }));
|
|
230
|
-
console.log(`\u2699\uFE0F Updated prompt: ${newPrompt.substring(0, 50)}...`);
|
|
231
|
-
} catch (error) {
|
|
232
|
-
console.error("Error updating prompt:", error);
|
|
233
|
-
}
|
|
234
|
-
} else {
|
|
235
|
-
console.warn("Not connected - prompt will be updated on next connection");
|
|
236
|
-
}
|
|
237
|
-
}
|
|
238
|
-
/**
|
|
239
|
-
* Get full conversation transcript
|
|
240
|
-
*/
|
|
241
|
-
getTranscript() {
|
|
242
|
-
return this.messages.slice();
|
|
243
|
-
}
|
|
244
|
-
/**
|
|
245
|
-
* Get conversation as formatted text
|
|
246
|
-
*/
|
|
247
|
-
getTranscriptText() {
|
|
248
|
-
return this.messages.map((msg) => `${msg.role === "user" ? "You" : "Agent"}: ${msg.text}`).join("\n");
|
|
249
|
-
}
|
|
250
|
-
};
|
|
251
|
-
var TTSClient = class {
|
|
252
|
-
apiKey;
|
|
253
|
-
constructor(config) {
|
|
254
|
-
this.apiKey = config.apiKey;
|
|
255
|
-
}
|
|
256
|
-
/**
|
|
257
|
-
* Synthesize text to speech
|
|
258
|
-
*
|
|
259
|
-
* This opens a temporary WebSocket connection, sends the request,
|
|
260
|
-
* and streams back the audio.
|
|
261
|
-
*/
|
|
262
|
-
synthesize(options) {
|
|
263
|
-
return new Promise((resolve, reject) => {
|
|
264
|
-
try {
|
|
265
|
-
let url = DEFAULT_URLS.TTS;
|
|
266
|
-
if (this.apiKey) {
|
|
267
|
-
const separator = url.includes("?") ? "&" : "?";
|
|
268
|
-
url += `${separator}api_key=${this.apiKey}`;
|
|
269
|
-
}
|
|
270
|
-
const ws = new WebSocket(url);
|
|
271
|
-
ws.binaryType = "arraybuffer";
|
|
272
|
-
ws.onopen = () => {
|
|
273
|
-
const req = {
|
|
274
|
-
text: options.text,
|
|
275
|
-
voice: options.voice || "F1" /* F1 */,
|
|
276
|
-
lang: options.language || "en" /* ENGLISH */,
|
|
277
|
-
speed: options.speed || 1.05,
|
|
278
|
-
steps: options.steps || 24,
|
|
279
|
-
visemes: options.visemes || false
|
|
280
|
-
};
|
|
281
|
-
ws.send(JSON.stringify(req));
|
|
282
|
-
};
|
|
283
|
-
ws.onmessage = async (event) => {
|
|
284
|
-
if (event.data instanceof ArrayBuffer) {
|
|
285
|
-
if (options.onAudio) options.onAudio(new Uint8Array(event.data));
|
|
286
|
-
} else {
|
|
287
|
-
try {
|
|
288
|
-
const msg = JSON.parse(event.data.toString());
|
|
289
|
-
if (Array.isArray(msg) && options.onVisemes) {
|
|
290
|
-
options.onVisemes(msg);
|
|
291
|
-
}
|
|
292
|
-
} catch (e) {
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
};
|
|
296
|
-
ws.onerror = (err) => {
|
|
297
|
-
if (options.onError) options.onError(err);
|
|
298
|
-
reject(err);
|
|
299
|
-
};
|
|
300
|
-
ws.onclose = () => {
|
|
301
|
-
resolve();
|
|
302
|
-
};
|
|
303
|
-
} catch (err) {
|
|
304
|
-
if (options.onError) options.onError(err);
|
|
305
|
-
reject(err);
|
|
306
|
-
}
|
|
307
|
-
});
|
|
308
|
-
}
|
|
309
|
-
};
|
|
310
|
-
async function simpleConversation(config) {
|
|
311
|
-
const client = new VoiceAgentClient(config);
|
|
312
|
-
await client.connect();
|
|
313
|
-
return client;
|
|
314
|
-
}
|
|
315
|
-
async function simpleTTS(options) {
|
|
316
|
-
const client = new TTSClient({ apiKey: options.apiKey });
|
|
317
|
-
return client.synthesize(options);
|
|
318
|
-
}
|
|
319
|
-
|
|
320
37
|
// src/audio-utils.ts
|
|
321
38
|
function pcm16ToFloat32(int16Data) {
|
|
322
39
|
const float32 = new Float32Array(int16Data.length);
|
|
@@ -456,11 +173,11 @@ var BrowserAudioManager = class {
|
|
|
456
173
|
scriptProcessor = null;
|
|
457
174
|
analyserNode = null;
|
|
458
175
|
mediaStream = null;
|
|
176
|
+
resampler = null;
|
|
459
177
|
// Playback scheduling
|
|
460
178
|
nextPlaybackTime = 0;
|
|
461
179
|
activeSources = [];
|
|
462
|
-
|
|
463
|
-
audioClockOffset = null;
|
|
180
|
+
playbackQueue = [];
|
|
464
181
|
// Configuration
|
|
465
182
|
inputSampleRate;
|
|
466
183
|
outputSampleRate;
|
|
@@ -473,7 +190,6 @@ var BrowserAudioManager = class {
|
|
|
473
190
|
// Audio processing state
|
|
474
191
|
isMuted = false;
|
|
475
192
|
isListening = false;
|
|
476
|
-
resampler = null;
|
|
477
193
|
constructor(config = {}) {
|
|
478
194
|
this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
|
|
479
195
|
this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
|
|
@@ -497,6 +213,7 @@ var BrowserAudioManager = class {
|
|
|
497
213
|
}
|
|
498
214
|
if (this.audioContext.state === "suspended") {
|
|
499
215
|
await this.audioContext.resume();
|
|
216
|
+
console.log("\u{1F442} AudioContext resumed");
|
|
500
217
|
}
|
|
501
218
|
if (analyserConfig?.enabled !== false) {
|
|
502
219
|
this.analyserNode = this.audioContext.createAnalyser();
|
|
@@ -510,7 +227,6 @@ var BrowserAudioManager = class {
|
|
|
510
227
|
if (!this.audioContext) {
|
|
511
228
|
await this.init();
|
|
512
229
|
}
|
|
513
|
-
this.resampler = new StreamResampler(this.audioContext.sampleRate, this.inputSampleRate);
|
|
514
230
|
try {
|
|
515
231
|
this.onAudioInput = onAudioInput;
|
|
516
232
|
this.isListening = true;
|
|
@@ -526,13 +242,21 @@ var BrowserAudioManager = class {
|
|
|
526
242
|
this.scriptProcessor = this.audioContext.createScriptProcessor(
|
|
527
243
|
bufferSize,
|
|
528
244
|
1,
|
|
245
|
+
// input channels
|
|
529
246
|
1
|
|
247
|
+
// output channels
|
|
530
248
|
);
|
|
531
249
|
this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
|
|
532
250
|
this.scriptProcessor.connect(this.audioContext.destination);
|
|
533
251
|
if (this.analyserNode) {
|
|
534
252
|
this.mediaStreamAudioSourceNode.connect(this.analyserNode);
|
|
535
253
|
}
|
|
254
|
+
const hardwareRate = this.audioContext.sampleRate;
|
|
255
|
+
if (hardwareRate !== this.inputSampleRate) {
|
|
256
|
+
this.resampler = new StreamResampler(hardwareRate, this.inputSampleRate);
|
|
257
|
+
} else {
|
|
258
|
+
this.resampler = null;
|
|
259
|
+
}
|
|
536
260
|
this.scriptProcessor.onaudioprocess = (event) => {
|
|
537
261
|
this._processAudioInput(event);
|
|
538
262
|
};
|
|
@@ -543,19 +267,36 @@ var BrowserAudioManager = class {
|
|
|
543
267
|
throw err;
|
|
544
268
|
}
|
|
545
269
|
}
|
|
270
|
+
/**
|
|
271
|
+
* Internal method to process microphone audio data
|
|
272
|
+
*/
|
|
546
273
|
_processAudioInput(event) {
|
|
547
|
-
if (!this.onAudioInput || !this.audioContext || !this.isListening
|
|
548
|
-
|
|
549
|
-
event.
|
|
550
|
-
const
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
274
|
+
if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
|
|
275
|
+
if (this.isMuted) return;
|
|
276
|
+
const inputBuffer = event.inputBuffer;
|
|
277
|
+
const inputData = inputBuffer.getChannelData(0);
|
|
278
|
+
const outputBuffer = event.outputBuffer;
|
|
279
|
+
for (let i = 0; i < outputBuffer.getChannelData(0).length; i++) {
|
|
280
|
+
outputBuffer.getChannelData(0)[i] = 0;
|
|
281
|
+
}
|
|
282
|
+
let processedData = new Float32Array(inputData);
|
|
283
|
+
if (this.resampler) {
|
|
284
|
+
processedData = this.resampler.process(processedData);
|
|
554
285
|
}
|
|
286
|
+
if (processedData.length === 0) return;
|
|
287
|
+
const int16Data = float32ToPcm16(processedData);
|
|
288
|
+
const uint8Data = new Uint8Array(
|
|
289
|
+
int16Data.buffer,
|
|
290
|
+
int16Data.byteOffset,
|
|
291
|
+
int16Data.byteLength
|
|
292
|
+
);
|
|
293
|
+
this.onAudioInput(uint8Data);
|
|
555
294
|
}
|
|
295
|
+
/**
|
|
296
|
+
* Stop capturing microphone input
|
|
297
|
+
*/
|
|
556
298
|
stopMicrophone() {
|
|
557
299
|
this.isListening = false;
|
|
558
|
-
this.resampler = null;
|
|
559
300
|
if (this.mediaStream) {
|
|
560
301
|
this.mediaStream.getTracks().forEach((track) => track.stop());
|
|
561
302
|
this.mediaStream = null;
|
|
@@ -568,12 +309,17 @@ var BrowserAudioManager = class {
|
|
|
568
309
|
this.mediaStreamAudioSourceNode.disconnect();
|
|
569
310
|
this.mediaStreamAudioSourceNode = null;
|
|
570
311
|
}
|
|
312
|
+
console.log("\u{1F3A4} Microphone stopped");
|
|
571
313
|
}
|
|
572
314
|
/**
|
|
573
315
|
* Play back audio received from the server
|
|
316
|
+
* @param pcm16Data Int16 PCM audio data at SPEAKER_SAMPLE_RATE
|
|
574
317
|
*/
|
|
575
318
|
playAudio(pcm16Data) {
|
|
576
|
-
if (!this.audioContext)
|
|
319
|
+
if (!this.audioContext) {
|
|
320
|
+
console.warn("AudioContext not initialized");
|
|
321
|
+
return;
|
|
322
|
+
}
|
|
577
323
|
const int16Array = new Int16Array(
|
|
578
324
|
pcm16Data.buffer,
|
|
579
325
|
pcm16Data.byteOffset,
|
|
@@ -588,17 +334,18 @@ var BrowserAudioManager = class {
|
|
|
588
334
|
audioBuffer.getChannelData(0).set(float32Data);
|
|
589
335
|
this._schedulePlayback(audioBuffer);
|
|
590
336
|
}
|
|
337
|
+
/**
|
|
338
|
+
* Internal method to schedule and play audio with sample-accurate timing
|
|
339
|
+
*/
|
|
591
340
|
_schedulePlayback(audioBuffer) {
|
|
592
341
|
if (!this.audioContext) return;
|
|
593
342
|
const currentTime = this.audioContext.currentTime;
|
|
594
343
|
const duration = audioBuffer.length / this.outputSampleRate;
|
|
595
344
|
const startTime = Math.max(
|
|
596
345
|
currentTime + 0.01,
|
|
346
|
+
// Minimum 10ms delay
|
|
597
347
|
this.nextPlaybackTime
|
|
598
348
|
);
|
|
599
|
-
if (this.audioClockOffset === null) {
|
|
600
|
-
this.audioClockOffset = startTime;
|
|
601
|
-
}
|
|
602
349
|
this.nextPlaybackTime = startTime + duration;
|
|
603
350
|
const source = this.audioContext.createBufferSource();
|
|
604
351
|
source.buffer = audioBuffer;
|
|
@@ -613,185 +360,441 @@ var BrowserAudioManager = class {
|
|
|
613
360
|
if (index > -1) {
|
|
614
361
|
this.activeSources.splice(index, 1);
|
|
615
362
|
}
|
|
616
|
-
};
|
|
363
|
+
};
|
|
364
|
+
}
|
|
365
|
+
/**
|
|
366
|
+
* Stop all currently playing audio and clear the queue
|
|
367
|
+
*/
|
|
368
|
+
stopPlayback() {
|
|
369
|
+
this.activeSources.forEach((source) => {
|
|
370
|
+
try {
|
|
371
|
+
source.stop();
|
|
372
|
+
} catch (e) {
|
|
373
|
+
}
|
|
374
|
+
});
|
|
375
|
+
this.activeSources = [];
|
|
376
|
+
this.playbackQueue = [];
|
|
377
|
+
this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
|
|
378
|
+
console.log("\u{1F507} Playback stopped");
|
|
379
|
+
}
|
|
380
|
+
/**
|
|
381
|
+
* Toggle mute state
|
|
382
|
+
*/
|
|
383
|
+
setMuted(muted) {
|
|
384
|
+
this.isMuted = muted;
|
|
385
|
+
}
|
|
386
|
+
/**
|
|
387
|
+
* Get current mute state
|
|
388
|
+
*/
|
|
389
|
+
isMicMuted() {
|
|
390
|
+
return this.isMuted;
|
|
391
|
+
}
|
|
392
|
+
/**
|
|
393
|
+
* Get current amplitude from analyser (for visualization)
|
|
394
|
+
* Returns value between 0 and 1
|
|
395
|
+
*/
|
|
396
|
+
getAmplitude() {
|
|
397
|
+
if (!this.analyserNode) return 0;
|
|
398
|
+
const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
|
|
399
|
+
this.analyserNode.getByteTimeDomainData(dataArray);
|
|
400
|
+
const rms = calculateRMS(dataArray);
|
|
401
|
+
return Math.min(rms * 10, 1);
|
|
402
|
+
}
|
|
403
|
+
/**
|
|
404
|
+
* Get frequency data from analyser for visualization
|
|
405
|
+
*/
|
|
406
|
+
getFrequencyData() {
|
|
407
|
+
if (!this.analyserNode) {
|
|
408
|
+
return new Uint8Array(0);
|
|
409
|
+
}
|
|
410
|
+
const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
|
|
411
|
+
this.analyserNode.getByteFrequencyData(dataArray);
|
|
412
|
+
return dataArray;
|
|
413
|
+
}
|
|
414
|
+
/**
|
|
415
|
+
* Get time-domain data from analyser for waveform visualization
|
|
416
|
+
*/
|
|
417
|
+
getWaveformData() {
|
|
418
|
+
if (!this.analyserNode) {
|
|
419
|
+
return new Uint8Array(0);
|
|
420
|
+
}
|
|
421
|
+
const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
|
|
422
|
+
this.analyserNode.getByteTimeDomainData(dataArray);
|
|
423
|
+
return dataArray;
|
|
424
|
+
}
|
|
425
|
+
/**
|
|
426
|
+
* Cleanup and close AudioContext
|
|
427
|
+
*/
|
|
428
|
+
cleanup() {
|
|
429
|
+
this.stopMicrophone();
|
|
430
|
+
this.stopPlayback();
|
|
431
|
+
if (this.analyserNode) {
|
|
432
|
+
this.analyserNode.disconnect();
|
|
433
|
+
this.analyserNode = null;
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
/**
|
|
437
|
+
* Get current audio context state
|
|
438
|
+
*/
|
|
439
|
+
getState() {
|
|
440
|
+
return this.audioContext?.state ?? null;
|
|
441
|
+
}
|
|
442
|
+
/**
|
|
443
|
+
* Check if microphone is currently listening
|
|
444
|
+
*/
|
|
445
|
+
isRecording() {
|
|
446
|
+
return this.isListening;
|
|
447
|
+
}
|
|
448
|
+
};
|
|
449
|
+
|
|
450
|
+
// src/client.ts
|
|
451
|
+
function base64ToUint8Array(base64) {
|
|
452
|
+
const binaryString = atob(base64);
|
|
453
|
+
const bytes = new Uint8Array(binaryString.length);
|
|
454
|
+
for (let i = 0; i < binaryString.length; i++) {
|
|
455
|
+
bytes[i] = binaryString.charCodeAt(i);
|
|
456
|
+
}
|
|
457
|
+
return bytes;
|
|
458
|
+
}
|
|
459
|
+
var VoiceAgentClient = class {
|
|
460
|
+
ws = null;
|
|
461
|
+
apiKey;
|
|
462
|
+
prompt;
|
|
463
|
+
voice;
|
|
464
|
+
language;
|
|
465
|
+
// Callbacks
|
|
466
|
+
onTranscription;
|
|
467
|
+
onResponse;
|
|
468
|
+
onAudioCallback;
|
|
469
|
+
onVisemesCallback;
|
|
470
|
+
onStatus;
|
|
471
|
+
onError;
|
|
472
|
+
isConnected = false;
|
|
473
|
+
messages = [];
|
|
474
|
+
visemeListeners = [];
|
|
475
|
+
wantVisemes = false;
|
|
476
|
+
audioManager = null;
|
|
477
|
+
enableAudio = false;
|
|
478
|
+
// Connection resilience
|
|
479
|
+
isUserDisconnect = false;
|
|
480
|
+
reconnecting = false;
|
|
481
|
+
reconnectAttempts = 0;
|
|
482
|
+
maxReconnectAttempts = 5;
|
|
483
|
+
constructor(config) {
|
|
484
|
+
this.apiKey = config.apiKey;
|
|
485
|
+
this.prompt = config.prompt;
|
|
486
|
+
this.voice = config.voice || "F1" /* F1 */;
|
|
487
|
+
this.language = config.language || "en" /* ENGLISH */;
|
|
488
|
+
this.onTranscription = config.onTranscription;
|
|
489
|
+
this.onResponse = config.onResponse;
|
|
490
|
+
this.onAudioCallback = config.onAudio;
|
|
491
|
+
this.onVisemesCallback = config.onVisemes;
|
|
492
|
+
this.onStatus = config.onStatus;
|
|
493
|
+
this.onError = config.onError;
|
|
494
|
+
this.wantVisemes = config.visemes || false;
|
|
495
|
+
this.enableAudio = config.enableAudio ?? false;
|
|
496
|
+
}
|
|
497
|
+
/**
|
|
498
|
+
* Connect to the Lokutor Voice Agent server
|
|
499
|
+
*/
|
|
500
|
+
async connect() {
|
|
501
|
+
this.isUserDisconnect = false;
|
|
502
|
+
if (this.enableAudio) {
|
|
503
|
+
if (!this.audioManager) {
|
|
504
|
+
this.audioManager = new BrowserAudioManager();
|
|
505
|
+
}
|
|
506
|
+
await this.audioManager.init();
|
|
507
|
+
}
|
|
508
|
+
return new Promise((resolve, reject) => {
|
|
509
|
+
try {
|
|
510
|
+
let url = DEFAULT_URLS.VOICE_AGENT;
|
|
511
|
+
if (this.apiKey) {
|
|
512
|
+
const separator = url.includes("?") ? "&" : "?";
|
|
513
|
+
url += `${separator}api_key=${this.apiKey}`;
|
|
514
|
+
}
|
|
515
|
+
console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
|
|
516
|
+
this.ws = new WebSocket(url);
|
|
517
|
+
this.ws.binaryType = "arraybuffer";
|
|
518
|
+
this.ws.onopen = async () => {
|
|
519
|
+
this.isConnected = true;
|
|
520
|
+
this.reconnectAttempts = 0;
|
|
521
|
+
this.reconnecting = false;
|
|
522
|
+
console.log("\u2705 Connected to voice agent!");
|
|
523
|
+
this.sendConfig();
|
|
524
|
+
if (this.audioManager) {
|
|
525
|
+
await this.audioManager.startMicrophone((data) => {
|
|
526
|
+
if (this.isConnected) {
|
|
527
|
+
this.sendAudio(data);
|
|
528
|
+
}
|
|
529
|
+
});
|
|
530
|
+
}
|
|
531
|
+
resolve(true);
|
|
532
|
+
};
|
|
533
|
+
this.ws.onmessage = async (event) => {
|
|
534
|
+
if (event.data instanceof ArrayBuffer) {
|
|
535
|
+
this.handleBinaryMessage(new Uint8Array(event.data));
|
|
536
|
+
} else {
|
|
537
|
+
this.handleTextMessage(event.data.toString());
|
|
538
|
+
}
|
|
539
|
+
};
|
|
540
|
+
this.ws.onerror = (err) => {
|
|
541
|
+
console.error("\u274C WebSocket error:", err);
|
|
542
|
+
if (this.onError) this.onError(err);
|
|
543
|
+
if (!this.isConnected) reject(err);
|
|
544
|
+
};
|
|
545
|
+
this.ws.onclose = () => {
|
|
546
|
+
this.isConnected = false;
|
|
547
|
+
if (!this.isUserDisconnect && this.reconnectAttempts < this.maxReconnectAttempts) {
|
|
548
|
+
this.reconnecting = true;
|
|
549
|
+
this.reconnectAttempts++;
|
|
550
|
+
const backoffDelay = Math.min(1e3 * Math.pow(2, this.reconnectAttempts), 1e4);
|
|
551
|
+
console.warn(`Connection lost. Reconnecting in ${backoffDelay}ms (attempt ${this.reconnectAttempts}/${this.maxReconnectAttempts})`);
|
|
552
|
+
if (this.onStatus) this.onStatus("reconnecting");
|
|
553
|
+
setTimeout(() => {
|
|
554
|
+
this.connect().catch((e) => console.error("Reconnect failed", e));
|
|
555
|
+
}, backoffDelay);
|
|
556
|
+
} else {
|
|
557
|
+
console.log("Disconnected");
|
|
558
|
+
if (this.onStatus) this.onStatus("disconnected");
|
|
559
|
+
}
|
|
560
|
+
};
|
|
561
|
+
} catch (err) {
|
|
562
|
+
if (this.onError) this.onError(err);
|
|
563
|
+
reject(err);
|
|
564
|
+
}
|
|
565
|
+
});
|
|
617
566
|
}
|
|
618
567
|
/**
|
|
619
|
-
*
|
|
620
|
-
* Total stream time (in ms) = (audioContext.currentTime - audioClockOffset) * 1000
|
|
568
|
+
* Send initial configuration to the server
|
|
621
569
|
*/
|
|
622
|
-
|
|
623
|
-
|
|
570
|
+
sendConfig() {
|
|
571
|
+
if (!this.ws || !this.isConnected) return;
|
|
572
|
+
this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
|
|
573
|
+
this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
|
|
574
|
+
this.ws.send(JSON.stringify({ type: "language", data: this.language }));
|
|
575
|
+
this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
|
|
576
|
+
console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
|
|
624
577
|
}
|
|
625
578
|
/**
|
|
626
|
-
*
|
|
579
|
+
* Send raw PCM audio data to the server
|
|
580
|
+
* @param audioData Int16 PCM audio buffer
|
|
627
581
|
*/
|
|
628
|
-
|
|
629
|
-
this.
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
this.activeSources.forEach((source) => {
|
|
633
|
-
try {
|
|
634
|
-
source.stop();
|
|
635
|
-
} catch (e) {
|
|
636
|
-
}
|
|
637
|
-
});
|
|
638
|
-
this.activeSources = [];
|
|
639
|
-
this.nextPlaybackTime = 0;
|
|
640
|
-
this.resetAudioClock();
|
|
582
|
+
sendAudio(audioData) {
|
|
583
|
+
if (this.ws && this.ws.readyState === WebSocket.OPEN && this.isConnected) {
|
|
584
|
+
this.ws.send(audioData);
|
|
585
|
+
}
|
|
641
586
|
}
|
|
642
|
-
|
|
643
|
-
|
|
587
|
+
/**
|
|
588
|
+
* Handle incoming binary data (audio response)
|
|
589
|
+
*/
|
|
590
|
+
handleBinaryMessage(data) {
|
|
591
|
+
if (this.audioManager) {
|
|
592
|
+
this.audioManager.playAudio(data);
|
|
593
|
+
}
|
|
594
|
+
this.emit("audio", data);
|
|
644
595
|
}
|
|
645
|
-
|
|
646
|
-
|
|
596
|
+
/**
|
|
597
|
+
* Handle incoming text messages (metadata/transcriptions)
|
|
598
|
+
*/
|
|
599
|
+
handleTextMessage(text) {
|
|
600
|
+
try {
|
|
601
|
+
const msg = JSON.parse(text);
|
|
602
|
+
switch (msg.type) {
|
|
603
|
+
case "audio":
|
|
604
|
+
if (msg.data) {
|
|
605
|
+
const buffer = base64ToUint8Array(msg.data);
|
|
606
|
+
this.handleBinaryMessage(buffer);
|
|
607
|
+
}
|
|
608
|
+
break;
|
|
609
|
+
case "transcript":
|
|
610
|
+
const role = msg.role === "user" ? "user" : "agent";
|
|
611
|
+
this.messages.push({
|
|
612
|
+
role,
|
|
613
|
+
text: msg.data,
|
|
614
|
+
timestamp: Date.now()
|
|
615
|
+
});
|
|
616
|
+
if (msg.role === "user") {
|
|
617
|
+
if (this.onTranscription) this.onTranscription(msg.data);
|
|
618
|
+
console.log(`\u{1F4AC} You: ${msg.data}`);
|
|
619
|
+
} else {
|
|
620
|
+
if (this.onResponse) this.onResponse(msg.data);
|
|
621
|
+
console.log(`\u{1F916} Agent: ${msg.data}`);
|
|
622
|
+
}
|
|
623
|
+
break;
|
|
624
|
+
case "status":
|
|
625
|
+
if (msg.data === "interrupted" && this.audioManager) {
|
|
626
|
+
this.audioManager.stopPlayback();
|
|
627
|
+
}
|
|
628
|
+
if (this.onStatus) this.onStatus(msg.data);
|
|
629
|
+
const icons = {
|
|
630
|
+
"interrupted": "\u26A1",
|
|
631
|
+
"thinking": "\u{1F9E0}",
|
|
632
|
+
"speaking": "\u{1F50A}",
|
|
633
|
+
"listening": "\u{1F442}"
|
|
634
|
+
};
|
|
635
|
+
console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
|
|
636
|
+
break;
|
|
637
|
+
case "visemes":
|
|
638
|
+
if (Array.isArray(msg.data) && msg.data.length > 0) {
|
|
639
|
+
this.emit("visemes", msg.data);
|
|
640
|
+
}
|
|
641
|
+
break;
|
|
642
|
+
case "error":
|
|
643
|
+
if (this.onError) this.onError(msg.data);
|
|
644
|
+
console.error(`\u274C Server error: ${msg.data}`);
|
|
645
|
+
break;
|
|
646
|
+
}
|
|
647
|
+
} catch (e) {
|
|
648
|
+
}
|
|
647
649
|
}
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
|
|
650
|
+
audioListeners = [];
|
|
651
|
+
emit(event, data) {
|
|
652
|
+
if (event === "audio") {
|
|
653
|
+
if (this.onAudioCallback) this.onAudioCallback(data);
|
|
654
|
+
this.audioListeners.forEach((l) => l(data));
|
|
655
|
+
} else if (event === "visemes") {
|
|
656
|
+
if (this.onVisemesCallback) this.onVisemesCallback(data);
|
|
657
|
+
this.visemeListeners.forEach((l) => l(data));
|
|
658
|
+
}
|
|
654
659
|
}
|
|
655
|
-
|
|
656
|
-
|
|
657
|
-
const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
|
|
658
|
-
this.analyserNode.getByteFrequencyData(dataArray);
|
|
659
|
-
return dataArray;
|
|
660
|
+
onAudio(callback) {
|
|
661
|
+
this.audioListeners.push(callback);
|
|
660
662
|
}
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
|
|
664
|
-
this.analyserNode.getByteTimeDomainData(dataArray);
|
|
665
|
-
return dataArray;
|
|
663
|
+
onVisemes(callback) {
|
|
664
|
+
this.visemeListeners.push(callback);
|
|
666
665
|
}
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
|
|
666
|
+
/**
|
|
667
|
+
* Disconnect from the server
|
|
668
|
+
*/
|
|
669
|
+
disconnect() {
|
|
670
|
+
this.isUserDisconnect = true;
|
|
671
|
+
if (this.ws) {
|
|
672
|
+
this.ws.close();
|
|
673
|
+
this.ws = null;
|
|
673
674
|
}
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
};
|
|
679
|
-
|
|
680
|
-
// src/voice-agent.ts
|
|
681
|
-
var VoiceAgent = class {
|
|
682
|
-
client;
|
|
683
|
-
audioManager;
|
|
684
|
-
options;
|
|
685
|
-
isConnected = false;
|
|
686
|
-
visemeQueue = [];
|
|
687
|
-
constructor(options) {
|
|
688
|
-
this.options = options;
|
|
689
|
-
this.client = new VoiceAgentClient({
|
|
690
|
-
apiKey: options.apiKey,
|
|
691
|
-
prompt: options.prompt || "You are a helpful and friendly AI assistant.",
|
|
692
|
-
voice: options.voice || "F1" /* F1 */,
|
|
693
|
-
language: options.language || "en" /* ENGLISH */,
|
|
694
|
-
visemes: options.visemes ?? true,
|
|
695
|
-
serverUrl: options.serverUrl,
|
|
696
|
-
onTranscription: (text) => {
|
|
697
|
-
if (options.onTranscription) options.onTranscription(text, true);
|
|
698
|
-
},
|
|
699
|
-
onResponse: (text) => {
|
|
700
|
-
if (options.onTranscription) options.onTranscription(text, false);
|
|
701
|
-
},
|
|
702
|
-
onAudio: (data) => {
|
|
703
|
-
this.audioManager.playAudio(data);
|
|
704
|
-
},
|
|
705
|
-
onVisemes: (visemes) => {
|
|
706
|
-
this.visemeQueue.push(...visemes);
|
|
707
|
-
if (options.onVisemes) options.onVisemes(visemes);
|
|
708
|
-
},
|
|
709
|
-
onStatus: (status) => {
|
|
710
|
-
if (options.onStatusChange) options.onStatusChange(status);
|
|
711
|
-
if (status === "interrupted" || status === "thinking") {
|
|
712
|
-
this.audioManager.stopPlayback();
|
|
713
|
-
this.visemeQueue = [];
|
|
714
|
-
}
|
|
715
|
-
},
|
|
716
|
-
onError: (err) => {
|
|
717
|
-
if (options.onError) options.onError(err);
|
|
718
|
-
}
|
|
719
|
-
});
|
|
720
|
-
this.audioManager = new BrowserAudioManager({
|
|
721
|
-
autoGainControl: true,
|
|
722
|
-
echoCancellation: true,
|
|
723
|
-
noiseSuppression: true
|
|
724
|
-
});
|
|
675
|
+
if (this.audioManager) {
|
|
676
|
+
this.audioManager.cleanup();
|
|
677
|
+
}
|
|
678
|
+
this.isConnected = false;
|
|
725
679
|
}
|
|
726
680
|
/**
|
|
727
|
-
*
|
|
728
|
-
*
|
|
729
|
-
* to satisfy browser AudioContext requirements.
|
|
681
|
+
* Toggles the microphone mute state (if managed by client)
|
|
682
|
+
* returns the new mute state
|
|
730
683
|
*/
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
this.isConnected = true;
|
|
737
|
-
await this.audioManager.startMicrophone((pcm16Data) => {
|
|
738
|
-
if (this.isConnected) {
|
|
739
|
-
this.client.sendAudio(pcm16Data);
|
|
740
|
-
}
|
|
741
|
-
});
|
|
742
|
-
return true;
|
|
743
|
-
} catch (err) {
|
|
744
|
-
if (this.options.onError) this.options.onError(err);
|
|
745
|
-
return false;
|
|
684
|
+
toggleMute() {
|
|
685
|
+
if (this.audioManager) {
|
|
686
|
+
const isMuted = this.audioManager.isMicMuted();
|
|
687
|
+
this.audioManager.setMuted(!isMuted);
|
|
688
|
+
return !isMuted;
|
|
746
689
|
}
|
|
690
|
+
return false;
|
|
747
691
|
}
|
|
748
692
|
/**
|
|
749
|
-
*
|
|
750
|
-
* Useful for voice activity visualization.
|
|
751
|
-
* @returns value between 0 and 1
|
|
693
|
+
* Gets the microphone volume amplitude 0-1 (if managed by client)
|
|
752
694
|
*/
|
|
753
695
|
getAmplitude() {
|
|
754
|
-
|
|
696
|
+
if (this.audioManager) {
|
|
697
|
+
return this.audioManager.getAmplitude();
|
|
698
|
+
}
|
|
699
|
+
return 0;
|
|
755
700
|
}
|
|
756
701
|
/**
|
|
757
|
-
*
|
|
702
|
+
* Update the system prompt mid-conversation
|
|
758
703
|
*/
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
this.
|
|
762
|
-
|
|
704
|
+
updatePrompt(newPrompt) {
|
|
705
|
+
this.prompt = newPrompt;
|
|
706
|
+
if (this.ws && this.isConnected) {
|
|
707
|
+
try {
|
|
708
|
+
this.ws.send(JSON.stringify({ type: "prompt", data: newPrompt }));
|
|
709
|
+
console.log(`\u2699\uFE0F Updated prompt: ${newPrompt.substring(0, 50)}...`);
|
|
710
|
+
} catch (error) {
|
|
711
|
+
console.error("Error updating prompt:", error);
|
|
712
|
+
}
|
|
713
|
+
} else {
|
|
714
|
+
console.warn("Not connected - prompt will be updated on next connection");
|
|
715
|
+
}
|
|
763
716
|
}
|
|
764
717
|
/**
|
|
765
|
-
*
|
|
766
|
-
* at the current playback frame. Use this in a requestAnimationFrame loop.
|
|
718
|
+
* Get full conversation transcript
|
|
767
719
|
*/
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
const audioCtx = this.audioManager.getAudioContext();
|
|
771
|
-
if (offset === null || !audioCtx) return [];
|
|
772
|
-
const streamTime = (audioCtx.currentTime - offset) * 1e3;
|
|
773
|
-
const currentBatch = [];
|
|
774
|
-
while (this.visemeQueue.length > 0 && this.visemeQueue[0].t * 1e3 <= streamTime) {
|
|
775
|
-
currentBatch.push(this.visemeQueue.shift());
|
|
776
|
-
}
|
|
777
|
-
return currentBatch;
|
|
720
|
+
getTranscript() {
|
|
721
|
+
return this.messages.slice();
|
|
778
722
|
}
|
|
779
723
|
/**
|
|
780
|
-
*
|
|
724
|
+
* Get conversation as formatted text
|
|
781
725
|
*/
|
|
782
|
-
|
|
783
|
-
this.
|
|
726
|
+
getTranscriptText() {
|
|
727
|
+
return this.messages.map((msg) => `${msg.role === "user" ? "You" : "Agent"}: ${msg.text}`).join("\n");
|
|
728
|
+
}
|
|
729
|
+
};
|
|
730
|
+
var TTSClient = class {
  // API key sent to the TTS endpoint as the `api_key` query parameter.
  apiKey;
  /**
   * @param config - client configuration; only `config.apiKey` is read.
   */
  constructor(config) {
    this.apiKey = config.apiKey;
  }
  /**
   * Synthesize text to speech
   *
   * This opens a temporary WebSocket connection, sends the request,
   * and streams back the audio.
   *
   * @param options - `text` plus optional `voice`, `language`, `speed`,
   *   `steps`, `visemes`, and the callbacks `onAudio(Uint8Array)`,
   *   `onVisemes(array)` and `onError(err)`.
   * @returns {Promise<void>} resolves when the socket closes; rejects on a
   *   socket error or when the connection cannot be created.
   */
  synthesize(options) {
    return new Promise((resolve, reject) => {
      try {
        let url = DEFAULT_URLS.TTS;
        if (this.apiKey) {
          const separator = url.includes("?") ? "&" : "?";
          // Percent-encode the key so characters such as "&", "=", "#" or
          // whitespace cannot corrupt the query string.
          url += `${separator}api_key=${encodeURIComponent(this.apiKey)}`;
        }
        const ws = new WebSocket(url);
        ws.binaryType = "arraybuffer";
        ws.onopen = () => {
          // The request is sent as a single JSON message once the socket opens.
          const req = {
            text: options.text,
            voice: options.voice || "F1" /* F1 */,
            lang: options.language || "en" /* ENGLISH */,
            speed: options.speed || 1.05,
            steps: options.steps || 24,
            visemes: options.visemes || false
          };
          ws.send(JSON.stringify(req));
        };
        ws.onmessage = (event) => {
          if (event.data instanceof ArrayBuffer) {
            // Binary frames are forwarded to onAudio as raw bytes.
            if (options.onAudio) options.onAudio(new Uint8Array(event.data));
            return;
          }
          try {
            // Text frames that parse to a JSON array are forwarded to onVisemes.
            const msg = JSON.parse(event.data.toString());
            if (Array.isArray(msg) && options.onVisemes) {
              options.onVisemes(msg);
            }
          } catch {
            // Best effort: text frames that fail to parse are ignored.
          }
        };
        ws.onerror = (err) => {
          if (options.onError) options.onError(err);
          reject(err);
        };
        ws.onclose = () => {
          // If onerror already rejected, this resolve is a no-op.
          resolve();
        };
      } catch (err) {
        if (options.onError) options.onError(err);
        reject(err);
      }
    });
  }
};
|
|
789
|
+
/**
 * Builds a VoiceAgentClient from `config`, waits for `connect()` to settle,
 * and returns the client instance.
 */
async function simpleConversation(config) {
  const agent = new VoiceAgentClient(config);
  await agent.connect();
  return agent;
}
|
|
794
|
+
/**
 * One-shot helper: creates a TTSClient with `options.apiKey` and passes the
 * same `options` object on to `synthesize`.
 */
async function simpleTTS(options) {
  const { apiKey } = options;
  const tts = new TTSClient({ apiKey });
  return tts.synthesize(options);
}
|
|
795
798
|
export {
|
|
796
799
|
AUDIO_CONFIG,
|
|
797
800
|
BrowserAudioManager,
|
|
@@ -799,7 +802,6 @@ export {
|
|
|
799
802
|
Language,
|
|
800
803
|
StreamResampler,
|
|
801
804
|
TTSClient,
|
|
802
|
-
VoiceAgent,
|
|
803
805
|
VoiceAgentClient,
|
|
804
806
|
VoiceStyle,
|
|
805
807
|
applyLowPassFilter,
|