@lokutor/sdk 1.1.10 → 1.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -78,287 +78,6 @@ var DEFAULT_URLS = {
78
78
  TTS: "wss://api.lokutor.com/ws/tts"
79
79
  };
80
80
 
81
- // src/client.ts
82
- function base64ToUint8Array(base64) {
83
- const binaryString = atob(base64);
84
- const bytes = new Uint8Array(binaryString.length);
85
- for (let i = 0; i < binaryString.length; i++) {
86
- bytes[i] = binaryString.charCodeAt(i);
87
- }
88
- return bytes;
89
- }
90
- var VoiceAgentClient = class {
91
- ws = null;
92
- apiKey;
93
- prompt;
94
- voice;
95
- language;
96
- // Callbacks
97
- onTranscription;
98
- onResponse;
99
- onAudioCallback;
100
- onVisemesCallback;
101
- onStatus;
102
- onError;
103
- isConnected = false;
104
- messages = [];
105
- visemeListeners = [];
106
- wantVisemes = false;
107
- constructor(config) {
108
- this.apiKey = config.apiKey;
109
- this.prompt = config.prompt;
110
- this.voice = config.voice || "F1" /* F1 */;
111
- this.language = config.language || "en" /* ENGLISH */;
112
- this.onTranscription = config.onTranscription;
113
- this.onResponse = config.onResponse;
114
- this.onAudioCallback = config.onAudio;
115
- this.onVisemesCallback = config.onVisemes;
116
- this.onStatus = config.onStatus;
117
- this.onError = config.onError;
118
- this.wantVisemes = config.visemes || false;
119
- }
120
- /**
121
- * Connect to the Lokutor Voice Agent server
122
- */
123
- async connect() {
124
- return new Promise((resolve, reject) => {
125
- try {
126
- let url = DEFAULT_URLS.VOICE_AGENT;
127
- if (this.apiKey) {
128
- const separator = url.includes("?") ? "&" : "?";
129
- url += `${separator}api_key=${this.apiKey}`;
130
- }
131
- console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
132
- this.ws = new WebSocket(url);
133
- this.ws.binaryType = "arraybuffer";
134
- this.ws.onopen = () => {
135
- this.isConnected = true;
136
- console.log("\u2705 Connected to voice agent!");
137
- this.sendConfig();
138
- resolve(true);
139
- };
140
- this.ws.onmessage = async (event) => {
141
- if (event.data instanceof ArrayBuffer) {
142
- this.handleBinaryMessage(new Uint8Array(event.data));
143
- } else {
144
- this.handleTextMessage(event.data.toString());
145
- }
146
- };
147
- this.ws.onerror = (err) => {
148
- console.error("\u274C WebSocket error:", err);
149
- if (this.onError) this.onError(err);
150
- if (!this.isConnected) reject(err);
151
- };
152
- this.ws.onclose = () => {
153
- this.isConnected = false;
154
- console.log("Disconnected");
155
- };
156
- } catch (err) {
157
- if (this.onError) this.onError(err);
158
- reject(err);
159
- }
160
- });
161
- }
162
- /**
163
- * Send initial configuration to the server
164
- */
165
- sendConfig() {
166
- if (!this.ws || !this.isConnected) return;
167
- this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
168
- this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
169
- this.ws.send(JSON.stringify({ type: "language", data: this.language }));
170
- this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
171
- console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
172
- }
173
- /**
174
- * Send raw PCM audio data to the server
175
- * @param audioData Int16 PCM audio buffer
176
- */
177
- sendAudio(audioData) {
178
- if (this.ws && this.isConnected) {
179
- this.ws.send(audioData);
180
- }
181
- }
182
- /**
183
- * Handle incoming binary data (audio response)
184
- */
185
- handleBinaryMessage(data) {
186
- this.emit("audio", data);
187
- }
188
- /**
189
- * Handle incoming text messages (metadata/transcriptions)
190
- */
191
- handleTextMessage(text) {
192
- try {
193
- const msg = JSON.parse(text);
194
- switch (msg.type) {
195
- case "audio":
196
- if (msg.data) {
197
- const buffer = base64ToUint8Array(msg.data);
198
- this.handleBinaryMessage(buffer);
199
- }
200
- break;
201
- case "transcript":
202
- const role = msg.role === "user" ? "user" : "agent";
203
- this.messages.push({
204
- role,
205
- text: msg.data,
206
- timestamp: Date.now()
207
- });
208
- if (msg.role === "user") {
209
- if (this.onTranscription) this.onTranscription(msg.data);
210
- console.log(`\u{1F4AC} You: ${msg.data}`);
211
- } else {
212
- if (this.onResponse) this.onResponse(msg.data);
213
- console.log(`\u{1F916} Agent: ${msg.data}`);
214
- }
215
- break;
216
- case "status":
217
- if (this.onStatus) this.onStatus(msg.data);
218
- const icons = {
219
- "interrupted": "\u26A1",
220
- "thinking": "\u{1F9E0}",
221
- "speaking": "\u{1F50A}",
222
- "listening": "\u{1F442}"
223
- };
224
- console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
225
- break;
226
- case "visemes":
227
- if (Array.isArray(msg.data) && msg.data.length > 0) {
228
- this.emit("visemes", msg.data);
229
- }
230
- break;
231
- case "error":
232
- if (this.onError) this.onError(msg.data);
233
- console.error(`\u274C Server error: ${msg.data}`);
234
- break;
235
- }
236
- } catch (e) {
237
- }
238
- }
239
- audioListeners = [];
240
- emit(event, data) {
241
- if (event === "audio") {
242
- if (this.onAudioCallback) this.onAudioCallback(data);
243
- this.audioListeners.forEach((l) => l(data));
244
- } else if (event === "visemes") {
245
- if (this.onVisemesCallback) this.onVisemesCallback(data);
246
- this.visemeListeners.forEach((l) => l(data));
247
- }
248
- }
249
- onAudio(callback) {
250
- this.audioListeners.push(callback);
251
- }
252
- onVisemes(callback) {
253
- this.visemeListeners.push(callback);
254
- }
255
- /**
256
- * Disconnect from the server
257
- */
258
- disconnect() {
259
- if (this.ws) {
260
- this.ws.close();
261
- this.ws = null;
262
- }
263
- }
264
- /**
265
- * Update the system prompt mid-conversation
266
- */
267
- updatePrompt(newPrompt) {
268
- this.prompt = newPrompt;
269
- if (this.ws && this.isConnected) {
270
- try {
271
- this.ws.send(JSON.stringify({ type: "prompt", data: newPrompt }));
272
- console.log(`\u2699\uFE0F Updated prompt: ${newPrompt.substring(0, 50)}...`);
273
- } catch (error) {
274
- console.error("Error updating prompt:", error);
275
- }
276
- } else {
277
- console.warn("Not connected - prompt will be updated on next connection");
278
- }
279
- }
280
- /**
281
- * Get full conversation transcript
282
- */
283
- getTranscript() {
284
- return this.messages.slice();
285
- }
286
- /**
287
- * Get conversation as formatted text
288
- */
289
- getTranscriptText() {
290
- return this.messages.map((msg) => `${msg.role === "user" ? "You" : "Agent"}: ${msg.text}`).join("\n");
291
- }
292
- };
293
- var TTSClient = class {
294
- apiKey;
295
- constructor(config) {
296
- this.apiKey = config.apiKey;
297
- }
298
- /**
299
- * Synthesize text to speech
300
- *
301
- * This opens a temporary WebSocket connection, sends the request,
302
- * and streams back the audio.
303
- */
304
- synthesize(options) {
305
- return new Promise((resolve, reject) => {
306
- try {
307
- let url = DEFAULT_URLS.TTS;
308
- if (this.apiKey) {
309
- const separator = url.includes("?") ? "&" : "?";
310
- url += `${separator}api_key=${this.apiKey}`;
311
- }
312
- const ws = new WebSocket(url);
313
- ws.binaryType = "arraybuffer";
314
- ws.onopen = () => {
315
- const req = {
316
- text: options.text,
317
- voice: options.voice || "F1" /* F1 */,
318
- lang: options.language || "en" /* ENGLISH */,
319
- speed: options.speed || 1.05,
320
- steps: options.steps || 24,
321
- visemes: options.visemes || false
322
- };
323
- ws.send(JSON.stringify(req));
324
- };
325
- ws.onmessage = async (event) => {
326
- if (event.data instanceof ArrayBuffer) {
327
- if (options.onAudio) options.onAudio(new Uint8Array(event.data));
328
- } else {
329
- try {
330
- const msg = JSON.parse(event.data.toString());
331
- if (Array.isArray(msg) && options.onVisemes) {
332
- options.onVisemes(msg);
333
- }
334
- } catch (e) {
335
- }
336
- }
337
- };
338
- ws.onerror = (err) => {
339
- if (options.onError) options.onError(err);
340
- reject(err);
341
- };
342
- ws.onclose = () => {
343
- resolve();
344
- };
345
- } catch (err) {
346
- if (options.onError) options.onError(err);
347
- reject(err);
348
- }
349
- });
350
- }
351
- };
352
- async function simpleConversation(config) {
353
- const client = new VoiceAgentClient(config);
354
- await client.connect();
355
- return client;
356
- }
357
- async function simpleTTS(options) {
358
- const client = new TTSClient({ apiKey: options.apiKey });
359
- return client.synthesize(options);
360
- }
361
-
362
81
  // src/audio-utils.ts
363
82
  function pcm16ToFloat32(int16Data) {
364
83
  const float32 = new Float32Array(int16Data.length);
@@ -498,6 +217,7 @@ var BrowserAudioManager = class {
498
217
  scriptProcessor = null;
499
218
  analyserNode = null;
500
219
  mediaStream = null;
220
+ resampler = null;
501
221
  // Playback scheduling
502
222
  nextPlaybackTime = 0;
503
223
  activeSources = [];
@@ -575,6 +295,12 @@ var BrowserAudioManager = class {
575
295
  if (this.analyserNode) {
576
296
  this.mediaStreamAudioSourceNode.connect(this.analyserNode);
577
297
  }
298
+ const hardwareRate = this.audioContext.sampleRate;
299
+ if (hardwareRate !== this.inputSampleRate) {
300
+ this.resampler = new StreamResampler(hardwareRate, this.inputSampleRate);
301
+ } else {
302
+ this.resampler = null;
303
+ }
578
304
  this.scriptProcessor.onaudioprocess = (event) => {
579
305
  this._processAudioInput(event);
580
306
  };
@@ -597,15 +323,11 @@ var BrowserAudioManager = class {
597
323
  for (let i = 0; i < outputBuffer.getChannelData(0).length; i++) {
598
324
  outputBuffer.getChannelData(0)[i] = 0;
599
325
  }
600
- const hardwareRate = this.audioContext.sampleRate;
601
326
  let processedData = new Float32Array(inputData);
602
- if (hardwareRate !== this.inputSampleRate) {
603
- processedData = resampleWithAntiAliasing(
604
- processedData,
605
- hardwareRate,
606
- this.inputSampleRate
607
- );
327
+ if (this.resampler) {
328
+ processedData = this.resampler.process(processedData);
608
329
  }
330
+ if (processedData.length === 0) return;
609
331
  const int16Data = float32ToPcm16(processedData);
610
332
  const uint8Data = new Uint8Array(
611
333
  int16Data.buffer,
@@ -657,117 +379,487 @@ var BrowserAudioManager = class {
657
379
  this._schedulePlayback(audioBuffer);
658
380
  }
659
381
  /**
660
- * Internal method to schedule and play audio with sample-accurate timing
382
+ * Internal method to schedule and play audio with sample-accurate timing
383
+ */
384
+ _schedulePlayback(audioBuffer) {
385
+ if (!this.audioContext) return;
386
+ const currentTime = this.audioContext.currentTime;
387
+ const duration = audioBuffer.length / this.outputSampleRate;
388
+ const startTime = Math.max(
389
+ currentTime + 0.01,
390
+ // Minimum 10ms delay
391
+ this.nextPlaybackTime
392
+ );
393
+ this.nextPlaybackTime = startTime + duration;
394
+ const source = this.audioContext.createBufferSource();
395
+ source.buffer = audioBuffer;
396
+ source.connect(this.audioContext.destination);
397
+ if (this.analyserNode) {
398
+ source.connect(this.analyserNode);
399
+ }
400
+ source.start(startTime);
401
+ this.activeSources.push(source);
402
+ source.onended = () => {
403
+ const index = this.activeSources.indexOf(source);
404
+ if (index > -1) {
405
+ this.activeSources.splice(index, 1);
406
+ }
407
+ };
408
+ }
409
+ /**
410
+ * Stop all currently playing audio and clear the queue
411
+ */
412
+ stopPlayback() {
413
+ this.activeSources.forEach((source) => {
414
+ try {
415
+ source.stop();
416
+ } catch (e) {
417
+ }
418
+ });
419
+ this.activeSources = [];
420
+ this.playbackQueue = [];
421
+ this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
422
+ console.log("\u{1F507} Playback stopped");
423
+ }
424
+ /**
425
+ * Toggle mute state
426
+ */
427
+ setMuted(muted) {
428
+ this.isMuted = muted;
429
+ }
430
+ /**
431
+ * Get current mute state
432
+ */
433
+ isMicMuted() {
434
+ return this.isMuted;
435
+ }
436
+ /**
437
+ * Get current amplitude from analyser (for visualization)
438
+ * Returns value between 0 and 1
439
+ */
440
+ getAmplitude() {
441
+ if (!this.analyserNode) return 0;
442
+ const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
443
+ this.analyserNode.getByteTimeDomainData(dataArray);
444
+ const rms = calculateRMS(dataArray);
445
+ return Math.min(rms * 10, 1);
446
+ }
447
+ /**
448
+ * Get frequency data from analyser for visualization
449
+ */
450
+ getFrequencyData() {
451
+ if (!this.analyserNode) {
452
+ return new Uint8Array(0);
453
+ }
454
+ const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
455
+ this.analyserNode.getByteFrequencyData(dataArray);
456
+ return dataArray;
457
+ }
458
+ /**
459
+ * Get time-domain data from analyser for waveform visualization
460
+ */
461
+ getWaveformData() {
462
+ if (!this.analyserNode) {
463
+ return new Uint8Array(0);
464
+ }
465
+ const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
466
+ this.analyserNode.getByteTimeDomainData(dataArray);
467
+ return dataArray;
468
+ }
469
+ /**
470
+ * Cleanup and close AudioContext
471
+ */
472
+ cleanup() {
473
+ this.stopMicrophone();
474
+ this.stopPlayback();
475
+ if (this.analyserNode) {
476
+ this.analyserNode.disconnect();
477
+ this.analyserNode = null;
478
+ }
479
+ }
480
+ /**
481
+ * Get current audio context state
482
+ */
483
+ getState() {
484
+ return this.audioContext?.state ?? null;
485
+ }
486
+ /**
487
+ * Check if microphone is currently listening
488
+ */
489
+ isRecording() {
490
+ return this.isListening;
491
+ }
492
+ };
493
+
494
+ // src/client.ts
495
+ function base64ToUint8Array(base64) {
496
+ const binaryString = atob(base64);
497
+ const bytes = new Uint8Array(binaryString.length);
498
+ for (let i = 0; i < binaryString.length; i++) {
499
+ bytes[i] = binaryString.charCodeAt(i);
500
+ }
501
+ return bytes;
502
+ }
503
+ var VoiceAgentClient = class {
504
+ ws = null;
505
+ apiKey;
506
+ prompt;
507
+ voice;
508
+ language;
509
+ tools = [];
510
+ // Callbacks
511
+ onTranscription;
512
+ onResponse;
513
+ onAudioCallback;
514
+ onVisemesCallback;
515
+ onStatus;
516
+ onError;
517
+ isConnected = false;
518
+ messages = [];
519
+ visemeListeners = [];
520
+ wantVisemes = false;
521
+ audioManager = null;
522
+ enableAudio = false;
523
+ currentGeneration = 0;
524
+ // Connection resilience
525
+ isUserDisconnect = false;
526
+ reconnecting = false;
527
+ reconnectAttempts = 0;
528
+ maxReconnectAttempts = 5;
529
+ constructor(config) {
530
+ this.apiKey = config.apiKey;
531
+ this.prompt = config.prompt;
532
+ this.voice = config.voice || "F1" /* F1 */;
533
+ this.language = config.language || "en" /* ENGLISH */;
534
+ this.onTranscription = config.onTranscription;
535
+ this.onResponse = config.onResponse;
536
+ this.onAudioCallback = config.onAudio;
537
+ this.onVisemesCallback = config.onVisemes;
538
+ this.onStatus = config.onStatus;
539
+ this.onError = config.onError;
540
+ this.wantVisemes = config.visemes || false;
541
+ this.enableAudio = config.enableAudio ?? false;
542
+ this.tools = config.tools || [];
543
+ }
544
+ /**
545
+ * Connect to the Lokutor Voice Agent server
546
+ */
547
+ async connect() {
548
+ this.isUserDisconnect = false;
549
+ if (this.enableAudio) {
550
+ if (!this.audioManager) {
551
+ this.audioManager = new BrowserAudioManager();
552
+ }
553
+ await this.audioManager.init();
554
+ }
555
+ return new Promise((resolve, reject) => {
556
+ try {
557
+ let url = DEFAULT_URLS.VOICE_AGENT;
558
+ if (this.apiKey) {
559
+ const separator = url.includes("?") ? "&" : "?";
560
+ url += `${separator}api_key=${this.apiKey}`;
561
+ }
562
+ console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
563
+ this.ws = new WebSocket(url);
564
+ this.ws.binaryType = "arraybuffer";
565
+ this.ws.onopen = async () => {
566
+ this.isConnected = true;
567
+ this.reconnectAttempts = 0;
568
+ this.reconnecting = false;
569
+ console.log("\u2705 Connected to voice agent!");
570
+ this.sendConfig();
571
+ if (this.audioManager) {
572
+ await this.audioManager.startMicrophone((data) => {
573
+ if (this.isConnected) {
574
+ this.sendAudio(data);
575
+ }
576
+ });
577
+ }
578
+ resolve(true);
579
+ };
580
+ this.ws.onmessage = async (event) => {
581
+ if (event.data instanceof ArrayBuffer) {
582
+ this.handleBinaryMessage(new Uint8Array(event.data));
583
+ } else {
584
+ this.handleTextMessage(event.data.toString());
585
+ }
586
+ };
587
+ this.ws.onerror = (err) => {
588
+ console.error("\u274C WebSocket error:", err);
589
+ if (this.onError) this.onError(err);
590
+ if (!this.isConnected) reject(err);
591
+ };
592
+ this.ws.onclose = () => {
593
+ this.isConnected = false;
594
+ if (!this.isUserDisconnect && this.reconnectAttempts < this.maxReconnectAttempts) {
595
+ this.reconnecting = true;
596
+ this.reconnectAttempts++;
597
+ const backoffDelay = Math.min(1e3 * Math.pow(2, this.reconnectAttempts), 1e4);
598
+ console.warn(`Connection lost. Reconnecting in ${backoffDelay}ms (attempt ${this.reconnectAttempts}/${this.maxReconnectAttempts})`);
599
+ if (this.onStatus) this.onStatus("reconnecting");
600
+ setTimeout(() => {
601
+ this.connect().catch((e) => console.error("Reconnect failed", e));
602
+ }, backoffDelay);
603
+ } else {
604
+ console.log("Disconnected");
605
+ if (this.onStatus) this.onStatus("disconnected");
606
+ }
607
+ };
608
+ } catch (err) {
609
+ if (this.onError) this.onError(err);
610
+ reject(err);
611
+ }
612
+ });
613
+ }
614
+ /**
615
+ * Send initial configuration to the server
661
616
  */
662
- _schedulePlayback(audioBuffer) {
663
- if (!this.audioContext) return;
664
- const currentTime = this.audioContext.currentTime;
665
- const duration = audioBuffer.length / this.outputSampleRate;
666
- const startTime = Math.max(
667
- currentTime + 0.01,
668
- // Minimum 10ms delay
669
- this.nextPlaybackTime
670
- );
671
- this.nextPlaybackTime = startTime + duration;
672
- const source = this.audioContext.createBufferSource();
673
- source.buffer = audioBuffer;
674
- source.connect(this.audioContext.destination);
675
- if (this.analyserNode) {
676
- source.connect(this.analyserNode);
617
+ sendConfig() {
618
+ if (!this.ws || !this.isConnected) return;
619
+ this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
620
+ this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
621
+ this.ws.send(JSON.stringify({ type: "language", data: this.language }));
622
+ this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
623
+ if (this.tools && this.tools.length > 0) {
624
+ this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
677
625
  }
678
- source.start(startTime);
679
- this.activeSources.push(source);
680
- source.onended = () => {
681
- const index = this.activeSources.indexOf(source);
682
- if (index > -1) {
683
- this.activeSources.splice(index, 1);
684
- }
685
- };
626
+ console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
686
627
  }
687
628
  /**
688
- * Stop all currently playing audio and clear the queue
629
+ * Send raw PCM audio data to the server
630
+ * @param audioData Int16 PCM audio buffer
689
631
  */
690
- stopPlayback() {
691
- this.activeSources.forEach((source) => {
692
- try {
693
- source.stop();
694
- } catch (e) {
695
- }
696
- });
697
- this.activeSources = [];
698
- this.playbackQueue = [];
699
- this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
700
- console.log("\u{1F507} Playback stopped");
632
+ sendAudio(audioData) {
633
+ if (this.ws && this.ws.readyState === WebSocket.OPEN && this.isConnected) {
634
+ this.ws.send(audioData);
635
+ }
701
636
  }
702
637
  /**
703
- * Toggle mute state
638
+ * Handle incoming binary data (audio response)
704
639
  */
705
- setMuted(muted) {
706
- this.isMuted = muted;
640
+ handleBinaryMessage(data, generation) {
641
+ if (generation !== void 0 && generation < this.currentGeneration) {
642
+ console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
643
+ return;
644
+ }
645
+ if (this.audioManager) {
646
+ this.audioManager.playAudio(data);
647
+ }
648
+ this.emit("audio", data);
707
649
  }
708
650
  /**
709
- * Get current mute state
651
+ * Handle incoming text messages (metadata/transcriptions)
710
652
  */
711
- isMicMuted() {
712
- return this.isMuted;
653
+ handleTextMessage(text) {
654
+ try {
655
+ const msg = JSON.parse(text);
656
+ switch (msg.type) {
657
+ case "audio":
658
+ if (msg.data) {
659
+ const buffer = base64ToUint8Array(msg.data);
660
+ this.handleBinaryMessage(buffer, msg.generation);
661
+ }
662
+ break;
663
+ case "transcript":
664
+ const role = msg.role === "user" ? "user" : "agent";
665
+ this.messages.push({
666
+ role,
667
+ text: msg.data,
668
+ timestamp: Date.now()
669
+ });
670
+ if (msg.role === "user") {
671
+ if (this.onTranscription) this.onTranscription(msg.data);
672
+ console.log(`\u{1F4AC} You: ${msg.data}`);
673
+ } else {
674
+ if (this.onResponse) this.onResponse(msg.data);
675
+ console.log(`\u{1F916} Agent: ${msg.data}`);
676
+ }
677
+ break;
678
+ case "status":
679
+ if (msg.data === "thinking") {
680
+ const newGen = msg.generation || 0;
681
+ if (newGen > this.currentGeneration) {
682
+ console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
683
+ this.currentGeneration = newGen;
684
+ if (this.audioManager) this.audioManager.stopPlayback();
685
+ }
686
+ }
687
+ if (msg.data === "interrupted" && this.audioManager) {
688
+ this.audioManager.stopPlayback();
689
+ }
690
+ if (this.onStatus) this.onStatus(msg.data);
691
+ const icons = {
692
+ "interrupted": "\u26A1",
693
+ "thinking": "\u{1F9E0}",
694
+ "speaking": "\u{1F50A}",
695
+ "listening": "\u{1F442}"
696
+ };
697
+ console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
698
+ break;
699
+ case "visemes":
700
+ if (Array.isArray(msg.data) && msg.data.length > 0) {
701
+ this.emit("visemes", msg.data);
702
+ }
703
+ break;
704
+ case "error":
705
+ if (this.onError) this.onError(msg.data);
706
+ console.error(`\u274C Server error: ${msg.data}`);
707
+ break;
708
+ case "tool_call":
709
+ console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
710
+ break;
711
+ }
712
+ } catch (e) {
713
+ }
714
+ }
715
+ audioListeners = [];
716
+ emit(event, data) {
717
+ if (event === "audio") {
718
+ if (this.onAudioCallback) this.onAudioCallback(data);
719
+ this.audioListeners.forEach((l) => l(data));
720
+ } else if (event === "visemes") {
721
+ if (this.onVisemesCallback) this.onVisemesCallback(data);
722
+ this.visemeListeners.forEach((l) => l(data));
723
+ }
724
+ }
725
+ onAudio(callback) {
726
+ this.audioListeners.push(callback);
727
+ }
728
+ onVisemes(callback) {
729
+ this.visemeListeners.push(callback);
713
730
  }
714
731
  /**
715
- * Get current amplitude from analyser (for visualization)
716
- * Returns value between 0 and 1
732
+ * Disconnect from the server
717
733
  */
718
- getAmplitude() {
719
- if (!this.analyserNode) return 0;
720
- const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
721
- this.analyserNode.getByteTimeDomainData(dataArray);
722
- const rms = calculateRMS(dataArray);
723
- return Math.min(rms * 10, 1);
734
+ disconnect() {
735
+ this.isUserDisconnect = true;
736
+ if (this.ws) {
737
+ this.ws.close();
738
+ this.ws = null;
739
+ }
740
+ if (this.audioManager) {
741
+ this.audioManager.cleanup();
742
+ }
743
+ this.isConnected = false;
724
744
  }
725
745
  /**
726
- * Get frequency data from analyser for visualization
746
+ * Toggles the microphone mute state (if managed by client)
747
+ * returns the new mute state
727
748
  */
728
- getFrequencyData() {
729
- if (!this.analyserNode) {
730
- return new Uint8Array(0);
749
+ toggleMute() {
750
+ if (this.audioManager) {
751
+ const isMuted = this.audioManager.isMicMuted();
752
+ this.audioManager.setMuted(!isMuted);
753
+ return !isMuted;
731
754
  }
732
- const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
733
- this.analyserNode.getByteFrequencyData(dataArray);
734
- return dataArray;
755
+ return false;
735
756
  }
736
757
  /**
737
- * Get time-domain data from analyser for waveform visualization
758
+ * Gets the microphone volume amplitude 0-1 (if managed by client)
738
759
  */
739
- getWaveformData() {
740
- if (!this.analyserNode) {
741
- return new Uint8Array(0);
760
+ getAmplitude() {
761
+ if (this.audioManager) {
762
+ return this.audioManager.getAmplitude();
742
763
  }
743
- const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
744
- this.analyserNode.getByteTimeDomainData(dataArray);
745
- return dataArray;
764
+ return 0;
746
765
  }
747
766
  /**
748
- * Cleanup and close AudioContext
767
+ * Update the system prompt mid-conversation
749
768
  */
750
- cleanup() {
751
- this.stopMicrophone();
752
- this.stopPlayback();
753
- if (this.analyserNode) {
754
- this.analyserNode.disconnect();
755
- this.analyserNode = null;
769
+ updatePrompt(newPrompt) {
770
+ this.prompt = newPrompt;
771
+ if (this.ws && this.isConnected) {
772
+ try {
773
+ this.ws.send(JSON.stringify({ type: "prompt", data: newPrompt }));
774
+ console.log(`\u2699\uFE0F Updated prompt: ${newPrompt.substring(0, 50)}...`);
775
+ } catch (error) {
776
+ console.error("Error updating prompt:", error);
777
+ }
778
+ } else {
779
+ console.warn("Not connected - prompt will be updated on next connection");
756
780
  }
757
781
  }
758
782
  /**
759
- * Get current audio context state
783
+ * Get full conversation transcript
760
784
  */
761
- getState() {
762
- return this.audioContext?.state ?? null;
785
+ getTranscript() {
786
+ return this.messages.slice();
763
787
  }
764
788
  /**
765
- * Check if microphone is currently listening
789
+ * Get conversation as formatted text
766
790
  */
767
- isRecording() {
768
- return this.isListening;
791
+ getTranscriptText() {
792
+ return this.messages.map((msg) => `${msg.role === "user" ? "You" : "Agent"}: ${msg.text}`).join("\n");
793
+ }
794
+ };
795
+ var TTSClient = class {
796
+ apiKey;
797
+ constructor(config) {
798
+ this.apiKey = config.apiKey;
799
+ }
800
+ /**
801
+ * Synthesize text to speech
802
+ *
803
+ * This opens a temporary WebSocket connection, sends the request,
804
+ * and streams back the audio.
805
+ */
806
+ synthesize(options) {
807
+ return new Promise((resolve, reject) => {
808
+ try {
809
+ let url = DEFAULT_URLS.TTS;
810
+ if (this.apiKey) {
811
+ const separator = url.includes("?") ? "&" : "?";
812
+ url += `${separator}api_key=${this.apiKey}`;
813
+ }
814
+ const ws = new WebSocket(url);
815
+ ws.binaryType = "arraybuffer";
816
+ ws.onopen = () => {
817
+ const req = {
818
+ text: options.text,
819
+ voice: options.voice || "F1" /* F1 */,
820
+ lang: options.language || "en" /* ENGLISH */,
821
+ speed: options.speed || 1.05,
822
+ steps: options.steps || 24,
823
+ visemes: options.visemes || false
824
+ };
825
+ ws.send(JSON.stringify(req));
826
+ };
827
+ ws.onmessage = async (event) => {
828
+ if (event.data instanceof ArrayBuffer) {
829
+ if (options.onAudio) options.onAudio(new Uint8Array(event.data));
830
+ } else {
831
+ try {
832
+ const msg = JSON.parse(event.data.toString());
833
+ if (Array.isArray(msg) && options.onVisemes) {
834
+ options.onVisemes(msg);
835
+ }
836
+ } catch (e) {
837
+ }
838
+ }
839
+ };
840
+ ws.onerror = (err) => {
841
+ if (options.onError) options.onError(err);
842
+ reject(err);
843
+ };
844
+ ws.onclose = () => {
845
+ resolve();
846
+ };
847
+ } catch (err) {
848
+ if (options.onError) options.onError(err);
849
+ reject(err);
850
+ }
851
+ });
769
852
  }
770
853
  };
854
+ async function simpleConversation(config) {
855
+ const client = new VoiceAgentClient(config);
856
+ await client.connect();
857
+ return client;
858
+ }
859
+ async function simpleTTS(options) {
860
+ const client = new TTSClient({ apiKey: options.apiKey });
861
+ return client.synthesize(options);
862
+ }
771
863
  // Annotate the CommonJS export names for ESM import in node:
772
864
  0 && (module.exports = {
773
865
  AUDIO_CONFIG,