@lokutor/sdk 1.1.10 → 1.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -34,287 +34,6 @@ var DEFAULT_URLS = {
34
34
  TTS: "wss://api.lokutor.com/ws/tts"
35
35
  };
36
36
 
37
- // src/client.ts
38
- function base64ToUint8Array(base64) {
39
- const binaryString = atob(base64);
40
- const bytes = new Uint8Array(binaryString.length);
41
- for (let i = 0; i < binaryString.length; i++) {
42
- bytes[i] = binaryString.charCodeAt(i);
43
- }
44
- return bytes;
45
- }
46
- var VoiceAgentClient = class {
47
- ws = null;
48
- apiKey;
49
- prompt;
50
- voice;
51
- language;
52
- // Callbacks
53
- onTranscription;
54
- onResponse;
55
- onAudioCallback;
56
- onVisemesCallback;
57
- onStatus;
58
- onError;
59
- isConnected = false;
60
- messages = [];
61
- visemeListeners = [];
62
- wantVisemes = false;
63
- constructor(config) {
64
- this.apiKey = config.apiKey;
65
- this.prompt = config.prompt;
66
- this.voice = config.voice || "F1" /* F1 */;
67
- this.language = config.language || "en" /* ENGLISH */;
68
- this.onTranscription = config.onTranscription;
69
- this.onResponse = config.onResponse;
70
- this.onAudioCallback = config.onAudio;
71
- this.onVisemesCallback = config.onVisemes;
72
- this.onStatus = config.onStatus;
73
- this.onError = config.onError;
74
- this.wantVisemes = config.visemes || false;
75
- }
76
- /**
77
- * Connect to the Lokutor Voice Agent server
78
- */
79
- async connect() {
80
- return new Promise((resolve, reject) => {
81
- try {
82
- let url = DEFAULT_URLS.VOICE_AGENT;
83
- if (this.apiKey) {
84
- const separator = url.includes("?") ? "&" : "?";
85
- url += `${separator}api_key=${this.apiKey}`;
86
- }
87
- console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
88
- this.ws = new WebSocket(url);
89
- this.ws.binaryType = "arraybuffer";
90
- this.ws.onopen = () => {
91
- this.isConnected = true;
92
- console.log("\u2705 Connected to voice agent!");
93
- this.sendConfig();
94
- resolve(true);
95
- };
96
- this.ws.onmessage = async (event) => {
97
- if (event.data instanceof ArrayBuffer) {
98
- this.handleBinaryMessage(new Uint8Array(event.data));
99
- } else {
100
- this.handleTextMessage(event.data.toString());
101
- }
102
- };
103
- this.ws.onerror = (err) => {
104
- console.error("\u274C WebSocket error:", err);
105
- if (this.onError) this.onError(err);
106
- if (!this.isConnected) reject(err);
107
- };
108
- this.ws.onclose = () => {
109
- this.isConnected = false;
110
- console.log("Disconnected");
111
- };
112
- } catch (err) {
113
- if (this.onError) this.onError(err);
114
- reject(err);
115
- }
116
- });
117
- }
118
- /**
119
- * Send initial configuration to the server
120
- */
121
- sendConfig() {
122
- if (!this.ws || !this.isConnected) return;
123
- this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
124
- this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
125
- this.ws.send(JSON.stringify({ type: "language", data: this.language }));
126
- this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
127
- console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
128
- }
129
- /**
130
- * Send raw PCM audio data to the server
131
- * @param audioData Int16 PCM audio buffer
132
- */
133
- sendAudio(audioData) {
134
- if (this.ws && this.isConnected) {
135
- this.ws.send(audioData);
136
- }
137
- }
138
- /**
139
- * Handle incoming binary data (audio response)
140
- */
141
- handleBinaryMessage(data) {
142
- this.emit("audio", data);
143
- }
144
- /**
145
- * Handle incoming text messages (metadata/transcriptions)
146
- */
147
- handleTextMessage(text) {
148
- try {
149
- const msg = JSON.parse(text);
150
- switch (msg.type) {
151
- case "audio":
152
- if (msg.data) {
153
- const buffer = base64ToUint8Array(msg.data);
154
- this.handleBinaryMessage(buffer);
155
- }
156
- break;
157
- case "transcript":
158
- const role = msg.role === "user" ? "user" : "agent";
159
- this.messages.push({
160
- role,
161
- text: msg.data,
162
- timestamp: Date.now()
163
- });
164
- if (msg.role === "user") {
165
- if (this.onTranscription) this.onTranscription(msg.data);
166
- console.log(`\u{1F4AC} You: ${msg.data}`);
167
- } else {
168
- if (this.onResponse) this.onResponse(msg.data);
169
- console.log(`\u{1F916} Agent: ${msg.data}`);
170
- }
171
- break;
172
- case "status":
173
- if (this.onStatus) this.onStatus(msg.data);
174
- const icons = {
175
- "interrupted": "\u26A1",
176
- "thinking": "\u{1F9E0}",
177
- "speaking": "\u{1F50A}",
178
- "listening": "\u{1F442}"
179
- };
180
- console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
181
- break;
182
- case "visemes":
183
- if (Array.isArray(msg.data) && msg.data.length > 0) {
184
- this.emit("visemes", msg.data);
185
- }
186
- break;
187
- case "error":
188
- if (this.onError) this.onError(msg.data);
189
- console.error(`\u274C Server error: ${msg.data}`);
190
- break;
191
- }
192
- } catch (e) {
193
- }
194
- }
195
- audioListeners = [];
196
- emit(event, data) {
197
- if (event === "audio") {
198
- if (this.onAudioCallback) this.onAudioCallback(data);
199
- this.audioListeners.forEach((l) => l(data));
200
- } else if (event === "visemes") {
201
- if (this.onVisemesCallback) this.onVisemesCallback(data);
202
- this.visemeListeners.forEach((l) => l(data));
203
- }
204
- }
205
- onAudio(callback) {
206
- this.audioListeners.push(callback);
207
- }
208
- onVisemes(callback) {
209
- this.visemeListeners.push(callback);
210
- }
211
- /**
212
- * Disconnect from the server
213
- */
214
- disconnect() {
215
- if (this.ws) {
216
- this.ws.close();
217
- this.ws = null;
218
- }
219
- }
220
- /**
221
- * Update the system prompt mid-conversation
222
- */
223
- updatePrompt(newPrompt) {
224
- this.prompt = newPrompt;
225
- if (this.ws && this.isConnected) {
226
- try {
227
- this.ws.send(JSON.stringify({ type: "prompt", data: newPrompt }));
228
- console.log(`\u2699\uFE0F Updated prompt: ${newPrompt.substring(0, 50)}...`);
229
- } catch (error) {
230
- console.error("Error updating prompt:", error);
231
- }
232
- } else {
233
- console.warn("Not connected - prompt will be updated on next connection");
234
- }
235
- }
236
- /**
237
- * Get full conversation transcript
238
- */
239
- getTranscript() {
240
- return this.messages.slice();
241
- }
242
- /**
243
- * Get conversation as formatted text
244
- */
245
- getTranscriptText() {
246
- return this.messages.map((msg) => `${msg.role === "user" ? "You" : "Agent"}: ${msg.text}`).join("\n");
247
- }
248
- };
249
- var TTSClient = class {
250
- apiKey;
251
- constructor(config) {
252
- this.apiKey = config.apiKey;
253
- }
254
- /**
255
- * Synthesize text to speech
256
- *
257
- * This opens a temporary WebSocket connection, sends the request,
258
- * and streams back the audio.
259
- */
260
- synthesize(options) {
261
- return new Promise((resolve, reject) => {
262
- try {
263
- let url = DEFAULT_URLS.TTS;
264
- if (this.apiKey) {
265
- const separator = url.includes("?") ? "&" : "?";
266
- url += `${separator}api_key=${this.apiKey}`;
267
- }
268
- const ws = new WebSocket(url);
269
- ws.binaryType = "arraybuffer";
270
- ws.onopen = () => {
271
- const req = {
272
- text: options.text,
273
- voice: options.voice || "F1" /* F1 */,
274
- lang: options.language || "en" /* ENGLISH */,
275
- speed: options.speed || 1.05,
276
- steps: options.steps || 24,
277
- visemes: options.visemes || false
278
- };
279
- ws.send(JSON.stringify(req));
280
- };
281
- ws.onmessage = async (event) => {
282
- if (event.data instanceof ArrayBuffer) {
283
- if (options.onAudio) options.onAudio(new Uint8Array(event.data));
284
- } else {
285
- try {
286
- const msg = JSON.parse(event.data.toString());
287
- if (Array.isArray(msg) && options.onVisemes) {
288
- options.onVisemes(msg);
289
- }
290
- } catch (e) {
291
- }
292
- }
293
- };
294
- ws.onerror = (err) => {
295
- if (options.onError) options.onError(err);
296
- reject(err);
297
- };
298
- ws.onclose = () => {
299
- resolve();
300
- };
301
- } catch (err) {
302
- if (options.onError) options.onError(err);
303
- reject(err);
304
- }
305
- });
306
- }
307
- };
308
- async function simpleConversation(config) {
309
- const client = new VoiceAgentClient(config);
310
- await client.connect();
311
- return client;
312
- }
313
- async function simpleTTS(options) {
314
- const client = new TTSClient({ apiKey: options.apiKey });
315
- return client.synthesize(options);
316
- }
317
-
318
37
  // src/audio-utils.ts
319
38
  function pcm16ToFloat32(int16Data) {
320
39
  const float32 = new Float32Array(int16Data.length);
@@ -454,6 +173,7 @@ var BrowserAudioManager = class {
454
173
  scriptProcessor = null;
455
174
  analyserNode = null;
456
175
  mediaStream = null;
176
+ resampler = null;
457
177
  // Playback scheduling
458
178
  nextPlaybackTime = 0;
459
179
  activeSources = [];
@@ -531,6 +251,12 @@ var BrowserAudioManager = class {
531
251
  if (this.analyserNode) {
532
252
  this.mediaStreamAudioSourceNode.connect(this.analyserNode);
533
253
  }
254
+ const hardwareRate = this.audioContext.sampleRate;
255
+ if (hardwareRate !== this.inputSampleRate) {
256
+ this.resampler = new StreamResampler(hardwareRate, this.inputSampleRate);
257
+ } else {
258
+ this.resampler = null;
259
+ }
534
260
  this.scriptProcessor.onaudioprocess = (event) => {
535
261
  this._processAudioInput(event);
536
262
  };
@@ -553,15 +279,11 @@ var BrowserAudioManager = class {
553
279
  for (let i = 0; i < outputBuffer.getChannelData(0).length; i++) {
554
280
  outputBuffer.getChannelData(0)[i] = 0;
555
281
  }
556
- const hardwareRate = this.audioContext.sampleRate;
557
282
  let processedData = new Float32Array(inputData);
558
- if (hardwareRate !== this.inputSampleRate) {
559
- processedData = resampleWithAntiAliasing(
560
- processedData,
561
- hardwareRate,
562
- this.inputSampleRate
563
- );
283
+ if (this.resampler) {
284
+ processedData = this.resampler.process(processedData);
564
285
  }
286
+ if (processedData.length === 0) return;
565
287
  const int16Data = float32ToPcm16(processedData);
566
288
  const uint8Data = new Uint8Array(
567
289
  int16Data.buffer,
@@ -613,117 +335,487 @@ var BrowserAudioManager = class {
613
335
  this._schedulePlayback(audioBuffer);
614
336
  }
615
337
  /**
616
- * Internal method to schedule and play audio with sample-accurate timing
338
+ * Internal method to schedule and play audio with sample-accurate timing
339
+ */
340
+ _schedulePlayback(audioBuffer) {
341
+ if (!this.audioContext) return;
342
+ const currentTime = this.audioContext.currentTime;
343
+ const duration = audioBuffer.length / this.outputSampleRate;
344
+ const startTime = Math.max(
345
+ currentTime + 0.01,
346
+ // Minimum 10ms delay
347
+ this.nextPlaybackTime
348
+ );
349
+ this.nextPlaybackTime = startTime + duration;
350
+ const source = this.audioContext.createBufferSource();
351
+ source.buffer = audioBuffer;
352
+ source.connect(this.audioContext.destination);
353
+ if (this.analyserNode) {
354
+ source.connect(this.analyserNode);
355
+ }
356
+ source.start(startTime);
357
+ this.activeSources.push(source);
358
+ source.onended = () => {
359
+ const index = this.activeSources.indexOf(source);
360
+ if (index > -1) {
361
+ this.activeSources.splice(index, 1);
362
+ }
363
+ };
364
+ }
365
+ /**
366
+ * Stop all currently playing audio and clear the queue
367
+ */
368
+ stopPlayback() {
369
+ this.activeSources.forEach((source) => {
370
+ try {
371
+ source.stop();
372
+ } catch (e) {
373
+ }
374
+ });
375
+ this.activeSources = [];
376
+ this.playbackQueue = [];
377
+ this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
378
+ console.log("\u{1F507} Playback stopped");
379
+ }
380
+ /**
381
+ * Toggle mute state
382
+ */
383
+ setMuted(muted) {
384
+ this.isMuted = muted;
385
+ }
386
+ /**
387
+ * Get current mute state
388
+ */
389
+ isMicMuted() {
390
+ return this.isMuted;
391
+ }
392
+ /**
393
+ * Get current amplitude from analyser (for visualization)
394
+ * Returns value between 0 and 1
395
+ */
396
+ getAmplitude() {
397
+ if (!this.analyserNode) return 0;
398
+ const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
399
+ this.analyserNode.getByteTimeDomainData(dataArray);
400
+ const rms = calculateRMS(dataArray);
401
+ return Math.min(rms * 10, 1);
402
+ }
403
+ /**
404
+ * Get frequency data from analyser for visualization
405
+ */
406
+ getFrequencyData() {
407
+ if (!this.analyserNode) {
408
+ return new Uint8Array(0);
409
+ }
410
+ const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
411
+ this.analyserNode.getByteFrequencyData(dataArray);
412
+ return dataArray;
413
+ }
414
+ /**
415
+ * Get time-domain data from analyser for waveform visualization
416
+ */
417
+ getWaveformData() {
418
+ if (!this.analyserNode) {
419
+ return new Uint8Array(0);
420
+ }
421
+ const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
422
+ this.analyserNode.getByteTimeDomainData(dataArray);
423
+ return dataArray;
424
+ }
425
+ /**
426
+ * Cleanup and close AudioContext
427
+ */
428
+ cleanup() {
429
+ this.stopMicrophone();
430
+ this.stopPlayback();
431
+ if (this.analyserNode) {
432
+ this.analyserNode.disconnect();
433
+ this.analyserNode = null;
434
+ }
435
+ }
436
+ /**
437
+ * Get current audio context state
438
+ */
439
+ getState() {
440
+ return this.audioContext?.state ?? null;
441
+ }
442
+ /**
443
+ * Check if microphone is currently listening
444
+ */
445
+ isRecording() {
446
+ return this.isListening;
447
+ }
448
+ };
449
+
450
+ // src/client.ts
451
+ function base64ToUint8Array(base64) {
452
+ const binaryString = atob(base64);
453
+ const bytes = new Uint8Array(binaryString.length);
454
+ for (let i = 0; i < binaryString.length; i++) {
455
+ bytes[i] = binaryString.charCodeAt(i);
456
+ }
457
+ return bytes;
458
+ }
459
+ var VoiceAgentClient = class {
460
+ ws = null;
461
+ apiKey;
462
+ prompt;
463
+ voice;
464
+ language;
465
+ tools = [];
466
+ // Callbacks
467
+ onTranscription;
468
+ onResponse;
469
+ onAudioCallback;
470
+ onVisemesCallback;
471
+ onStatus;
472
+ onError;
473
+ isConnected = false;
474
+ messages = [];
475
+ visemeListeners = [];
476
+ wantVisemes = false;
477
+ audioManager = null;
478
+ enableAudio = false;
479
+ currentGeneration = 0;
480
+ // Connection resilience
481
+ isUserDisconnect = false;
482
+ reconnecting = false;
483
+ reconnectAttempts = 0;
484
+ maxReconnectAttempts = 5;
485
+ constructor(config) {
486
+ this.apiKey = config.apiKey;
487
+ this.prompt = config.prompt;
488
+ this.voice = config.voice || "F1" /* F1 */;
489
+ this.language = config.language || "en" /* ENGLISH */;
490
+ this.onTranscription = config.onTranscription;
491
+ this.onResponse = config.onResponse;
492
+ this.onAudioCallback = config.onAudio;
493
+ this.onVisemesCallback = config.onVisemes;
494
+ this.onStatus = config.onStatus;
495
+ this.onError = config.onError;
496
+ this.wantVisemes = config.visemes || false;
497
+ this.enableAudio = config.enableAudio ?? false;
498
+ this.tools = config.tools || [];
499
+ }
500
+ /**
501
+ * Connect to the Lokutor Voice Agent server
502
+ */
503
+ async connect() {
504
+ this.isUserDisconnect = false;
505
+ if (this.enableAudio) {
506
+ if (!this.audioManager) {
507
+ this.audioManager = new BrowserAudioManager();
508
+ }
509
+ await this.audioManager.init();
510
+ }
511
+ return new Promise((resolve, reject) => {
512
+ try {
513
+ let url = DEFAULT_URLS.VOICE_AGENT;
514
+ if (this.apiKey) {
515
+ const separator = url.includes("?") ? "&" : "?";
516
+ url += `${separator}api_key=${this.apiKey}`;
517
+ }
518
+ console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
519
+ this.ws = new WebSocket(url);
520
+ this.ws.binaryType = "arraybuffer";
521
+ this.ws.onopen = async () => {
522
+ this.isConnected = true;
523
+ this.reconnectAttempts = 0;
524
+ this.reconnecting = false;
525
+ console.log("\u2705 Connected to voice agent!");
526
+ this.sendConfig();
527
+ if (this.audioManager) {
528
+ await this.audioManager.startMicrophone((data) => {
529
+ if (this.isConnected) {
530
+ this.sendAudio(data);
531
+ }
532
+ });
533
+ }
534
+ resolve(true);
535
+ };
536
+ this.ws.onmessage = async (event) => {
537
+ if (event.data instanceof ArrayBuffer) {
538
+ this.handleBinaryMessage(new Uint8Array(event.data));
539
+ } else {
540
+ this.handleTextMessage(event.data.toString());
541
+ }
542
+ };
543
+ this.ws.onerror = (err) => {
544
+ console.error("\u274C WebSocket error:", err);
545
+ if (this.onError) this.onError(err);
546
+ if (!this.isConnected) reject(err);
547
+ };
548
+ this.ws.onclose = () => {
549
+ this.isConnected = false;
550
+ if (!this.isUserDisconnect && this.reconnectAttempts < this.maxReconnectAttempts) {
551
+ this.reconnecting = true;
552
+ this.reconnectAttempts++;
553
+ const backoffDelay = Math.min(1e3 * Math.pow(2, this.reconnectAttempts), 1e4);
554
+ console.warn(`Connection lost. Reconnecting in ${backoffDelay}ms (attempt ${this.reconnectAttempts}/${this.maxReconnectAttempts})`);
555
+ if (this.onStatus) this.onStatus("reconnecting");
556
+ setTimeout(() => {
557
+ this.connect().catch((e) => console.error("Reconnect failed", e));
558
+ }, backoffDelay);
559
+ } else {
560
+ console.log("Disconnected");
561
+ if (this.onStatus) this.onStatus("disconnected");
562
+ }
563
+ };
564
+ } catch (err) {
565
+ if (this.onError) this.onError(err);
566
+ reject(err);
567
+ }
568
+ });
569
+ }
570
+ /**
571
+ * Send initial configuration to the server
617
572
  */
618
- _schedulePlayback(audioBuffer) {
619
- if (!this.audioContext) return;
620
- const currentTime = this.audioContext.currentTime;
621
- const duration = audioBuffer.length / this.outputSampleRate;
622
- const startTime = Math.max(
623
- currentTime + 0.01,
624
- // Minimum 10ms delay
625
- this.nextPlaybackTime
626
- );
627
- this.nextPlaybackTime = startTime + duration;
628
- const source = this.audioContext.createBufferSource();
629
- source.buffer = audioBuffer;
630
- source.connect(this.audioContext.destination);
631
- if (this.analyserNode) {
632
- source.connect(this.analyserNode);
573
+ sendConfig() {
574
+ if (!this.ws || !this.isConnected) return;
575
+ this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
576
+ this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
577
+ this.ws.send(JSON.stringify({ type: "language", data: this.language }));
578
+ this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
579
+ if (this.tools && this.tools.length > 0) {
580
+ this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
633
581
  }
634
- source.start(startTime);
635
- this.activeSources.push(source);
636
- source.onended = () => {
637
- const index = this.activeSources.indexOf(source);
638
- if (index > -1) {
639
- this.activeSources.splice(index, 1);
640
- }
641
- };
582
+ console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
642
583
  }
643
584
  /**
644
- * Stop all currently playing audio and clear the queue
585
+ * Send raw PCM audio data to the server
586
+ * @param audioData Int16 PCM audio buffer
645
587
  */
646
- stopPlayback() {
647
- this.activeSources.forEach((source) => {
648
- try {
649
- source.stop();
650
- } catch (e) {
651
- }
652
- });
653
- this.activeSources = [];
654
- this.playbackQueue = [];
655
- this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
656
- console.log("\u{1F507} Playback stopped");
588
+ sendAudio(audioData) {
589
+ if (this.ws && this.ws.readyState === WebSocket.OPEN && this.isConnected) {
590
+ this.ws.send(audioData);
591
+ }
657
592
  }
658
593
  /**
659
- * Toggle mute state
594
+ * Handle incoming binary data (audio response)
660
595
  */
661
- setMuted(muted) {
662
- this.isMuted = muted;
596
+ handleBinaryMessage(data, generation) {
597
+ if (generation !== void 0 && generation < this.currentGeneration) {
598
+ console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
599
+ return;
600
+ }
601
+ if (this.audioManager) {
602
+ this.audioManager.playAudio(data);
603
+ }
604
+ this.emit("audio", data);
663
605
  }
664
606
  /**
665
- * Get current mute state
607
+ * Handle incoming text messages (metadata/transcriptions)
666
608
  */
667
- isMicMuted() {
668
- return this.isMuted;
609
+ handleTextMessage(text) {
610
+ try {
611
+ const msg = JSON.parse(text);
612
+ switch (msg.type) {
613
+ case "audio":
614
+ if (msg.data) {
615
+ const buffer = base64ToUint8Array(msg.data);
616
+ this.handleBinaryMessage(buffer, msg.generation);
617
+ }
618
+ break;
619
+ case "transcript":
620
+ const role = msg.role === "user" ? "user" : "agent";
621
+ this.messages.push({
622
+ role,
623
+ text: msg.data,
624
+ timestamp: Date.now()
625
+ });
626
+ if (msg.role === "user") {
627
+ if (this.onTranscription) this.onTranscription(msg.data);
628
+ console.log(`\u{1F4AC} You: ${msg.data}`);
629
+ } else {
630
+ if (this.onResponse) this.onResponse(msg.data);
631
+ console.log(`\u{1F916} Agent: ${msg.data}`);
632
+ }
633
+ break;
634
+ case "status":
635
+ if (msg.data === "thinking") {
636
+ const newGen = msg.generation || 0;
637
+ if (newGen > this.currentGeneration) {
638
+ console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
639
+ this.currentGeneration = newGen;
640
+ if (this.audioManager) this.audioManager.stopPlayback();
641
+ }
642
+ }
643
+ if (msg.data === "interrupted" && this.audioManager) {
644
+ this.audioManager.stopPlayback();
645
+ }
646
+ if (this.onStatus) this.onStatus(msg.data);
647
+ const icons = {
648
+ "interrupted": "\u26A1",
649
+ "thinking": "\u{1F9E0}",
650
+ "speaking": "\u{1F50A}",
651
+ "listening": "\u{1F442}"
652
+ };
653
+ console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
654
+ break;
655
+ case "visemes":
656
+ if (Array.isArray(msg.data) && msg.data.length > 0) {
657
+ this.emit("visemes", msg.data);
658
+ }
659
+ break;
660
+ case "error":
661
+ if (this.onError) this.onError(msg.data);
662
+ console.error(`\u274C Server error: ${msg.data}`);
663
+ break;
664
+ case "tool_call":
665
+ console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
666
+ break;
667
+ }
668
+ } catch (e) {
669
+ }
670
+ }
671
+ audioListeners = [];
672
+ emit(event, data) {
673
+ if (event === "audio") {
674
+ if (this.onAudioCallback) this.onAudioCallback(data);
675
+ this.audioListeners.forEach((l) => l(data));
676
+ } else if (event === "visemes") {
677
+ if (this.onVisemesCallback) this.onVisemesCallback(data);
678
+ this.visemeListeners.forEach((l) => l(data));
679
+ }
680
+ }
681
+ onAudio(callback) {
682
+ this.audioListeners.push(callback);
683
+ }
684
+ onVisemes(callback) {
685
+ this.visemeListeners.push(callback);
669
686
  }
670
687
  /**
671
- * Get current amplitude from analyser (for visualization)
672
- * Returns value between 0 and 1
688
+ * Disconnect from the server
673
689
  */
674
- getAmplitude() {
675
- if (!this.analyserNode) return 0;
676
- const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
677
- this.analyserNode.getByteTimeDomainData(dataArray);
678
- const rms = calculateRMS(dataArray);
679
- return Math.min(rms * 10, 1);
690
+ disconnect() {
691
+ this.isUserDisconnect = true;
692
+ if (this.ws) {
693
+ this.ws.close();
694
+ this.ws = null;
695
+ }
696
+ if (this.audioManager) {
697
+ this.audioManager.cleanup();
698
+ }
699
+ this.isConnected = false;
680
700
  }
681
701
  /**
682
- * Get frequency data from analyser for visualization
702
+ * Toggles the microphone mute state (if managed by client)
703
+ * returns the new mute state
683
704
  */
684
- getFrequencyData() {
685
- if (!this.analyserNode) {
686
- return new Uint8Array(0);
705
+ toggleMute() {
706
+ if (this.audioManager) {
707
+ const isMuted = this.audioManager.isMicMuted();
708
+ this.audioManager.setMuted(!isMuted);
709
+ return !isMuted;
687
710
  }
688
- const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
689
- this.analyserNode.getByteFrequencyData(dataArray);
690
- return dataArray;
711
+ return false;
691
712
  }
692
713
  /**
693
- * Get time-domain data from analyser for waveform visualization
714
+ * Gets the microphone volume amplitude 0-1 (if managed by client)
694
715
  */
695
- getWaveformData() {
696
- if (!this.analyserNode) {
697
- return new Uint8Array(0);
716
+ getAmplitude() {
717
+ if (this.audioManager) {
718
+ return this.audioManager.getAmplitude();
698
719
  }
699
- const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
700
- this.analyserNode.getByteTimeDomainData(dataArray);
701
- return dataArray;
720
+ return 0;
702
721
  }
703
722
  /**
704
- * Cleanup and close AudioContext
723
+ * Update the system prompt mid-conversation
705
724
  */
706
- cleanup() {
707
- this.stopMicrophone();
708
- this.stopPlayback();
709
- if (this.analyserNode) {
710
- this.analyserNode.disconnect();
711
- this.analyserNode = null;
725
+ updatePrompt(newPrompt) {
726
+ this.prompt = newPrompt;
727
+ if (this.ws && this.isConnected) {
728
+ try {
729
+ this.ws.send(JSON.stringify({ type: "prompt", data: newPrompt }));
730
+ console.log(`\u2699\uFE0F Updated prompt: ${newPrompt.substring(0, 50)}...`);
731
+ } catch (error) {
732
+ console.error("Error updating prompt:", error);
733
+ }
734
+ } else {
735
+ console.warn("Not connected - prompt will be updated on next connection");
712
736
  }
713
737
  }
714
738
  /**
715
- * Get current audio context state
739
+ * Get full conversation transcript
716
740
  */
717
- getState() {
718
- return this.audioContext?.state ?? null;
741
+ getTranscript() {
742
+ return this.messages.slice();
719
743
  }
720
744
  /**
721
- * Check if microphone is currently listening
745
+ * Get conversation as formatted text
722
746
  */
723
- isRecording() {
724
- return this.isListening;
747
+ getTranscriptText() {
748
+ return this.messages.map((msg) => `${msg.role === "user" ? "You" : "Agent"}: ${msg.text}`).join("\n");
749
+ }
750
+ };
751
+ var TTSClient = class {
752
+ apiKey;
753
+ constructor(config) {
754
+ this.apiKey = config.apiKey;
755
+ }
756
+ /**
757
+ * Synthesize text to speech
758
+ *
759
+ * This opens a temporary WebSocket connection, sends the request,
760
+ * and streams back the audio.
761
+ */
762
+ synthesize(options) {
763
+ return new Promise((resolve, reject) => {
764
+ try {
765
+ let url = DEFAULT_URLS.TTS;
766
+ if (this.apiKey) {
767
+ const separator = url.includes("?") ? "&" : "?";
768
+ url += `${separator}api_key=${this.apiKey}`;
769
+ }
770
+ const ws = new WebSocket(url);
771
+ ws.binaryType = "arraybuffer";
772
+ ws.onopen = () => {
773
+ const req = {
774
+ text: options.text,
775
+ voice: options.voice || "F1" /* F1 */,
776
+ lang: options.language || "en" /* ENGLISH */,
777
+ speed: options.speed || 1.05,
778
+ steps: options.steps || 24,
779
+ visemes: options.visemes || false
780
+ };
781
+ ws.send(JSON.stringify(req));
782
+ };
783
+ ws.onmessage = async (event) => {
784
+ if (event.data instanceof ArrayBuffer) {
785
+ if (options.onAudio) options.onAudio(new Uint8Array(event.data));
786
+ } else {
787
+ try {
788
+ const msg = JSON.parse(event.data.toString());
789
+ if (Array.isArray(msg) && options.onVisemes) {
790
+ options.onVisemes(msg);
791
+ }
792
+ } catch (e) {
793
+ }
794
+ }
795
+ };
796
+ ws.onerror = (err) => {
797
+ if (options.onError) options.onError(err);
798
+ reject(err);
799
+ };
800
+ ws.onclose = () => {
801
+ resolve();
802
+ };
803
+ } catch (err) {
804
+ if (options.onError) options.onError(err);
805
+ reject(err);
806
+ }
807
+ });
725
808
  }
726
809
  };
810
+ async function simpleConversation(config) {
811
+ const client = new VoiceAgentClient(config);
812
+ await client.connect();
813
+ return client;
814
+ }
815
+ async function simpleTTS(options) {
816
+ const client = new TTSClient({ apiKey: options.apiKey });
817
+ return client.synthesize(options);
818
+ }
727
819
  export {
728
820
  AUDIO_CONFIG,
729
821
  BrowserAudioManager,