@lokutor/sdk 1.1.2 → 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -21,11 +21,22 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
23
  AUDIO_CONFIG: () => AUDIO_CONFIG,
24
+ BrowserAudioManager: () => BrowserAudioManager,
24
25
  DEFAULT_URLS: () => DEFAULT_URLS,
25
26
  Language: () => Language,
27
+ StreamResampler: () => StreamResampler,
26
28
  TTSClient: () => TTSClient,
27
29
  VoiceAgentClient: () => VoiceAgentClient,
28
30
  VoiceStyle: () => VoiceStyle,
31
+ applyLowPassFilter: () => applyLowPassFilter,
32
+ bytesToPcm16: () => bytesToPcm16,
33
+ calculateRMS: () => calculateRMS,
34
+ float32ToPcm16: () => float32ToPcm16,
35
+ normalizeAudio: () => normalizeAudio,
36
+ pcm16ToBytes: () => pcm16ToBytes,
37
+ pcm16ToFloat32: () => pcm16ToFloat32,
38
+ resample: () => resample,
39
+ resampleWithAntiAliasing: () => resampleWithAntiAliasing,
29
40
  simpleConversation: () => simpleConversation,
30
41
  simpleTTS: () => simpleTTS
31
42
  });
@@ -86,10 +97,13 @@ var VoiceAgentClient = class {
86
97
  onTranscription;
87
98
  onResponse;
88
99
  onAudioCallback;
100
+ onVisemesCallback;
89
101
  onStatus;
90
102
  onError;
91
103
  isConnected = false;
92
104
  messages = [];
105
+ visemeListeners = [];
106
+ wantVisemes = false;
93
107
  constructor(config) {
94
108
  this.apiKey = config.apiKey;
95
109
  this.prompt = config.prompt;
@@ -98,8 +112,10 @@ var VoiceAgentClient = class {
98
112
  this.onTranscription = config.onTranscription;
99
113
  this.onResponse = config.onResponse;
100
114
  this.onAudioCallback = config.onAudio;
115
+ this.onVisemesCallback = config.onVisemes;
101
116
  this.onStatus = config.onStatus;
102
117
  this.onError = config.onError;
118
+ this.wantVisemes = config.visemes || false;
103
119
  }
104
120
  /**
105
121
  * Connect to the Lokutor Voice Agent server
@@ -151,7 +167,8 @@ var VoiceAgentClient = class {
151
167
  this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
152
168
  this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
153
169
  this.ws.send(JSON.stringify({ type: "language", data: this.language }));
154
- console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}`);
170
+ this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
171
+ console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
155
172
  }
156
173
  /**
157
174
  * Send raw PCM audio data to the server
@@ -206,6 +223,11 @@ var VoiceAgentClient = class {
206
223
  };
207
224
  console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
208
225
  break;
226
+ case "visemes":
227
+ if (Array.isArray(msg.data) && msg.data.length > 0) {
228
+ this.emit("visemes", msg.data);
229
+ }
230
+ break;
209
231
  case "error":
210
232
  if (this.onError) this.onError(msg.data);
211
233
  console.error(`\u274C Server error: ${msg.data}`);
@@ -219,11 +241,17 @@ var VoiceAgentClient = class {
219
241
  if (event === "audio") {
220
242
  if (this.onAudioCallback) this.onAudioCallback(data);
221
243
  this.audioListeners.forEach((l) => l(data));
244
+ } else if (event === "visemes") {
245
+ if (this.onVisemesCallback) this.onVisemesCallback(data);
246
+ this.visemeListeners.forEach((l) => l(data));
222
247
  }
223
248
  }
224
249
  onAudio(callback) {
225
250
  this.audioListeners.push(callback);
226
251
  }
252
+ onVisemes(callback) {
253
+ this.visemeListeners.push(callback);
254
+ }
227
255
  /**
228
256
  * Disconnect from the server
229
257
  */
@@ -330,14 +358,435 @@ async function simpleTTS(options) {
330
358
  const client = new TTSClient({ apiKey: options.apiKey });
331
359
  return client.synthesize(options);
332
360
  }
361
+
362
+ // src/audio-utils.ts
363
/**
 * Convert PCM16 samples to floating-point samples.
 *
 * Every sample is divided by 32768, so 16-bit input maps into [-1, 1).
 *
 * @param {Int16Array} int16Data - Sequence of PCM16 sample values.
 * @returns {Float32Array} A new array with one float per input sample.
 */
function pcm16ToFloat32(int16Data) {
  const PCM16_FULL_SCALE = 32768;
  return Float32Array.from(int16Data, (sample) => sample / PCM16_FULL_SCALE);
}
370
/**
 * Convert floating-point samples to PCM16.
 *
 * Samples are clamped to [-1, 1] first. Negative values are scaled by 32768
 * and non-negative values by 32767 so both ends of the int16 range are
 * reachable without overflow; the fractional part is dropped when the value
 * is stored in the Int16Array.
 *
 * @param {Float32Array} float32Data - Sequence of float samples.
 * @returns {Int16Array} A new array with one int16 per input sample.
 */
function float32ToPcm16(float32Data) {
  return Int16Array.from(float32Data, (sample) => {
    const clamped = Math.max(-1, Math.min(1, sample));
    if (clamped < 0) {
      return clamped * 32768;
    }
    return clamped * 32767;
  });
}
378
/**
 * Resample audio with linear interpolation.
 *
 * When both rates are equal the input is simply copied. Otherwise each output
 * sample is read at position `i * (inputRate / outputRate)` in the input and
 * blended from the two neighbouring input samples; the right-hand neighbour is
 * clamped to the last input sample.
 *
 * @param {Float32Array} input - Source samples.
 * @param {number} inputRate - Sample rate of `input`.
 * @param {number} outputRate - Desired sample rate.
 * @returns {Float32Array} A new array of `round(input.length / ratio)` samples.
 */
function resample(input, inputRate, outputRate) {
  if (inputRate === outputRate) {
    return new Float32Array(input);
  }

  const step = inputRate / outputRate;
  const lastInputIndex = input.length - 1;
  const resampledLength = Math.round(input.length / step);

  return new Float32Array(resampledLength).map((_unused, outIndex) => {
    const position = outIndex * step;
    const lower = Math.floor(position);
    const upper = Math.min(lower + 1, lastInputIndex);
    const fraction = position - lower;
    return input[lower] * (1 - fraction) + input[upper] * fraction;
  });
}
394
/**
 * Apply a first-order (single-pole RC) low-pass filter.
 *
 * Implements `y[i] = y[i-1] + alpha * (x[i] - y[i-1])` with
 * `alpha = dt / (rc + dt)`, `dt = 1 / sampleRate` and
 * `rc = 1 / (2 * PI * cutoffFreq)`. The first output equals the first input.
 *
 * @param {Float32Array} data - Samples to filter.
 * @param {number} cutoffFreq - Cutoff frequency, in the same unit as `sampleRate`.
 * @param {number} sampleRate - Sample rate of `data`.
 * @returns {Float32Array} A new array with the filtered samples.
 */
function applyLowPassFilter(data, cutoffFreq, sampleRate) {
  const samplePeriod = 1 / sampleRate;
  const timeConstant = 1 / (2 * Math.PI * cutoffFreq);
  const smoothing = samplePeriod / (timeConstant + samplePeriod);

  const smoothed = new Float32Array(data.length);
  if (smoothed.length === 0) {
    return smoothed;
  }

  smoothed[0] = data[0];
  // Read the previous value back from the Float32Array so the feedback term
  // uses the stored (float32-rounded) sample.
  let previous = smoothed[0];
  for (let n = 1; n < smoothed.length; n += 1) {
    smoothed[n] = previous + smoothing * (data[n] - previous);
    previous = smoothed[n];
  }
  return smoothed;
}
405
/**
 * Resample audio, low-pass filtering first when the rate is being reduced.
 *
 * - Equal rates: returns a copy of the input.
 * - Downsampling: filters the input with `applyLowPassFilter` at 90% of the
 *   output Nyquist frequency (`outputRate / 2`), then calls `resample`.
 * - Otherwise: calls `resample` directly.
 *
 * @param {Float32Array} input - Source samples.
 * @param {number} inputRate - Sample rate of `input`.
 * @param {number} outputRate - Desired sample rate.
 * @returns {Float32Array} The resampled audio.
 */
function resampleWithAntiAliasing(input, inputRate, outputRate) {
  if (inputRate === outputRate) {
    return new Float32Array(input);
  }

  const isDownsampling = outputRate < inputRate;
  if (!isDownsampling) {
    return resample(input, inputRate, outputRate);
  }

  const cutoffFreq = (outputRate / 2) * 0.9;
  const bandLimited = applyLowPassFilter(input, cutoffFreq, inputRate);
  return resample(bandLimited, inputRate, outputRate);
}
417
/**
 * Expose the bytes behind a PCM16 array without copying.
 *
 * The returned Uint8Array is a view over the same underlying buffer range as
 * `data`, so writes through either one are visible through the other.
 *
 * @param {Int16Array} data - PCM16 samples.
 * @returns {Uint8Array} Byte view covering exactly the bytes of `data`.
 */
function pcm16ToBytes(data) {
  const { buffer, byteOffset, byteLength } = data;
  return new Uint8Array(buffer, byteOffset, byteLength);
}
420
/**
 * Reinterpret raw bytes as PCM16 samples (platform byte order).
 *
 * When the byte view starts on an even offset the result is a zero-copy view
 * over the same buffer, as before. An Int16Array cannot be created at an odd
 * byte offset (the constructor throws a RangeError), so in that case the bytes
 * are copied into a fresh, aligned buffer first. A trailing odd byte that does
 * not form a full sample is ignored.
 *
 * @param {Uint8Array} bytes - Raw PCM16 bytes.
 * @returns {Int16Array} `floor(bytes.length / 2)` samples.
 */
function bytesToPcm16(bytes) {
  const sampleCount = Math.floor(bytes.length / 2);

  if (bytes.byteOffset % Int16Array.BYTES_PER_ELEMENT === 0) {
    return new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount);
  }

  // Unaligned view (e.g. a subarray of a larger network frame): copy so the
  // Int16Array starts at offset 0 of its own buffer.
  const aligned = new Uint8Array(sampleCount * Int16Array.BYTES_PER_ELEMENT);
  aligned.set(bytes.subarray(0, aligned.length));
  return new Int16Array(aligned.buffer);
}
423
/**
 * Scale audio so that its largest absolute sample equals `targetPeak`.
 *
 * All-zero input is returned as an unscaled copy, since there is no peak to
 * scale from.
 *
 * @param {Float32Array} data - Samples to normalize.
 * @param {number} [targetPeak=0.95] - Desired peak magnitude.
 * @returns {Float32Array} A new array; `data` is not modified.
 */
function normalizeAudio(data, targetPeak = 0.95) {
  const peak = data.reduce(
    (largest, sample) => Math.max(largest, Math.abs(sample)),
    0,
  );
  if (peak === 0) {
    return new Float32Array(data);
  }

  const gain = targetPeak / peak;
  return Float32Array.from(data, (sample) => sample * gain);
}
436
/**
 * Compute the root-mean-square level of a block of samples.
 *
 * Uint8Array input is treated as unsigned 8-bit audio centred on 128 (the
 * layout produced by `AnalyserNode.getByteTimeDomainData`) and mapped to
 * roughly [-1, 1) before squaring. Any other input is used as-is.
 *
 * @param {Uint8Array|Float32Array} data - Samples to measure.
 * @returns {number} The RMS value, or 0 for empty input (previously NaN,
 *   from dividing 0 by 0).
 */
function calculateRMS(data) {
  const length = data.length;
  if (length === 0) {
    return 0;
  }

  const isByteData = data instanceof Uint8Array;
  let sumOfSquares = 0;
  for (let i = 0; i < length; i++) {
    const value = isByteData ? (data[i] - 128) / 128 : data[i];
    sumOfSquares += value * value;
  }
  return Math.sqrt(sumOfSquares / length);
}
451
/**
 * Linear-interpolation resampler for audio that arrives in chunks.
 *
 * Unlike resampling each chunk on its own, the resampler remembers the
 * fractional read position between calls, so the concatenated output of
 * several `process` calls matches resampling the concatenated input in one go
 * (also for non-integer ratios such as 44100 -> 16000).
 */
var StreamResampler = class {
  /** Input samples that have been received but are still needed. */
  inputBuffer = new Float32Array(0);
  inputRate;
  outputRate;
  /** Position of the next output sample, measured from `inputBuffer[0]`. */
  #phase = 0;

  /**
   * @param {number} inputRate - Sample rate of the chunks passed to `process`.
   * @param {number} outputRate - Sample rate of the chunks it returns.
   */
  constructor(inputRate, outputRate) {
    this.inputRate = inputRate;
    this.outputRate = outputRate;
  }

  /**
   * Process a chunk of audio and return resampled data
   * @param inputChunk Float32Array chunk to process
   * @param flush If true, output remaining buffered samples (the final sample
   *   is held where a right-hand neighbour is missing) and reset the state
   * @returns Resampled Float32Array (may be empty if more data needed)
   * @throws {RangeError} If inputRate / outputRate is not a positive finite number
   */
  process(inputChunk, flush = false) {
    const ratio = this.inputRate / this.outputRate;
    if (!(ratio > 0) || !Number.isFinite(ratio)) {
      // A zero or NaN step would never advance through the input.
      throw new RangeError(
        `Invalid resampling ratio: ${this.inputRate} / ${this.outputRate}`,
      );
    }

    const combined = new Float32Array(this.inputBuffer.length + inputChunk.length);
    combined.set(this.inputBuffer);
    combined.set(inputChunk, this.inputBuffer.length);
    const lastIndex = combined.length - 1;

    const samples = [];
    let count = 0;
    for (;;) {
      // Multiply instead of accumulating to avoid floating-point drift.
      const pos = this.#phase + count * ratio;
      const left = Math.floor(pos);
      if (left > lastIndex) break;
      const weight = pos - left;
      // Interpolating needs the sample after `left`. Without it we can only
      // emit when the position sits exactly on a sample, or when flushing.
      const hasRightNeighbour = left < lastIndex;
      if (!hasRightNeighbour && weight !== 0 && !flush) break;
      const right = Math.min(left + 1, lastIndex);
      samples.push(combined[left] * (1 - weight) + combined[right] * weight);
      count++;
    }

    if (flush) {
      this.reset();
    } else {
      // Drop the input that lies entirely before the next output position and
      // keep the fractional remainder as the new phase.
      const nextPos = this.#phase + count * ratio;
      const consumed = Math.min(Math.floor(nextPos), combined.length);
      this.inputBuffer = combined.slice(consumed);
      this.#phase = nextPos - consumed;
    }
    return Float32Array.from(samples);
  }

  /** Discard buffered input and start again at the beginning of a stream. */
  reset() {
    this.inputBuffer = new Float32Array(0);
    this.#phase = 0;
  }
};
493
+
494
+ // src/browser-audio.ts
495
/**
 * Browser-side audio helper built on the Web Audio API.
 *
 * Captures microphone audio and hands it to a callback as PCM16 bytes at
 * `inputSampleRate`, and schedules gapless playback of PCM16 chunks at
 * `outputSampleRate`. An optional AnalyserNode is fed by both the microphone
 * and the playback sources for visualisation.
 */
var BrowserAudioManager = class {
  audioContext = null;
  mediaStreamAudioSourceNode = null;
  scriptProcessor = null;
  analyserNode = null;
  mediaStream = null;
  // Playback scheduling
  nextPlaybackTime = 0;
  activeSources = [];
  playbackQueue = [];
  // Configuration
  inputSampleRate;
  outputSampleRate;
  autoGainControl;
  echoCancellation;
  noiseSuppression;
  // Callbacks
  onAudioInput;
  onInputError;
  // Audio processing state
  isMuted = false;
  isListening = false;

  /**
   * @param {object} [config]
   * @param {number} [config.inputSampleRate] - Rate of the PCM16 data passed to
   *   the microphone callback; defaults to AUDIO_CONFIG.SAMPLE_RATE.
   * @param {number} [config.outputSampleRate] - Rate of the PCM16 data given to
   *   playAudio; defaults to AUDIO_CONFIG.SPEAKER_SAMPLE_RATE.
   * @param {boolean} [config.autoGainControl=true]
   * @param {boolean} [config.echoCancellation=true]
   * @param {boolean} [config.noiseSuppression=true]
   * @param {(error: Error) => void} [config.onInputError] - Called when
   *   starting the microphone fails.
   */
  constructor(config = {}) {
    this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
    this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
    this.autoGainControl = config.autoGainControl ?? true;
    this.echoCancellation = config.echoCancellation ?? true;
    this.noiseSuppression = config.noiseSuppression ?? true;
    this.onInputError = config.onInputError;
  }

  /**
   * Initialize the AudioContext and analyser
   *
   * Does nothing when a context already exists. The analyser is created
   * unless `analyserConfig.enabled` is explicitly `false`.
   *
   * @param {{enabled?: boolean, fftSize?: number}} [analyserConfig]
   * @throws {Error} If the environment has no AudioContext implementation.
   */
  async init(analyserConfig) {
    if (this.audioContext) return;
    // globalThis instead of window: in a non-browser runtime this yields the
    // descriptive error below rather than a ReferenceError.
    const AudioContextClass = globalThis.AudioContext ?? globalThis.webkitAudioContext;
    if (!AudioContextClass) {
      throw new Error("Web Audio API not supported in this browser");
    }
    this.audioContext = new AudioContextClass();
    if (this.audioContext.state === "suspended") {
      await this.audioContext.resume();
      console.log("\u{1F442} AudioContext resumed");
    }
    if (analyserConfig?.enabled !== false) {
      this.analyserNode = this.audioContext.createAnalyser();
      this.analyserNode.fftSize = analyserConfig?.fftSize ?? 256;
    }
  }

  /**
   * Start capturing audio from the microphone
   *
   * Initializes the AudioContext on demand. If a capture is already running
   * it is released first so its tracks and nodes are not leaked.
   *
   * @param {(bytes: Uint8Array) => void} onAudioInput - Receives each captured
   *   block as PCM16 bytes at `inputSampleRate`.
   * @throws {Error} If the microphone cannot be opened; `onInputError` is
   *   invoked with the same error before it is rethrown.
   */
  async startMicrophone(onAudioInput) {
    if (!this.audioContext) {
      await this.init();
    }
    if (this.mediaStream) {
      this._releaseMicrophone();
    }
    try {
      this.onAudioInput = onAudioInput;
      this.isListening = true;
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          autoGainControl: this.autoGainControl,
          echoCancellation: this.echoCancellation,
          noiseSuppression: this.noiseSuppression
        }
      });
      if (!this.isListening) {
        // stopMicrophone()/cleanup() ran while the permission prompt was open:
        // give the device back instead of starting a capture nobody wants.
        stream.getTracks().forEach((track) => track.stop());
        return;
      }
      this.mediaStream = stream;
      this.mediaStreamAudioSourceNode = this.audioContext.createMediaStreamSource(stream);
      const bufferSize = 4096;
      this.scriptProcessor = this.audioContext.createScriptProcessor(
        bufferSize,
        1,
        // input channels
        1
        // output channels
      );
      this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
      // The processor is connected to the destination so that it keeps
      // receiving audioprocess events; its output is silenced in
      // _processAudioInput.
      this.scriptProcessor.connect(this.audioContext.destination);
      if (this.analyserNode) {
        this.mediaStreamAudioSourceNode.connect(this.analyserNode);
      }
      this.scriptProcessor.onaudioprocess = (event) => {
        this._processAudioInput(event);
      };
      console.log("\u{1F3A4} Microphone started");
    } catch (error) {
      // Undo partial setup so isRecording() is accurate and no track stays open.
      this._releaseMicrophone();
      const err = error instanceof Error ? error : new Error(String(error));
      if (this.onInputError) this.onInputError(err);
      throw err;
    }
  }

  /**
   * Internal method to process microphone audio data
   *
   * Silences the processor output, then (unless muted or stopped) resamples
   * the block to `inputSampleRate` when the context runs at a different rate,
   * converts it to PCM16 and passes the bytes to the capture callback.
   */
  _processAudioInput(event) {
    // Always write silence so the microphone is never routed to the speakers.
    event.outputBuffer.getChannelData(0).fill(0);
    if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
    if (this.isMuted) return;

    const inputData = event.inputBuffer.getChannelData(0);
    const hardwareRate = this.audioContext.sampleRate;
    const samples = hardwareRate !== this.inputSampleRate
      ? resampleWithAntiAliasing(inputData, hardwareRate, this.inputSampleRate)
      : inputData;
    // float32ToPcm16 allocates a fresh array, so the callback never sees the
    // event's own channel data.
    const int16Data = float32ToPcm16(samples);
    const uint8Data = new Uint8Array(
      int16Data.buffer,
      int16Data.byteOffset,
      int16Data.byteLength
    );
    this.onAudioInput(uint8Data);
  }

  /**
   * Release every microphone resource without logging: stops the stream's
   * tracks, detaches the audioprocess handler and disconnects the nodes.
   */
  _releaseMicrophone() {
    this.isListening = false;
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }
    if (this.scriptProcessor) {
      this.scriptProcessor.onaudioprocess = null;
      this.scriptProcessor.disconnect();
      this.scriptProcessor = null;
    }
    if (this.mediaStreamAudioSourceNode) {
      this.mediaStreamAudioSourceNode.disconnect();
      this.mediaStreamAudioSourceNode = null;
    }
  }

  /**
   * Stop capturing microphone input
   */
  stopMicrophone() {
    this._releaseMicrophone();
    console.log("\u{1F3A4} Microphone stopped");
  }

  /**
   * Play back audio received from the server
   *
   * The data is read as 16-bit samples in platform byte order. Both byte views
   * (Uint8Array) and Int16Array are accepted; the sample count is derived from
   * the byte length. Views that start at an odd byte offset are copied first,
   * because an Int16Array cannot be created at an unaligned offset. Input
   * shorter than one sample is ignored.
   *
   * @param pcm16Data PCM16 audio data at `outputSampleRate`
   */
  playAudio(pcm16Data) {
    if (!this.audioContext) {
      console.warn("AudioContext not initialized");
      return;
    }
    const sampleCount = Math.floor(pcm16Data.byteLength / 2);
    if (sampleCount === 0) {
      // createBuffer() rejects zero-length buffers.
      return;
    }
    const byteView = new Uint8Array(
      pcm16Data.buffer,
      pcm16Data.byteOffset,
      sampleCount * 2
    );
    const alignedBytes = pcm16Data.byteOffset % 2 === 0 ? byteView : byteView.slice();
    const int16Array = new Int16Array(
      alignedBytes.buffer,
      alignedBytes.byteOffset,
      sampleCount
    );
    const float32Data = pcm16ToFloat32(int16Array);
    const audioBuffer = this.audioContext.createBuffer(
      1,
      float32Data.length,
      this.outputSampleRate
    );
    audioBuffer.getChannelData(0).set(float32Data);
    this._schedulePlayback(audioBuffer);
  }

  /**
   * Internal method to schedule and play audio with sample-accurate timing
   *
   * Each buffer starts where the previous one ends, or 10 ms from now when
   * the schedule has fallen behind the context clock.
   */
  _schedulePlayback(audioBuffer) {
    if (!this.audioContext) return;
    const currentTime = this.audioContext.currentTime;
    const duration = audioBuffer.length / this.outputSampleRate;
    const startTime = Math.max(
      currentTime + 0.01,
      // Minimum 10ms delay
      this.nextPlaybackTime
    );
    this.nextPlaybackTime = startTime + duration;
    const source = this.audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(this.audioContext.destination);
    if (this.analyserNode) {
      source.connect(this.analyserNode);
    }
    source.start(startTime);
    this.activeSources.push(source);
    source.onended = () => {
      const index = this.activeSources.indexOf(source);
      if (index > -1) {
        this.activeSources.splice(index, 1);
      }
    };
  }

  /**
   * Stop all currently playing audio and clear the queue
   */
  stopPlayback() {
    this.activeSources.forEach((source) => {
      try {
        source.stop();
      } catch (e) {
        // Deliberate best effort: a source that can no longer be stopped
        // needs no further action.
      }
    });
    this.activeSources = [];
    this.playbackQueue = [];
    this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
    console.log("\u{1F507} Playback stopped");
  }

  /**
   * Toggle mute state
   *
   * While muted, captured blocks are dropped instead of being passed to the
   * capture callback.
   */
  setMuted(muted) {
    this.isMuted = muted;
  }

  /**
   * Get current mute state
   */
  isMicMuted() {
    return this.isMuted;
  }

  /**
   * Get current amplitude from analyser (for visualization)
   * Returns value between 0 and 1 (RMS of the waveform, multiplied by 10 and
   * capped at 1); 0 when no analyser exists
   */
  getAmplitude() {
    if (!this.analyserNode) return 0;
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteTimeDomainData(dataArray);
    const rms = calculateRMS(dataArray);
    return Math.min(rms * 10, 1);
  }

  /**
   * Get frequency data from analyser for visualization
   * (empty array when no analyser exists)
   */
  getFrequencyData() {
    if (!this.analyserNode) {
      return new Uint8Array(0);
    }
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteFrequencyData(dataArray);
    return dataArray;
  }

  /**
   * Get time-domain data from analyser for waveform visualization
   * (empty array when no analyser exists)
   */
  getWaveformData() {
    if (!this.analyserNode) {
      return new Uint8Array(0);
    }
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteTimeDomainData(dataArray);
    return dataArray;
  }

  /**
   * Cleanup and close AudioContext
   *
   * Stops capture and playback, drops the analyser and closes the context so
   * the browser can release the audio device. A later init() or
   * startMicrophone() creates a fresh context and analyser.
   */
  cleanup() {
    this.stopMicrophone();
    this.stopPlayback();
    if (this.analyserNode) {
      this.analyserNode.disconnect();
      this.analyserNode = null;
    }
    const context = this.audioContext;
    this.audioContext = null;
    // A new context starts its clock at 0 again.
    this.nextPlaybackTime = 0;
    if (context && context.state !== "closed" && typeof context.close === "function") {
      Promise.resolve(context.close()).catch((err) => {
        console.warn("Failed to close AudioContext", err);
      });
    }
  }

  /**
   * Get current audio context state (null when no context exists)
   */
  getState() {
    return this.audioContext?.state ?? null;
  }

  /**
   * Check if microphone is currently listening
   */
  isRecording() {
    return this.isListening;
  }
};
333
771
  // Annotate the CommonJS export names for ESM import in node:
334
772
  0 && (module.exports = {
335
773
  AUDIO_CONFIG,
774
+ BrowserAudioManager,
336
775
  DEFAULT_URLS,
337
776
  Language,
777
+ StreamResampler,
338
778
  TTSClient,
339
779
  VoiceAgentClient,
340
780
  VoiceStyle,
781
+ applyLowPassFilter,
782
+ bytesToPcm16,
783
+ calculateRMS,
784
+ float32ToPcm16,
785
+ normalizeAudio,
786
+ pcm16ToBytes,
787
+ pcm16ToFloat32,
788
+ resample,
789
+ resampleWithAntiAliasing,
341
790
  simpleConversation,
342
791
  simpleTTS
343
792
  });