@lokutor/sdk 1.1.2 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -21,11 +21,22 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
21
21
  var index_exports = {};
22
22
  __export(index_exports, {
23
23
  AUDIO_CONFIG: () => AUDIO_CONFIG,
24
+ BrowserAudioManager: () => BrowserAudioManager,
24
25
  DEFAULT_URLS: () => DEFAULT_URLS,
25
26
  Language: () => Language,
27
+ StreamResampler: () => StreamResampler,
26
28
  TTSClient: () => TTSClient,
27
29
  VoiceAgentClient: () => VoiceAgentClient,
28
30
  VoiceStyle: () => VoiceStyle,
31
+ applyLowPassFilter: () => applyLowPassFilter,
32
+ bytesToPcm16: () => bytesToPcm16,
33
+ calculateRMS: () => calculateRMS,
34
+ float32ToPcm16: () => float32ToPcm16,
35
+ normalizeAudio: () => normalizeAudio,
36
+ pcm16ToBytes: () => pcm16ToBytes,
37
+ pcm16ToFloat32: () => pcm16ToFloat32,
38
+ resample: () => resample,
39
+ resampleWithAntiAliasing: () => resampleWithAntiAliasing,
29
40
  simpleConversation: () => simpleConversation,
30
41
  simpleTTS: () => simpleTTS
31
42
  });
@@ -54,8 +65,8 @@ var Language = /* @__PURE__ */ ((Language2) => {
54
65
  return Language2;
55
66
  })(Language || {});
56
67
  var AUDIO_CONFIG = {
57
- SAMPLE_RATE: 16e3,
58
- SPEAKER_SAMPLE_RATE: 44100,
68
+ SAMPLE_RATE: 44100,
69
+ SPEAKER_SAMPLE_RATE: 16e3,
59
70
  CHANNELS: 1,
60
71
  CHUNK_DURATION_MS: 20,
61
72
  get CHUNK_SIZE() {
@@ -86,10 +97,12 @@ var VoiceAgentClient = class {
86
97
  onTranscription;
87
98
  onResponse;
88
99
  onAudioCallback;
100
+ onVisemesCallback;
89
101
  onStatus;
90
102
  onError;
91
103
  isConnected = false;
92
104
  messages = [];
105
+ visemeListeners = [];
93
106
  constructor(config) {
94
107
  this.apiKey = config.apiKey;
95
108
  this.prompt = config.prompt;
@@ -98,6 +111,7 @@ var VoiceAgentClient = class {
98
111
  this.onTranscription = config.onTranscription;
99
112
  this.onResponse = config.onResponse;
100
113
  this.onAudioCallback = config.onAudio;
114
+ this.onVisemesCallback = config.onVisemes;
101
115
  this.onStatus = config.onStatus;
102
116
  this.onError = config.onError;
103
117
  }
@@ -206,6 +220,11 @@ var VoiceAgentClient = class {
206
220
  };
207
221
  console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
208
222
  break;
223
+ case "visemes":
224
+ if (Array.isArray(msg.data) && msg.data.length > 0) {
225
+ this.emit("visemes", msg.data);
226
+ }
227
+ break;
209
228
  case "error":
210
229
  if (this.onError) this.onError(msg.data);
211
230
  console.error(`\u274C Server error: ${msg.data}`);
@@ -219,11 +238,17 @@ var VoiceAgentClient = class {
219
238
  if (event === "audio") {
220
239
  if (this.onAudioCallback) this.onAudioCallback(data);
221
240
  this.audioListeners.forEach((l) => l(data));
241
+ } else if (event === "visemes") {
242
+ if (this.onVisemesCallback) this.onVisemesCallback(data);
243
+ this.visemeListeners.forEach((l) => l(data));
222
244
  }
223
245
  }
224
246
  onAudio(callback) {
225
247
  this.audioListeners.push(callback);
226
248
  }
249
+ onVisemes(callback) {
250
+ this.visemeListeners.push(callback);
251
+ }
227
252
  /**
228
253
  * Disconnect from the server
229
254
  */
@@ -330,14 +355,435 @@ async function simpleTTS(options) {
330
355
  const client = new TTSClient({ apiKey: options.apiKey });
331
356
  return client.synthesize(options);
332
357
  }
358
+
359
+ // src/audio-utils.ts
360
/**
 * Convert signed 16-bit PCM samples to floating-point samples.
 * Each sample is divided by 32768, so the Int16 range [-32768, 32767]
 * maps into [-1, 1).
 * @param {Int16Array} int16Data PCM16 samples.
 * @returns {Float32Array} New array with one float per input sample.
 */
function pcm16ToFloat32(int16Data) {
  const PCM16_DIVISOR = 32768;
  const sampleCount = int16Data.length;
  return Float32Array.from(
    { length: sampleCount },
    (_, index) => int16Data[index] / PCM16_DIVISOR
  );
}
367
/**
 * Convert floating-point samples to signed 16-bit PCM.
 * Each sample is clamped to [-1, 1]; negative values are scaled by 32768
 * and non-negative values by 32767 so both ends of the Int16 range are
 * reachable. The fractional part is dropped when the value is stored in
 * the Int16Array.
 * @param {Float32Array} float32Data Input samples.
 * @returns {Int16Array} New array with one PCM16 sample per input sample.
 */
function float32ToPcm16(float32Data) {
  const pcm = new Int16Array(float32Data.length);
  let index = 0;
  while (index < float32Data.length) {
    const clamped = Math.max(-1, Math.min(1, float32Data[index]));
    const fullScale = clamped < 0 ? 32768 : 32767;
    pcm[index] = clamped * fullScale;
    index += 1;
  }
  return pcm;
}
375
/**
 * Resample audio by linear interpolation.
 * When both rates are equal a copy of the input is returned. Otherwise the
 * output has `Math.round(input.length / (inputRate / outputRate))` samples,
 * each interpolated between the two input samples surrounding its source
 * position (the right neighbour is clamped to the last input sample).
 * @param {Float32Array} input Source samples.
 * @param {number} inputRate Sample rate of `input`.
 * @param {number} outputRate Desired sample rate.
 * @returns {Float32Array} Newly allocated resampled signal.
 */
function resample(input, inputRate, outputRate) {
  if (inputRate === outputRate) {
    return new Float32Array(input);
  }
  const step = inputRate / outputRate;
  const lastIndex = input.length - 1;
  const result = new Float32Array(Math.round(input.length / step));
  result.forEach((_, outIndex) => {
    const sourcePos = outIndex * step;
    const lower = Math.floor(sourcePos);
    const upper = Math.min(lower + 1, lastIndex);
    const fraction = sourcePos - lower;
    result[outIndex] = input[lower] * (1 - fraction) + input[upper] * fraction;
  });
  return result;
}
391
/**
 * Apply a single-pole (RC-style) low-pass filter.
 * The smoothing factor is `alpha = dt / (rc + dt)` with `dt = 1 / sampleRate`
 * and `rc = 1 / (2 * PI * cutoffFreq)`. The first output equals the first
 * input; every later output moves from the previous output toward the
 * current input by `alpha`.
 * @param {Float32Array} data Input samples.
 * @param {number} cutoffFreq Cutoff frequency, in the same unit as `sampleRate`.
 * @param {number} sampleRate Sample rate of `data`.
 * @returns {Float32Array} New array with the filtered signal.
 */
function applyLowPassFilter(data, cutoffFreq, sampleRate) {
  const samplePeriod = 1 / sampleRate;
  const timeConstant = 1 / (2 * Math.PI * cutoffFreq);
  const smoothing = samplePeriod / (timeConstant + samplePeriod);
  const result = new Float32Array(data.length);
  result[0] = data[0];
  // The previous value is read back from the Float32Array so it carries the
  // same float32 rounding as the stored output.
  let previous = result[0];
  for (let n = 1; n < data.length; n += 1) {
    result[n] = previous + smoothing * (data[n] - previous);
    previous = result[n];
  }
  return result;
}
402
/**
 * Resample audio, low-pass filtering first when the rate is reduced.
 * Equal rates return a copy of the input. When `outputRate < inputRate` the
 * input is passed through `applyLowPassFilter` with a cutoff at 90% of the
 * output Nyquist frequency (`outputRate / 2`) before `resample` is called;
 * when the rate increases the input is resampled directly.
 * @param {Float32Array} input Source samples.
 * @param {number} inputRate Sample rate of `input`.
 * @param {number} outputRate Desired sample rate.
 * @returns {Float32Array} Newly allocated resampled signal.
 */
function resampleWithAntiAliasing(input, inputRate, outputRate) {
  if (inputRate === outputRate) {
    return new Float32Array(input);
  }
  const isDownsampling = outputRate < inputRate;
  const source = isDownsampling
    ? applyLowPassFilter(input, (outputRate / 2) * 0.9, inputRate)
    : input;
  return resample(source, inputRate, outputRate);
}
414
/**
 * Expose PCM16 samples as raw bytes without copying.
 * The returned Uint8Array is a view over the same ArrayBuffer region as
 * `data`, so writes through either view are visible through the other.
 * @param {Int16Array} data PCM16 samples.
 * @returns {Uint8Array} Byte view over the memory backing `data`.
 */
function pcm16ToBytes(data) {
  const { buffer, byteOffset, byteLength } = data;
  return new Uint8Array(buffer, byteOffset, byteLength);
}
417
/**
 * Interpret raw bytes as signed 16-bit PCM samples (platform byte order).
 *
 * When the byte view starts on an even offset the result is a zero-copy view
 * over the same ArrayBuffer. An Int16Array cannot start on an odd byte
 * offset (the constructor throws a RangeError), so for such inputs the bytes
 * are copied into a fresh, aligned buffer first. A trailing odd byte, if
 * any, is ignored.
 * @param {Uint8Array} bytes Raw PCM16 bytes.
 * @returns {Int16Array} `Math.floor(bytes.length / 2)` samples.
 */
function bytesToPcm16(bytes) {
  const sampleCount = Math.floor(bytes.length / 2);
  if (bytes.byteOffset % 2 === 0) {
    return new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount);
  }
  // Unaligned view: copy into a new buffer so the Int16Array can be created.
  const aligned = new Uint8Array(sampleCount * 2);
  aligned.set(bytes.subarray(0, sampleCount * 2));
  return new Int16Array(aligned.buffer);
}
420
/**
 * Scale a signal so its largest absolute sample equals `targetPeak`.
 * A signal whose peak is exactly 0 is returned as an unchanged copy.
 * @param {Float32Array} data Input samples.
 * @param {number} [targetPeak=0.95] Desired absolute peak after scaling.
 * @returns {Float32Array} New array with the scaled signal.
 */
function normalizeAudio(data, targetPeak = 0.95) {
  let peak = 0;
  for (let n = 0; n < data.length; n += 1) {
    peak = Math.max(peak, Math.abs(data[n]));
  }
  if (peak === 0) {
    return new Float32Array(data);
  }
  const gain = targetPeak / peak;
  return Float32Array.from({ length: data.length }, (_, n) => data[n] * gain);
}
433
/**
 * Compute the root-mean-square level of a block of samples.
 *
 * Uint8Array input is treated as unsigned 8-bit data centred on 128 (the
 * format produced by `AnalyserNode.getByteTimeDomainData`) and mapped to
 * [-1, 1) before squaring. Any other input is used as-is.
 * An empty input returns 0 instead of NaN (0 / 0), so callers that scale or
 * clamp the result do not have to special-case it.
 * @param {Float32Array|Uint8Array} data Samples to measure.
 * @returns {number} RMS value; 0 for empty input.
 */
function calculateRMS(data) {
  const length = data.length;
  if (length === 0) {
    return 0;
  }
  let sum = 0;
  if (data instanceof Uint8Array) {
    for (let i = 0; i < length; i++) {
      const v = (data[i] - 128) / 128;
      sum += v * v;
    }
  } else {
    for (let i = 0; i < length; i++) {
      sum += data[i] * data[i];
    }
  }
  return Math.sqrt(sum / length);
}
448
/**
 * Chunk-by-chunk linear-interpolation resampler.
 *
 * Input samples that are not yet consumed are kept in `inputBuffer` and
 * prepended to the next chunk. Each call starts interpolating at position 0
 * of the combined buffer (the fractional read position is not carried over
 * between calls).
 */
var StreamResampler = class {
  inputBuffer = new Float32Array(0);
  inputRate;
  outputRate;
  /**
   * @param {number} inputRate Sample rate of the chunks passed to `process`.
   * @param {number} outputRate Sample rate of the returned chunks.
   */
  constructor(inputRate, outputRate) {
    this.inputRate = inputRate;
    this.outputRate = outputRate;
  }
  /**
   * Process a chunk of audio and return resampled data
   * @param inputChunk Float32Array chunk to process
   * @param flush If true, output remaining buffered samples. The whole
   *   combined buffer is converted (output length rounded up so a short tail
   *   still produces a sample) and the internal buffer is emptied, so nothing
   *   leaks into a later stream.
   * @returns Resampled Float32Array (may be empty if more data needed)
   */
  process(inputChunk, flush = false) {
    const combined = new Float32Array(this.inputBuffer.length + inputChunk.length);
    combined.set(this.inputBuffer);
    combined.set(inputChunk, this.inputBuffer.length);
    const ratio = this.inputRate / this.outputRate;
    const exactLength = combined.length / ratio;
    const outputLength = flush ? Math.ceil(exactLength) : Math.floor(exactLength);
    if (outputLength === 0 && !flush) {
      this.inputBuffer = combined;
      return new Float32Array(0);
    }
    const lastIndex = combined.length - 1;
    const output = new Float32Array(outputLength);
    for (let i = 0; i < outputLength; i++) {
      const pos = i * ratio;
      // Clamping only matters on flush, where rounding the length up (or
      // floating-point error) can push the last position to the buffer end.
      const left = Math.min(Math.floor(pos), lastIndex);
      const right = Math.min(left + 1, lastIndex);
      const weight = Math.min(pos - left, 1);
      output[i] = combined[left] * (1 - weight) + combined[right] * weight;
    }
    if (flush) {
      // Everything buffered has been emitted; start the next stream clean.
      this.inputBuffer = new Float32Array(0);
      return output;
    }
    const remainingSamples = Math.ceil(combined.length - outputLength * ratio);
    this.inputBuffer = combined.slice(
      combined.length - remainingSamples
    );
    return output;
  }
  /**
   * Discard any buffered input samples.
   */
  reset() {
    this.inputBuffer = new Float32Array(0);
  }
};
490
+
491
+ // src/browser-audio.ts
492
/**
 * Browser-side helper around the Web Audio API: captures microphone audio
 * as PCM16 bytes, schedules playback of PCM16 chunks back-to-back, and
 * exposes analyser data for visualization.
 */
var BrowserAudioManager = class {
  audioContext = null;
  mediaStreamAudioSourceNode = null;
  scriptProcessor = null;
  analyserNode = null;
  mediaStream = null;
  // Playback scheduling
  nextPlaybackTime = 0;
  activeSources = [];
  playbackQueue = [];
  // Configuration
  inputSampleRate;
  outputSampleRate;
  autoGainControl;
  echoCancellation;
  noiseSuppression;
  // Callbacks
  onAudioInput;
  onInputError;
  // Audio processing state
  isMuted = false;
  isListening = false;
  /**
   * @param config Optional settings: `inputSampleRate` (default
   *   AUDIO_CONFIG.SAMPLE_RATE), `outputSampleRate` (default
   *   AUDIO_CONFIG.SPEAKER_SAMPLE_RATE), `autoGainControl`,
   *   `echoCancellation`, `noiseSuppression` (all default true) and
   *   `onInputError`, called with the Error when starting the microphone fails.
   */
  constructor(config = {}) {
    this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
    this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
    this.autoGainControl = config.autoGainControl ?? true;
    this.echoCancellation = config.echoCancellation ?? true;
    this.noiseSuppression = config.noiseSuppression ?? true;
    this.onInputError = config.onInputError;
  }
  /**
   * Initialize the AudioContext and analyser
   * No-op when a context already exists. Resumes the context if it starts
   * suspended. The analyser is created unless `analyserConfig.enabled` is
   * `false`; `analyserConfig.fftSize` defaults to 256.
   * @throws {Error} When neither `AudioContext` nor `webkitAudioContext` exists.
   */
  async init(analyserConfig) {
    if (this.audioContext) return;
    const AudioContextClass = window.AudioContext || window.webkitAudioContext;
    if (!AudioContextClass) {
      throw new Error("Web Audio API not supported in this browser");
    }
    this.audioContext = new AudioContextClass();
    if (this.audioContext.state === "suspended") {
      await this.audioContext.resume();
      console.log("\u{1F442} AudioContext resumed");
    }
    if (analyserConfig?.enabled !== false) {
      this.analyserNode = this.audioContext.createAnalyser();
      this.analyserNode.fftSize = analyserConfig?.fftSize ?? 256;
    }
  }
  /**
   * Start capturing audio from the microphone
   * Any capture that is already running is released first so its stream is
   * not leaked. On failure every partially acquired resource is released,
   * `isRecording()` reports false, `onInputError` (if set) receives the
   * error, and the error is rethrown.
   * @param onAudioInput Called with a Uint8Array of PCM16 bytes per captured block.
   */
  async startMicrophone(onAudioInput) {
    if (!this.audioContext) {
      await this.init();
    }
    try {
      if (this.mediaStream || this.scriptProcessor || this.mediaStreamAudioSourceNode) {
        this._releaseMicrophone();
      }
      this.onAudioInput = onAudioInput;
      this.isListening = true;
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          autoGainControl: this.autoGainControl,
          echoCancellation: this.echoCancellation,
          noiseSuppression: this.noiseSuppression
        }
      });
      this.mediaStreamAudioSourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
      const bufferSize = 4096;
      this.scriptProcessor = this.audioContext.createScriptProcessor(
        bufferSize,
        1,
        // input channels
        1
        // output channels
      );
      this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
      // The processor is routed to the destination; its output buffer is
      // zeroed in _processAudioInput, so the microphone is not played back.
      this.scriptProcessor.connect(this.audioContext.destination);
      if (this.analyserNode) {
        this.mediaStreamAudioSourceNode.connect(this.analyserNode);
      }
      this.scriptProcessor.onaudioprocess = (event) => {
        this._processAudioInput(event);
      };
      console.log("\u{1F3A4} Microphone started");
    } catch (error) {
      // Do not leave a half-started capture (or a stale isListening flag) behind.
      this._releaseMicrophone();
      const err = error instanceof Error ? error : new Error(String(error));
      if (this.onInputError) this.onInputError(err);
      throw err;
    }
  }
  /**
   * Internal method to process microphone audio data
   * Skips the block when not listening or muted. Otherwise silences the
   * processor output, resamples from the context's sample rate to
   * `inputSampleRate` when they differ, converts to PCM16 and passes the
   * bytes to `onAudioInput`.
   */
  _processAudioInput(event) {
    if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
    if (this.isMuted) return;
    const inputData = event.inputBuffer.getChannelData(0);
    event.outputBuffer.getChannelData(0).fill(0);
    const hardwareRate = this.audioContext.sampleRate;
    // Copy first: the event's buffer belongs to the audio node.
    let processedData = new Float32Array(inputData);
    if (hardwareRate !== this.inputSampleRate) {
      processedData = resampleWithAntiAliasing(
        processedData,
        hardwareRate,
        this.inputSampleRate
      );
    }
    const int16Data = float32ToPcm16(processedData);
    const uint8Data = new Uint8Array(
      int16Data.buffer,
      int16Data.byteOffset,
      int16Data.byteLength
    );
    this.onAudioInput(uint8Data);
  }
  /**
   * Internal method that releases every microphone resource currently held
   * (stream tracks, processor node, source node) and clears `isListening`.
   * Safe to call when nothing was acquired.
   */
  _releaseMicrophone() {
    this.isListening = false;
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }
    if (this.scriptProcessor) {
      this.scriptProcessor.onaudioprocess = null;
      this.scriptProcessor.disconnect();
      this.scriptProcessor = null;
    }
    if (this.mediaStreamAudioSourceNode) {
      this.mediaStreamAudioSourceNode.disconnect();
      this.mediaStreamAudioSourceNode = null;
    }
  }
  /**
   * Stop capturing microphone input
   */
  stopMicrophone() {
    this._releaseMicrophone();
    console.log("\u{1F3A4} Microphone stopped");
  }
  /**
   * Play back audio received from the server
   * @param pcm16Data Raw bytes of 16-bit PCM audio at `outputSampleRate`
   *   (a Uint8Array view; a trailing odd byte is ignored). Empty chunks are
   *   skipped, and views that start on an odd byte offset are copied to an
   *   aligned buffer before decoding.
   */
  playAudio(pcm16Data) {
    if (!this.audioContext) {
      console.warn("AudioContext not initialized");
      return;
    }
    const sampleCount = Math.floor(pcm16Data.byteLength / 2);
    // createBuffer rejects a length of 0, so there is nothing to schedule.
    if (sampleCount === 0) return;
    let int16Array;
    if (pcm16Data.byteOffset % 2 === 0) {
      int16Array = new Int16Array(pcm16Data.buffer, pcm16Data.byteOffset, sampleCount);
    } else {
      // An Int16Array cannot start on an odd byte offset; decode from a copy.
      const aligned = new Uint8Array(sampleCount * 2);
      aligned.set(new Uint8Array(pcm16Data.buffer, pcm16Data.byteOffset, sampleCount * 2));
      int16Array = new Int16Array(aligned.buffer);
    }
    const float32Data = pcm16ToFloat32(int16Array);
    const audioBuffer = this.audioContext.createBuffer(
      1,
      float32Data.length,
      this.outputSampleRate
    );
    audioBuffer.getChannelData(0).set(float32Data);
    this._schedulePlayback(audioBuffer);
  }
  /**
   * Internal method to schedule and play audio with sample-accurate timing
   * Each buffer starts at the later of "10 ms from now" and the end of the
   * previously scheduled buffer, so consecutive chunks play back-to-back.
   */
  _schedulePlayback(audioBuffer) {
    if (!this.audioContext) return;
    const currentTime = this.audioContext.currentTime;
    const duration = audioBuffer.length / this.outputSampleRate;
    const startTime = Math.max(
      currentTime + 0.01,
      // Minimum 10ms delay
      this.nextPlaybackTime
    );
    this.nextPlaybackTime = startTime + duration;
    const source = this.audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(this.audioContext.destination);
    if (this.analyserNode) {
      source.connect(this.analyserNode);
    }
    source.start(startTime);
    this.activeSources.push(source);
    source.onended = () => {
      const index = this.activeSources.indexOf(source);
      if (index > -1) {
        this.activeSources.splice(index, 1);
      }
    };
  }
  /**
   * Stop all currently playing audio and clear the queue
   */
  stopPlayback() {
    this.activeSources.forEach((source) => {
      try {
        source.stop();
      } catch (e) {
        // Best effort: a source that cannot be stopped is simply dropped.
      }
    });
    this.activeSources = [];
    this.playbackQueue = [];
    this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
    console.log("\u{1F507} Playback stopped");
  }
  /**
   * Toggle mute state
   */
  setMuted(muted) {
    this.isMuted = muted;
  }
  /**
   * Get current mute state
   */
  isMicMuted() {
    return this.isMuted;
  }
  /**
   * Get current amplitude from analyser (for visualization)
   * Returns value between 0 and 1
   * (RMS of the time-domain data multiplied by 10 and capped at 1; 0 when
   * no analyser exists.)
   */
  getAmplitude() {
    if (!this.analyserNode) return 0;
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteTimeDomainData(dataArray);
    const rms = calculateRMS(dataArray);
    return Math.min(rms * 10, 1);
  }
  /**
   * Get frequency data from analyser for visualization
   * Returns an empty Uint8Array when no analyser exists.
   */
  getFrequencyData() {
    if (!this.analyserNode) {
      return new Uint8Array(0);
    }
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteFrequencyData(dataArray);
    return dataArray;
  }
  /**
   * Get time-domain data from analyser for waveform visualization
   * Returns an empty Uint8Array when no analyser exists.
   */
  getWaveformData() {
    if (!this.analyserNode) {
      return new Uint8Array(0);
    }
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteTimeDomainData(dataArray);
    return dataArray;
  }
  /**
   * Cleanup and close AudioContext
   * Stops capture and playback, disconnects the analyser, then closes and
   * releases the AudioContext so a later `init()` creates a fresh one.
   * Calling it again afterwards is a no-op for the context.
   */
  cleanup() {
    this.stopMicrophone();
    this.stopPlayback();
    if (this.analyserNode) {
      this.analyserNode.disconnect();
      this.analyserNode = null;
    }
    const context = this.audioContext;
    this.audioContext = null;
    // A new context starts its clock again, so drop the old schedule.
    this.nextPlaybackTime = 0;
    if (context && context.state !== "closed" && typeof context.close === "function") {
      Promise.resolve(context.close()).catch((err) => {
        console.warn("Failed to close AudioContext", err);
      });
    }
  }
  /**
   * Get current audio context state
   * Returns null when no AudioContext exists.
   */
  getState() {
    return this.audioContext?.state ?? null;
  }
  /**
   * Check if microphone is currently listening
   */
  isRecording() {
    return this.isListening;
  }
};
333
768
  // Annotate the CommonJS export names for ESM import in node:
334
769
  0 && (module.exports = {
335
770
  AUDIO_CONFIG,
771
+ BrowserAudioManager,
336
772
  DEFAULT_URLS,
337
773
  Language,
774
+ StreamResampler,
338
775
  TTSClient,
339
776
  VoiceAgentClient,
340
777
  VoiceStyle,
778
+ applyLowPassFilter,
779
+ bytesToPcm16,
780
+ calculateRMS,
781
+ float32ToPcm16,
782
+ normalizeAudio,
783
+ pcm16ToBytes,
784
+ pcm16ToFloat32,
785
+ resample,
786
+ resampleWithAntiAliasing,
341
787
  simpleConversation,
342
788
  simpleTTS
343
789
  });