@lokutor/sdk 1.1.1 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -53,10 +53,12 @@ var VoiceAgentClient = class {
53
53
  onTranscription;
54
54
  onResponse;
55
55
  onAudioCallback;
56
+ onVisemesCallback;
56
57
  onStatus;
57
58
  onError;
58
59
  isConnected = false;
59
60
  messages = [];
61
+ visemeListeners = [];
60
62
  constructor(config) {
61
63
  this.apiKey = config.apiKey;
62
64
  this.prompt = config.prompt;
@@ -65,6 +67,7 @@ var VoiceAgentClient = class {
65
67
  this.onTranscription = config.onTranscription;
66
68
  this.onResponse = config.onResponse;
67
69
  this.onAudioCallback = config.onAudio;
70
+ this.onVisemesCallback = config.onVisemes;
68
71
  this.onStatus = config.onStatus;
69
72
  this.onError = config.onError;
70
73
  }
@@ -173,6 +176,11 @@ var VoiceAgentClient = class {
173
176
  };
174
177
  console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
175
178
  break;
179
+ case "visemes":
180
+ if (Array.isArray(msg.data) && msg.data.length > 0) {
181
+ this.emit("visemes", msg.data);
182
+ }
183
+ break;
176
184
  case "error":
177
185
  if (this.onError) this.onError(msg.data);
178
186
  console.error(`\u274C Server error: ${msg.data}`);
@@ -186,11 +194,17 @@ var VoiceAgentClient = class {
186
194
  if (event === "audio") {
187
195
  if (this.onAudioCallback) this.onAudioCallback(data);
188
196
  this.audioListeners.forEach((l) => l(data));
197
+ } else if (event === "visemes") {
198
+ if (this.onVisemesCallback) this.onVisemesCallback(data);
199
+ this.visemeListeners.forEach((l) => l(data));
189
200
  }
190
201
  }
191
202
  onAudio(callback) {
192
203
  this.audioListeners.push(callback);
193
204
  }
205
+ onVisemes(callback) {
206
+ this.visemeListeners.push(callback);
207
+ }
194
208
  /**
195
209
  * Disconnect from the server
196
210
  */
@@ -297,13 +311,434 @@ async function simpleTTS(options) {
297
311
  const client = new TTSClient({ apiKey: options.apiKey });
298
312
  return client.synthesize(options);
299
313
  }
314
+
315
+ // src/audio-utils.ts
316
/**
 * Convert signed 16-bit PCM samples into floating-point samples.
 *
 * Every sample is divided by 32768, so -32768 maps to -1 and 32767 maps to
 * just under +1.
 *
 * @param {Int16Array} int16Data - Source samples.
 * @returns {Float32Array} New array of the same length.
 */
function pcm16ToFloat32(int16Data) {
  const FULL_SCALE = 32768;
  return Float32Array.from(int16Data, (sample) => sample / FULL_SCALE);
}
323
/**
 * Convert floating-point samples into signed 16-bit PCM.
 *
 * Samples are clamped to [-1, 1]; negative values are scaled by 32768 and
 * non-negative values by 32767 so both ends of the int16 range are
 * reachable. The fractional part is dropped when the value is stored in the
 * Int16Array.
 *
 * @param {Float32Array} float32Data - Source samples.
 * @returns {Int16Array} New array of the same length.
 */
function float32ToPcm16(float32Data) {
  return Int16Array.from(float32Data, (sample) => {
    const clamped = Math.min(1, Math.max(-1, sample));
    const scale = clamped < 0 ? 32768 : 32767;
    return clamped * scale;
  });
}
331
/**
 * Resample audio with linear interpolation.
 *
 * @param {Float32Array} input - Source samples.
 * @param {number} inputRate - Sample rate of `input`.
 * @param {number} outputRate - Desired sample rate.
 * @returns {Float32Array} New array holding round(input.length * outputRate /
 *   inputRate) samples; a plain copy when both rates are equal.
 * @throws {RangeError} If the rates differ and either one is not a positive
 *   finite number. Without this check a NaN rate silently produced an empty
 *   array and a zero rate produced either an empty array or an unrelated
 *   "invalid typed array length" error.
 */
function resample(input, inputRate, outputRate) {
  if (inputRate === outputRate) {
    return new Float32Array(input);
  }
  if (!Number.isFinite(inputRate) || !Number.isFinite(outputRate) || inputRate <= 0 || outputRate <= 0) {
    throw new RangeError(
      `Sample rates must be positive finite numbers (got ${inputRate} -> ${outputRate})`
    );
  }
  // Distance, in input samples, between two consecutive output samples.
  const ratio = inputRate / outputRate;
  const lastIndex = input.length - 1;
  const outputLength = Math.round(input.length / ratio);
  const output = new Float32Array(outputLength);
  for (let i = 0; i < outputLength; i++) {
    const pos = i * ratio;
    const left = Math.min(Math.floor(pos), lastIndex);
    // The final sample has no right-hand neighbour; reuse it.
    const right = Math.min(left + 1, lastIndex);
    const weight = pos - left;
    output[i] = input[left] * (1 - weight) + input[right] * weight;
  }
  return output;
}
347
/**
 * Run a first-order (single-pole RC) low-pass filter over the samples.
 *
 * The smoothing factor is dt / (rc + dt) with dt = 1 / sampleRate and
 * rc = 1 / (2 * PI * cutoffFreq). The first output sample equals the first
 * input sample.
 *
 * @param {Float32Array} data - Source samples (not modified).
 * @param {number} cutoffFreq - Cutoff frequency.
 * @param {number} sampleRate - Sample rate of `data`.
 * @returns {Float32Array} New array of the same length.
 */
function applyLowPassFilter(data, cutoffFreq, sampleRate) {
  const samplePeriod = 1 / sampleRate;
  const timeConstant = 1 / (2 * Math.PI * cutoffFreq);
  const smoothing = samplePeriod / (timeConstant + samplePeriod);
  const out = new Float32Array(data.length);
  out[0] = data[0];
  let index = 1;
  while (index < data.length) {
    // Read the previous value back from the output array so it carries
    // float32 precision, exactly like the running state of the filter.
    const previous = out[index - 1];
    out[index] = previous + smoothing * (data[index] - previous);
    index += 1;
  }
  return out;
}
358
/**
 * Resample audio, low-pass filtering first when the rate is being reduced.
 *
 * When downsampling, the input is filtered with applyLowPassFilter at 90% of
 * the output Nyquist frequency (outputRate / 2) before being handed to
 * resample(). Upsampling goes straight to resample(). Equal rates return a
 * copy of the input.
 *
 * @param {Float32Array} input - Source samples.
 * @param {number} inputRate - Sample rate of `input`.
 * @param {number} outputRate - Desired sample rate.
 * @returns {Float32Array} Resampled audio.
 */
function resampleWithAntiAliasing(input, inputRate, outputRate) {
  if (inputRate === outputRate) {
    return new Float32Array(input);
  }
  const isDownsampling = outputRate < inputRate;
  const source = isDownsampling
    ? applyLowPassFilter(input, (outputRate / 2) * 0.9, inputRate)
    : input;
  return resample(source, inputRate, outputRate);
}
370
/**
 * View 16-bit PCM samples as raw bytes.
 *
 * No data is copied: the returned Uint8Array covers the same region of the
 * same underlying buffer as `data`.
 *
 * @param {Int16Array} data - Source samples.
 * @returns {Uint8Array} Byte view over the same memory.
 */
function pcm16ToBytes(data) {
  const { buffer, byteOffset, byteLength } = data;
  return new Uint8Array(buffer, byteOffset, byteLength);
}
373
/**
 * Interpret raw bytes as signed 16-bit PCM samples (platform byte order).
 *
 * When the byte view starts on an even offset, the result is a zero-copy
 * view over the same memory. An Int16Array cannot start on an odd byte
 * offset (the constructor throws RangeError), which happens for slices of
 * larger buffers, so in that case the bytes are copied into a fresh,
 * aligned buffer instead. A trailing odd byte is ignored.
 *
 * @param {Uint8Array} bytes - Raw PCM bytes.
 * @returns {Int16Array} floor(bytes.length / 2) samples.
 */
function bytesToPcm16(bytes) {
  const sampleCount = Math.floor(bytes.length / 2);
  if (bytes.byteOffset % 2 === 0) {
    return new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount);
  }
  // Copy through a plain Uint8Array: subclasses such as Node's Buffer
  // override slice() to return another (still misaligned) view.
  const aligned = new Uint8Array(sampleCount * 2);
  aligned.set(bytes.subarray(0, sampleCount * 2));
  return new Int16Array(aligned.buffer);
}
376
/**
 * Scale audio so that its largest absolute sample equals `targetPeak`.
 *
 * Non-finite samples (NaN, +/-Infinity) are ignored when looking for the
 * peak; otherwise a single bad sample would make the scale factor NaN or 0
 * and turn the entire output into NaN. Such samples are still multiplied by
 * the scale factor like any other sample.
 *
 * @param {Float32Array} data - Source samples (not modified).
 * @param {number} [targetPeak=0.95] - Desired peak magnitude.
 * @returns {Float32Array} New array of the same length; an unscaled copy
 *   when the input has no non-zero finite sample.
 */
function normalizeAudio(data, targetPeak = 0.95) {
  let maxAbs = 0;
  for (let i = 0; i < data.length; i++) {
    const magnitude = Math.abs(data[i]);
    if (Number.isFinite(magnitude) && magnitude > maxAbs) {
      maxAbs = magnitude;
    }
  }
  // Silence (or nothing usable): scaling would divide by zero.
  if (maxAbs === 0) return new Float32Array(data);
  const scale = targetPeak / maxAbs;
  const normalized = new Float32Array(data.length);
  for (let i = 0; i < data.length; i++) {
    normalized[i] = data[i] * scale;
  }
  return normalized;
}
389
/**
 * Compute the root-mean-square level of a block of samples.
 *
 * Uint8Array input is treated as unsigned 8-bit data centred on 128 (the
 * format written by AnalyserNode.getByteTimeDomainData) and is mapped to
 * [-1, 1) first. Any other input is used as-is.
 *
 * @param {Uint8Array|Float32Array} data - Samples to measure.
 * @returns {number} RMS value; 0 for empty input (previously NaN from 0/0,
 *   which then propagated through callers' arithmetic).
 */
function calculateRMS(data) {
  const length = data.length;
  if (length === 0) return 0;
  let sum = 0;
  if (data instanceof Uint8Array) {
    for (let i = 0; i < length; i++) {
      const v = (data[i] - 128) / 128;
      sum += v * v;
    }
  } else {
    for (let i = 0; i < length; i++) {
      sum += data[i] * data[i];
    }
  }
  return Math.sqrt(sum / length);
}
404
/**
 * Linear-interpolation resampler for audio that arrives in chunks.
 *
 * The resampler remembers the unconsumed tail of the input and the exact
 * (fractional) position of the next output sample, so feeding a signal in
 * several chunks yields the same samples as feeding it in one piece.
 */
var StreamResampler = class {
  // Input samples that have not been fully consumed yet.
  inputBuffer = new Float32Array(0);
  inputRate;
  outputRate;
  // Position of the next output sample, measured in input samples from
  // inputBuffer[0]. Kept between calls so non-integer rate ratios do not
  // drift at chunk boundaries.
  phase = 0;
  /**
   * @param {number} inputRate - Sample rate of the incoming chunks.
   * @param {number} outputRate - Sample rate of the produced audio.
   * @throws {RangeError} If either rate is not a positive finite number.
   */
  constructor(inputRate, outputRate) {
    const valid = Number.isFinite(inputRate) && inputRate > 0 && Number.isFinite(outputRate) && outputRate > 0;
    if (!valid) {
      throw new RangeError(
        `Sample rates must be positive finite numbers (got ${inputRate} -> ${outputRate})`
      );
    }
    this.inputRate = inputRate;
    this.outputRate = outputRate;
  }
  /**
   * Process a chunk of audio and return resampled data
   * @param inputChunk Float32Array chunk to process
   * @param flush If true, also emit the samples at the very end of the
   *   stream (interpolated against the last input sample) and clear the
   *   internal state
   * @returns Resampled Float32Array (may be empty if more data needed)
   */
  process(inputChunk, flush = false) {
    if (this.inputRate === this.outputRate) {
      // Nothing to interpolate and nothing is ever buffered.
      return new Float32Array(inputChunk);
    }
    const combined = new Float32Array(this.inputBuffer.length + inputChunk.length);
    combined.set(this.inputBuffer);
    combined.set(inputChunk, this.inputBuffer.length);
    const ratio = this.inputRate / this.outputRate;
    const lastIndex = combined.length - 1;
    // Mid-stream, an output sample is only produced once its right-hand
    // neighbour has arrived (position < lastIndex). On flush there is no
    // more input, so everything up to the end is produced.
    const limit = flush ? combined.length : lastIndex;
    let outputLength = Math.max(0, Math.ceil((limit - this.phase) / ratio));
    // Guard against floating-point rounding pushing the last position
    // past the limit.
    while (outputLength > 0 && this.phase + (outputLength - 1) * ratio >= limit) {
      outputLength--;
    }
    const output = new Float32Array(outputLength);
    for (let i = 0; i < outputLength; i++) {
      const pos = this.phase + i * ratio;
      const left = Math.floor(pos);
      const right = Math.min(left + 1, lastIndex);
      const weight = pos - left;
      output[i] = combined[left] * (1 - weight) + combined[right] * weight;
    }
    if (flush) {
      this.reset();
      return output;
    }
    // Drop the input that is no longer needed and keep the remaining
    // offset. The offset can exceed the buffered length when downsampling
    // by more than the number of samples available.
    const nextPos = this.phase + outputLength * ratio;
    const consumed = Math.min(Math.floor(nextPos), combined.length);
    this.inputBuffer = combined.slice(consumed);
    this.phase = nextPos - consumed;
    return output;
  }
  /**
   * Discard buffered input and the stored position.
   */
  reset() {
    this.inputBuffer = new Float32Array(0);
    this.phase = 0;
  }
};
446
+
447
+ // src/browser-audio.ts
448
/**
 * Browser-side audio helper built on the Web Audio API.
 *
 * Captures microphone audio (delivered to a callback as 16-bit PCM bytes at
 * `inputSampleRate`), plays back 16-bit PCM received as bytes, and exposes
 * analyser data for visualisation.
 */
var BrowserAudioManager = class {
  audioContext = null;
  mediaStreamAudioSourceNode = null;
  scriptProcessor = null;
  analyserNode = null;
  mediaStream = null;
  // Playback scheduling
  nextPlaybackTime = 0;
  activeSources = [];
  playbackQueue = [];
  // Configuration
  inputSampleRate;
  outputSampleRate;
  autoGainControl;
  echoCancellation;
  noiseSuppression;
  // Callbacks
  onAudioInput;
  onInputError;
  // Audio processing state
  isMuted = false;
  isListening = false;
  /**
   * @param config Optional settings: inputSampleRate, outputSampleRate,
   *   autoGainControl, echoCancellation, noiseSuppression (each defaulting
   *   to true) and onInputError. Missing sample rates fall back to
   *   AUDIO_CONFIG, which is defined elsewhere in this module.
   */
  constructor(config = {}) {
    this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
    this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
    this.autoGainControl = config.autoGainControl ?? true;
    this.echoCancellation = config.echoCancellation ?? true;
    this.noiseSuppression = config.noiseSuppression ?? true;
    this.onInputError = config.onInputError;
  }
  /**
   * Initialize the AudioContext and analyser
   *
   * Does nothing when a context already exists. The analyser is created
   * unless analyserConfig.enabled is explicitly false; its fftSize defaults
   * to 256.
   * @throws Error when the browser has no AudioContext implementation
   */
  async init(analyserConfig) {
    if (this.audioContext) return;
    const AudioContextClass = window.AudioContext || window.webkitAudioContext;
    if (!AudioContextClass) {
      throw new Error("Web Audio API not supported in this browser");
    }
    this.audioContext = new AudioContextClass();
    if (this.audioContext.state === "suspended") {
      await this.audioContext.resume();
      console.log("\u{1F442} AudioContext resumed");
    }
    if (analyserConfig?.enabled !== false) {
      this.analyserNode = this.audioContext.createAnalyser();
      this.analyserNode.fftSize = analyserConfig?.fftSize ?? 256;
    }
  }
  /**
   * Start capturing audio from the microphone
   *
   * Any capture already running is released first so that calling this
   * twice does not leak the previous stream and processor node. If setup
   * fails, everything acquired so far is released, onInputError (when
   * configured) is notified, and the error is rethrown.
   * @param onAudioInput Called with a Uint8Array of 16-bit PCM bytes for
   *   every captured buffer
   */
  async startMicrophone(onAudioInput) {
    if (!this.audioContext) {
      await this.init();
    }
    this._releaseMicrophone();
    try {
      this.onAudioInput = onAudioInput;
      this.isListening = true;
      this.mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          autoGainControl: this.autoGainControl,
          echoCancellation: this.echoCancellation,
          noiseSuppression: this.noiseSuppression
        }
      });
      this.mediaStreamAudioSourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
      const bufferSize = 4096;
      // NOTE: ScriptProcessorNode is deprecated in favour of AudioWorklet;
      // it is kept here because it needs no separate worklet module.
      this.scriptProcessor = this.audioContext.createScriptProcessor(
        bufferSize,
        1,
        // input channels
        1
        // output channels
      );
      this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
      // The processor is wired to the destination so that it keeps
      // receiving audioprocess events; its output is silenced in
      // _processAudioInput.
      this.scriptProcessor.connect(this.audioContext.destination);
      if (this.analyserNode) {
        this.mediaStreamAudioSourceNode.connect(this.analyserNode);
      }
      this.scriptProcessor.onaudioprocess = (event) => {
        this._processAudioInput(event);
      };
      console.log("\u{1F3A4} Microphone started");
    } catch (error) {
      // Roll back: without this, isListening stayed true and a partially
      // opened stream kept the microphone in use.
      this._releaseMicrophone();
      const err = error instanceof Error ? error : new Error(String(error));
      if (this.onInputError) this.onInputError(err);
      throw err;
    }
  }
  /**
   * Internal method to process microphone audio data
   *
   * Silences the processor output, resamples the captured block to
   * inputSampleRate when the hardware rate differs, converts it to 16-bit
   * PCM and hands the bytes to onAudioInput. Skipped while muted or not
   * listening.
   */
  _processAudioInput(event) {
    if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
    if (this.isMuted) return;
    const inputData = event.inputBuffer.getChannelData(0);
    // Never route the microphone to the speakers.
    event.outputBuffer.getChannelData(0).fill(0);
    const hardwareRate = this.audioContext.sampleRate;
    // Copy: the engine may reuse the event's buffer after this callback.
    let processedData = new Float32Array(inputData);
    if (hardwareRate !== this.inputSampleRate) {
      processedData = resampleWithAntiAliasing(
        processedData,
        hardwareRate,
        this.inputSampleRate
      );
    }
    const int16Data = float32ToPcm16(processedData);
    const uint8Data = new Uint8Array(
      int16Data.buffer,
      int16Data.byteOffset,
      int16Data.byteLength
    );
    this.onAudioInput(uint8Data);
  }
  /**
   * Internal method that stops capture and releases the stream and nodes
   * without logging. Shared by stopMicrophone and the error/restart paths
   * of startMicrophone.
   */
  _releaseMicrophone() {
    this.isListening = false;
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }
    if (this.scriptProcessor) {
      this.scriptProcessor.onaudioprocess = null;
      this.scriptProcessor.disconnect();
      this.scriptProcessor = null;
    }
    if (this.mediaStreamAudioSourceNode) {
      this.mediaStreamAudioSourceNode.disconnect();
      this.mediaStreamAudioSourceNode = null;
    }
  }
  /**
   * Stop capturing microphone input
   */
  stopMicrophone() {
    this._releaseMicrophone();
    console.log("\u{1F3A4} Microphone stopped");
  }
  /**
   * Play back audio received from the server
   * @param pcm16Data Byte array (e.g. Uint8Array) holding 16-bit PCM
   *   samples at the configured output sample rate. Empty input is ignored
   *   and a trailing odd byte is dropped.
   */
  playAudio(pcm16Data) {
    if (!this.audioContext) {
      console.warn("AudioContext not initialized");
      return;
    }
    const sampleCount = Math.floor(pcm16Data.length / 2);
    // createBuffer rejects zero-length buffers.
    if (sampleCount === 0) return;
    let int16Array;
    if (pcm16Data.byteOffset % 2 === 0) {
      int16Array = new Int16Array(pcm16Data.buffer, pcm16Data.byteOffset, sampleCount);
    } else {
      // An Int16Array cannot start on an odd byte offset, which happens
      // for slices of larger buffers; copy into an aligned buffer.
      const aligned = new Uint8Array(sampleCount * 2);
      aligned.set(pcm16Data.subarray(0, sampleCount * 2));
      int16Array = new Int16Array(aligned.buffer);
    }
    const float32Data = pcm16ToFloat32(int16Array);
    const audioBuffer = this.audioContext.createBuffer(
      1,
      float32Data.length,
      this.outputSampleRate
    );
    audioBuffer.getChannelData(0).set(float32Data);
    this._schedulePlayback(audioBuffer);
  }
  /**
   * Internal method to schedule and play audio with sample-accurate timing
   *
   * Each buffer starts where the previous one ends (or 10 ms from now when
   * nothing is queued), so consecutive chunks play back to back.
   */
  _schedulePlayback(audioBuffer) {
    if (!this.audioContext) return;
    const currentTime = this.audioContext.currentTime;
    const duration = audioBuffer.length / this.outputSampleRate;
    const startTime = Math.max(
      currentTime + 0.01,
      // Minimum 10ms delay
      this.nextPlaybackTime
    );
    this.nextPlaybackTime = startTime + duration;
    const source = this.audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(this.audioContext.destination);
    if (this.analyserNode) {
      source.connect(this.analyserNode);
    }
    source.start(startTime);
    this.activeSources.push(source);
    source.onended = () => {
      const index = this.activeSources.indexOf(source);
      if (index > -1) {
        this.activeSources.splice(index, 1);
      }
    };
  }
  /**
   * Stop all currently playing audio and clear the queue
   */
  stopPlayback() {
    this.activeSources.forEach((source) => {
      try {
        source.stop();
      } catch (e) {
        // Best effort: a source that cannot be stopped is simply dropped.
      }
    });
    this.activeSources = [];
    this.playbackQueue = [];
    this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
    console.log("\u{1F507} Playback stopped");
  }
  /**
   * Toggle mute state
   */
  setMuted(muted) {
    this.isMuted = muted;
  }
  /**
   * Get current mute state
   */
  isMicMuted() {
    return this.isMuted;
  }
  /**
   * Get current amplitude from analyser (for visualization)
   * Returns value between 0 and 1 (RMS multiplied by 10, capped at 1);
   * 0 when no analyser exists
   */
  getAmplitude() {
    if (!this.analyserNode) return 0;
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteTimeDomainData(dataArray);
    const rms = calculateRMS(dataArray);
    return Math.min(rms * 10, 1);
  }
  /**
   * Get frequency data from analyser for visualization
   * (empty array when no analyser exists)
   */
  getFrequencyData() {
    if (!this.analyserNode) {
      return new Uint8Array(0);
    }
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteFrequencyData(dataArray);
    return dataArray;
  }
  /**
   * Get time-domain data from analyser for waveform visualization
   * (empty array when no analyser exists)
   */
  getWaveformData() {
    if (!this.analyserNode) {
      return new Uint8Array(0);
    }
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteTimeDomainData(dataArray);
    return dataArray;
  }
  /**
   * Cleanup and close AudioContext
   *
   * Stops capture and playback, drops the analyser, and closes the context.
   * The context reference is cleared so that a later init() builds a fresh
   * context and analyser instead of returning early on the stale one.
   */
  cleanup() {
    this.stopMicrophone();
    this.stopPlayback();
    if (this.analyserNode) {
      this.analyserNode.disconnect();
      this.analyserNode = null;
    }
    const context = this.audioContext;
    this.audioContext = null;
    // A new context starts its clock at 0; do not carry over the old time.
    this.nextPlaybackTime = 0;
    if (context && context.state !== "closed" && typeof context.close === "function") {
      Promise.resolve(context.close()).catch((error) => {
        console.warn("Failed to close AudioContext", error);
      });
    }
  }
  /**
   * Get current audio context state
   * (null when no context exists)
   */
  getState() {
    return this.audioContext?.state ?? null;
  }
  /**
   * Check if microphone is currently listening
   */
  isRecording() {
    return this.isListening;
  }
};
300
724
  export {
301
725
  AUDIO_CONFIG,
726
+ BrowserAudioManager,
302
727
  DEFAULT_URLS,
303
728
  Language,
729
+ StreamResampler,
304
730
  TTSClient,
305
731
  VoiceAgentClient,
306
732
  VoiceStyle,
733
+ applyLowPassFilter,
734
+ bytesToPcm16,
735
+ calculateRMS,
736
+ float32ToPcm16,
737
+ normalizeAudio,
738
+ pcm16ToBytes,
739
+ pcm16ToFloat32,
740
+ resample,
741
+ resampleWithAntiAliasing,
307
742
  simpleConversation,
308
743
  simpleTTS
309
744
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lokutor/sdk",
3
- "version": "1.1.1",
3
+ "version": "1.1.7",
4
4
  "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",