@lokutor/sdk 1.1.2 → 1.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -53,10 +53,13 @@ var VoiceAgentClient = class {
53
53
  onTranscription;
54
54
  onResponse;
55
55
  onAudioCallback;
56
+ onVisemesCallback;
56
57
  onStatus;
57
58
  onError;
58
59
  isConnected = false;
59
60
  messages = [];
61
+ visemeListeners = [];
62
+ wantVisemes = false;
60
63
  constructor(config) {
61
64
  this.apiKey = config.apiKey;
62
65
  this.prompt = config.prompt;
@@ -65,8 +68,10 @@ var VoiceAgentClient = class {
65
68
  this.onTranscription = config.onTranscription;
66
69
  this.onResponse = config.onResponse;
67
70
  this.onAudioCallback = config.onAudio;
71
+ this.onVisemesCallback = config.onVisemes;
68
72
  this.onStatus = config.onStatus;
69
73
  this.onError = config.onError;
74
+ this.wantVisemes = config.visemes || false;
70
75
  }
71
76
  /**
72
77
  * Connect to the Lokutor Voice Agent server
@@ -118,7 +123,8 @@ var VoiceAgentClient = class {
118
123
  this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
119
124
  this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
120
125
  this.ws.send(JSON.stringify({ type: "language", data: this.language }));
121
- console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}`);
126
+ this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
127
+ console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
122
128
  }
123
129
  /**
124
130
  * Send raw PCM audio data to the server
@@ -173,6 +179,11 @@ var VoiceAgentClient = class {
173
179
  };
174
180
  console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
175
181
  break;
182
+ case "visemes":
183
+ if (Array.isArray(msg.data) && msg.data.length > 0) {
184
+ this.emit("visemes", msg.data);
185
+ }
186
+ break;
176
187
  case "error":
177
188
  if (this.onError) this.onError(msg.data);
178
189
  console.error(`\u274C Server error: ${msg.data}`);
@@ -186,11 +197,17 @@ var VoiceAgentClient = class {
186
197
  if (event === "audio") {
187
198
  if (this.onAudioCallback) this.onAudioCallback(data);
188
199
  this.audioListeners.forEach((l) => l(data));
200
+ } else if (event === "visemes") {
201
+ if (this.onVisemesCallback) this.onVisemesCallback(data);
202
+ this.visemeListeners.forEach((l) => l(data));
189
203
  }
190
204
  }
191
205
  onAudio(callback) {
192
206
  this.audioListeners.push(callback);
193
207
  }
208
+ onVisemes(callback) {
209
+ this.visemeListeners.push(callback);
210
+ }
194
211
  /**
195
212
  * Disconnect from the server
196
213
  */
@@ -297,13 +314,434 @@ async function simpleTTS(options) {
297
314
  const client = new TTSClient({ apiKey: options.apiKey });
298
315
  return client.synthesize(options);
299
316
  }
317
+
318
+ // src/audio-utils.ts
319
/**
 * Convert signed 16-bit PCM samples to floating-point samples.
 *
 * Every sample is divided by 32768, so the full Int16 range maps onto
 * [-1, 1).
 *
 * @param {Int16Array} int16Data - PCM samples to convert.
 * @returns {Float32Array} A new array with one float per input sample.
 */
function pcm16ToFloat32(int16Data) {
  const sampleCount = int16Data.length;
  const floats = new Float32Array(sampleCount);
  let idx = 0;
  while (idx < sampleCount) {
    floats[idx] = int16Data[idx] / 32768;
    idx += 1;
  }
  return floats;
}
326
/**
 * Convert floating-point samples to signed 16-bit PCM.
 *
 * Each sample is clamped to [-1, 1] first. Negative values are scaled by
 * 32768 and non-negative values by 32767; the Int16Array store then drops
 * the fractional part.
 *
 * @param {Float32Array} float32Data - Samples to convert.
 * @returns {Int16Array} A new array with one PCM sample per input sample.
 */
function float32ToPcm16(float32Data) {
  const sampleCount = float32Data.length;
  const pcm = new Int16Array(sampleCount);
  for (let idx = 0; idx < sampleCount; idx += 1) {
    const clamped = Math.max(-1, Math.min(1, float32Data[idx]));
    if (clamped < 0) {
      pcm[idx] = clamped * 32768;
    } else {
      pcm[idx] = clamped * 32767;
    }
  }
  return pcm;
}
334
/**
 * Resample audio by linear interpolation between neighbouring samples.
 *
 * When both rates are equal a copy of the input is returned. Otherwise the
 * output has Math.round(input.length * outputRate / inputRate) samples; the
 * right-hand neighbour is clamped to the last input sample.
 *
 * @param {Float32Array} input - Source samples.
 * @param {number} inputRate - Sample rate of `input`.
 * @param {number} outputRate - Desired sample rate.
 * @returns {Float32Array} A new array holding the resampled audio.
 */
function resample(input, inputRate, outputRate) {
  if (inputRate === outputRate) return new Float32Array(input);

  const step = inputRate / outputRate;
  const lastIndex = input.length - 1;
  const result = new Float32Array(Math.round(input.length / step));

  for (let n = 0; n < result.length; n += 1) {
    const position = n * step;
    const lower = Math.floor(position);
    const upper = Math.min(lower + 1, lastIndex);
    const frac = position - lower;
    result[n] = input[lower] * (1 - frac) + input[upper] * frac;
  }
  return result;
}
350
/**
 * Smooth a signal with a single-pole recursive low-pass filter.
 *
 * The smoothing factor is alpha = dt / (rc + dt) with dt = 1 / sampleRate
 * and rc = 1 / (2 * PI * cutoffFreq). The first output equals the first
 * input; each later output moves from the previous output towards the
 * current input by `alpha`.
 *
 * @param {Float32Array} data - Samples to filter.
 * @param {number} cutoffFreq - Cutoff frequency, in the same unit as `sampleRate`.
 * @param {number} sampleRate - Sample rate of `data`.
 * @returns {Float32Array} A new array with the filtered samples.
 */
function applyLowPassFilter(data, cutoffFreq, sampleRate) {
  const samplePeriod = 1 / sampleRate;
  const timeConstant = 1 / (2 * Math.PI * cutoffFreq);
  const alpha = samplePeriod / (timeConstant + samplePeriod);

  const smoothed = new Float32Array(data.length);
  smoothed[0] = data[0];
  for (let n = 1; n < data.length; n += 1) {
    // Read the previous value back from the float32 array so the recursion
    // works on the stored (rounded) sample.
    const previous = smoothed[n - 1];
    smoothed[n] = previous + alpha * (data[n] - previous);
  }
  return smoothed;
}
361
/**
 * Resample audio, low-pass filtering first when the rate is being reduced.
 *
 * Equal rates return a copy of the input. When `outputRate` is lower than
 * `inputRate` the input is passed through `applyLowPassFilter` with a cutoff
 * at 90% of the output Nyquist frequency before `resample` is called;
 * otherwise `resample` receives the input unchanged.
 *
 * @param {Float32Array} input - Source samples.
 * @param {number} inputRate - Sample rate of `input`.
 * @param {number} outputRate - Desired sample rate.
 * @returns {Float32Array} A new array holding the resampled audio.
 */
function resampleWithAntiAliasing(input, inputRate, outputRate) {
  if (inputRate === outputRate) return new Float32Array(input);

  const isDownsampling = outputRate < inputRate;
  const source = isDownsampling
    ? applyLowPassFilter(input, (outputRate / 2) * 0.9, inputRate)
    : input;
  return resample(source, inputRate, outputRate);
}
373
/**
 * Expose the bytes behind a PCM16 array without copying.
 *
 * @param {Int16Array} data - PCM samples.
 * @returns {Uint8Array} A byte view over the same buffer region as `data`.
 */
function pcm16ToBytes(data) {
  const { buffer, byteOffset, byteLength } = data;
  return new Uint8Array(buffer, byteOffset, byteLength);
}
376
/**
 * Reinterpret raw bytes as signed 16-bit PCM samples (platform byte order).
 *
 * When `bytes` starts on an even byte offset the result is a zero-copy view
 * over the same buffer. An Int16Array cannot start on an odd byte offset
 * (the constructor throws a RangeError), so misaligned input — typical for
 * sub-views of network frames or pooled buffers — is copied into a fresh,
 * aligned buffer instead. A trailing odd byte is ignored.
 *
 * @param {Uint8Array} bytes - Raw PCM16 bytes.
 * @returns {Int16Array} `Math.floor(bytes.length / 2)` samples.
 */
function bytesToPcm16(bytes) {
  const sampleCount = Math.floor(bytes.length / 2);
  if (bytes.byteOffset % 2 === 0) {
    return new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount);
  }
  // Copy via set() rather than slice(): on Node Buffers slice() returns
  // another view over the same (still misaligned) memory.
  const aligned = new Uint8Array(sampleCount * 2);
  aligned.set(bytes.subarray(0, sampleCount * 2));
  return new Int16Array(aligned.buffer);
}
379
/**
 * Scale a signal so that its largest absolute sample equals `targetPeak`.
 *
 * A signal whose peak is exactly 0 is returned as an unscaled copy.
 *
 * @param {Float32Array} data - Samples to normalize.
 * @param {number} [targetPeak=0.95] - Desired peak magnitude.
 * @returns {Float32Array} A new array with the scaled samples.
 */
function normalizeAudio(data, targetPeak = 0.95) {
  const sampleCount = data.length;

  let peak = 0;
  for (let n = 0; n < sampleCount; n += 1) {
    peak = Math.max(peak, Math.abs(data[n]));
  }
  if (peak === 0) {
    return new Float32Array(data);
  }

  const gain = targetPeak / peak;
  const scaled = new Float32Array(sampleCount);
  for (let n = 0; n < sampleCount; n += 1) {
    scaled[n] = data[n] * gain;
  }
  return scaled;
}
392
/**
 * Compute the root-mean-square level of a block of samples.
 *
 * Uint8Array input is treated as unsigned 8-bit data centred on 128 and is
 * rescaled with (value - 128) / 128 before squaring. Any other input is
 * squared as-is.
 *
 * An empty block yields 0. The unguarded formula divides 0 by 0 and returns
 * NaN, which would then leak into callers' level calculations.
 *
 * @param {Uint8Array|Float32Array} data - Samples to measure.
 * @returns {number} The RMS value, or 0 for empty input.
 */
function calculateRMS(data) {
  const length = data.length;
  if (length === 0) return 0;

  let sum = 0;
  if (data instanceof Uint8Array) {
    for (let i = 0; i < length; i++) {
      const v = (data[i] - 128) / 128;
      sum += v * v;
    }
  } else {
    for (let i = 0; i < length; i++) {
      sum += data[i] * data[i];
    }
  }
  return Math.sqrt(sum / length);
}
407
/**
 * Linear-interpolation resampler for audio that arrives in chunks.
 *
 * Input samples that cannot be converted yet are buffered between calls,
 * together with the fractional read position of the next output sample, so
 * that the concatenated output does not depend on how the input was split
 * into chunks. (Restarting every chunk at position 0 makes non-integer
 * ratios such as 44100 -> 16000 drift at each chunk boundary.)
 */
var StreamResampler = class {
  /** Input samples carried over to the next `process` call. */
  inputBuffer = new Float32Array(0);
  inputRate;
  outputRate;
  /** Position of the next output sample relative to `inputBuffer[0]`. */
  fractionalOffset = 0;

  /**
   * @param {number} inputRate - Sample rate of the incoming chunks.
   * @param {number} outputRate - Sample rate of the produced audio.
   */
  constructor(inputRate, outputRate) {
    this.inputRate = inputRate;
    this.outputRate = outputRate;
  }

  /**
   * Process a chunk of audio and return resampled data
   * @param inputChunk Float32Array chunk to process
   * @param flush If true, output remaining buffered samples (the last input
   *   sample is repeated where a right-hand neighbour is missing) and clear
   *   the internal state so the next call starts a new stream
   * @returns Resampled Float32Array (may be empty if more data needed)
   */
  process(inputChunk, flush = false) {
    const combined = new Float32Array(this.inputBuffer.length + inputChunk.length);
    combined.set(this.inputBuffer);
    combined.set(inputChunk, this.inputBuffer.length);

    const ratio = this.inputRate / this.outputRate;
    const lastIndex = combined.length - 1;
    const available = combined.length - this.fractionalOffset;
    let outputLength = Math.max(0, Math.floor(available / ratio));

    if (!flush) {
      // When upsampling, positions past the last sample need the first
      // sample of the next chunk as their right-hand neighbour; hold them
      // back instead of interpolating against a clamped value.
      while (
        outputLength > 0 &&
        this.fractionalOffset + (outputLength - 1) * ratio > lastIndex
      ) {
        outputLength--;
      }
      if (outputLength === 0) {
        this.inputBuffer = combined;
        return new Float32Array(0);
      }
    }

    const output = new Float32Array(outputLength);
    for (let i = 0; i < outputLength; i++) {
      const pos = this.fractionalOffset + i * ratio;
      const left = Math.floor(pos);
      const right = Math.min(left + 1, lastIndex);
      const weight = pos - left;
      output[i] = combined[left] * (1 - weight) + combined[right] * weight;
    }

    if (flush) {
      this.reset();
      return output;
    }

    // Keep every sample from the one the next output position falls on, and
    // remember how far into that sample the position lies.
    const nextPos = this.fractionalOffset + outputLength * ratio;
    const consumed = Math.min(Math.floor(nextPos), combined.length);
    this.inputBuffer = combined.slice(consumed);
    this.fractionalOffset = nextPos - consumed;
    return output;
  }

  /** Drop all buffered input and start over at position 0. */
  reset() {
    this.inputBuffer = new Float32Array(0);
    this.fractionalOffset = 0;
  }
};
449
+
450
+ // src/browser-audio.ts
451
/**
 * Browser-side audio helper built on the Web Audio API.
 *
 * It captures microphone audio, converts it to PCM16 bytes at
 * `inputSampleRate` and hands it to a callback; it schedules PCM16 chunks for
 * gap-free playback at `outputSampleRate`; and it exposes analyser data for
 * visualisation.
 */
var BrowserAudioManager = class {
  audioContext = null;
  mediaStreamAudioSourceNode = null;
  scriptProcessor = null;
  analyserNode = null;
  mediaStream = null;
  // Playback scheduling
  nextPlaybackTime = 0;
  activeSources = [];
  playbackQueue = [];
  // Configuration
  inputSampleRate;
  outputSampleRate;
  autoGainControl;
  echoCancellation;
  noiseSuppression;
  // Callbacks
  onAudioInput;
  onInputError;
  // Audio processing state
  isMuted = false;
  isListening = false;

  /**
   * @param {object} [config]
   * @param {number} [config.inputSampleRate] - Rate of the PCM handed to the
   *   microphone callback; defaults to AUDIO_CONFIG.SAMPLE_RATE.
   * @param {number} [config.outputSampleRate] - Rate of the PCM passed to
   *   `playAudio`; defaults to AUDIO_CONFIG.SPEAKER_SAMPLE_RATE.
   * @param {boolean} [config.autoGainControl=true]
   * @param {boolean} [config.echoCancellation=true]
   * @param {boolean} [config.noiseSuppression=true]
   * @param {(error: Error) => void} [config.onInputError] - Called when
   *   starting the microphone fails.
   */
  constructor(config = {}) {
    this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
    this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
    this.autoGainControl = config.autoGainControl ?? true;
    this.echoCancellation = config.echoCancellation ?? true;
    this.noiseSuppression = config.noiseSuppression ?? true;
    this.onInputError = config.onInputError;
  }

  /**
   * Initialize the AudioContext and analyser
   *
   * Does nothing when a context already exists. The analyser is created
   * unless `analyserConfig.enabled` is exactly `false`.
   *
   * @param {{enabled?: boolean, fftSize?: number}} [analyserConfig]
   * @throws {Error} When the browser has no AudioContext implementation.
   */
  async init(analyserConfig) {
    if (this.audioContext) return;
    const AudioContextClass = window.AudioContext || window.webkitAudioContext;
    if (!AudioContextClass) {
      throw new Error("Web Audio API not supported in this browser");
    }
    this.audioContext = new AudioContextClass();
    if (this.audioContext.state === "suspended") {
      await this.audioContext.resume();
      console.log("\u{1F442} AudioContext resumed");
    }
    if (analyserConfig?.enabled !== false) {
      this.analyserNode = this.audioContext.createAnalyser();
      this.analyserNode.fftSize = analyserConfig?.fftSize ?? 256;
    }
  }

  /**
   * Start capturing audio from the microphone
   *
   * Any capture left over from an earlier call is released first. If the
   * capture cannot be started, all partially acquired resources are released,
   * `isRecording()` reports false, `onInputError` is notified and the error
   * is rethrown.
   *
   * @param {(pcmBytes: Uint8Array) => void} onAudioInput - Receives PCM16
   *   bytes at `inputSampleRate` for every processed buffer.
   */
  async startMicrophone(onAudioInput) {
    if (!this.audioContext) {
      await this.init();
    }
    // A second start must not leak the previous stream and processor.
    this._releaseMicrophone();
    try {
      this.onAudioInput = onAudioInput;
      this.isListening = true;
      const stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          autoGainControl: this.autoGainControl,
          echoCancellation: this.echoCancellation,
          noiseSuppression: this.noiseSuppression
        }
      });
      if (!this.isListening) {
        // stopMicrophone() ran while the permission prompt was pending:
        // release the stream instead of leaving the microphone open.
        stream.getTracks().forEach((track) => track.stop());
        return;
      }
      this.mediaStream = stream;
      this.mediaStreamAudioSourceNode = this.audioContext.createMediaStreamSource(this.mediaStream);
      const bufferSize = 4096;
      this.scriptProcessor = this.audioContext.createScriptProcessor(
        bufferSize,
        1,
        // input channels
        1
        // output channels
      );
      this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
      this.scriptProcessor.connect(this.audioContext.destination);
      if (this.analyserNode) {
        this.mediaStreamAudioSourceNode.connect(this.analyserNode);
      }
      this.scriptProcessor.onaudioprocess = (event) => {
        this._processAudioInput(event);
      };
      console.log("\u{1F3A4} Microphone started");
    } catch (error) {
      // Undo the partial start so state and hardware match reality.
      this._releaseMicrophone();
      const err = error instanceof Error ? error : new Error(String(error));
      if (this.onInputError) this.onInputError(err);
      throw err;
    }
  }

  /**
   * Internal method to process microphone audio data
   *
   * Writes silence to the processor output, resamples the input to
   * `inputSampleRate` when the context runs at a different rate, converts it
   * to PCM16 and passes the bytes to the `onAudioInput` callback. Nothing is
   * emitted while muted or not listening.
   */
  _processAudioInput(event) {
    if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
    if (this.isMuted) return;
    const inputData = event.inputBuffer.getChannelData(0);
    // The processor is connected to the destination; keep its output silent
    // so the microphone is not played back.
    event.outputBuffer.getChannelData(0).fill(0);
    const hardwareRate = this.audioContext.sampleRate;
    let processedData = new Float32Array(inputData);
    if (hardwareRate !== this.inputSampleRate) {
      processedData = resampleWithAntiAliasing(
        processedData,
        hardwareRate,
        this.inputSampleRate
      );
    }
    const int16Data = float32ToPcm16(processedData);
    const uint8Data = new Uint8Array(
      int16Data.buffer,
      int16Data.byteOffset,
      int16Data.byteLength
    );
    this.onAudioInput(uint8Data);
  }

  /**
   * Internal: stop the capture stream and detach the capture nodes.
   * Safe to call when nothing is being captured.
   */
  _releaseMicrophone() {
    this.isListening = false;
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }
    if (this.scriptProcessor) {
      this.scriptProcessor.onaudioprocess = null;
      this.scriptProcessor.disconnect();
      this.scriptProcessor = null;
    }
    if (this.mediaStreamAudioSourceNode) {
      this.mediaStreamAudioSourceNode.disconnect();
      this.mediaStreamAudioSourceNode = null;
    }
  }

  /**
   * Stop capturing microphone input
   */
  stopMicrophone() {
    this._releaseMicrophone();
    console.log("\u{1F3A4} Microphone stopped");
  }

  /**
   * Play back audio received from the server
   * @param pcm16Data Bytes (e.g. a Uint8Array) holding 16-bit PCM samples at
   *   the configured output sample rate. A trailing odd byte is ignored and
   *   empty chunks are skipped.
   */
  playAudio(pcm16Data) {
    if (!this.audioContext) {
      console.warn("AudioContext not initialized");
      return;
    }
    const sampleCount = Math.floor(pcm16Data.length / 2);
    // createBuffer() rejects a zero-length buffer.
    if (sampleCount === 0) return;

    let int16Array;
    if (pcm16Data.byteOffset % 2 === 0) {
      int16Array = new Int16Array(pcm16Data.buffer, pcm16Data.byteOffset, sampleCount);
    } else {
      // An Int16Array view cannot start on an odd byte offset (RangeError),
      // so misaligned chunks are copied into an aligned buffer first.
      const aligned = new Uint8Array(sampleCount * 2);
      aligned.set(pcm16Data.subarray(0, sampleCount * 2));
      int16Array = new Int16Array(aligned.buffer);
    }

    const audioBuffer = this.audioContext.createBuffer(
      1,
      sampleCount,
      this.outputSampleRate
    );
    const channel = audioBuffer.getChannelData(0);
    for (let i = 0; i < sampleCount; i++) {
      channel[i] = int16Array[i] / 32768;
    }
    this._schedulePlayback(audioBuffer);
  }

  /**
   * Internal method to schedule and play audio with sample-accurate timing
   *
   * Each buffer starts when the previous one ends, but never earlier than
   * 10 ms from now.
   */
  _schedulePlayback(audioBuffer) {
    if (!this.audioContext) return;
    const currentTime = this.audioContext.currentTime;
    const duration = audioBuffer.length / this.outputSampleRate;
    const startTime = Math.max(
      currentTime + 0.01,
      // Minimum 10ms delay
      this.nextPlaybackTime
    );
    this.nextPlaybackTime = startTime + duration;
    const source = this.audioContext.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(this.audioContext.destination);
    if (this.analyserNode) {
      source.connect(this.analyserNode);
    }
    source.start(startTime);
    this.activeSources.push(source);
    source.onended = () => {
      const index = this.activeSources.indexOf(source);
      if (index > -1) {
        this.activeSources.splice(index, 1);
      }
    };
  }

  /**
   * Stop all currently playing audio and clear the queue
   */
  stopPlayback() {
    this.activeSources.forEach((source) => {
      try {
        source.stop();
      } catch (e) {
        // Best effort: a source that cannot be stopped any more is ignored.
      }
    });
    this.activeSources = [];
    this.playbackQueue = [];
    this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
    console.log("\u{1F507} Playback stopped");
  }

  /**
   * Toggle mute state
   */
  setMuted(muted) {
    this.isMuted = muted;
  }

  /**
   * Get current mute state
   */
  isMicMuted() {
    return this.isMuted;
  }

  /**
   * Get current amplitude from analyser (for visualization)
   * Returns value between 0 and 1
   */
  getAmplitude() {
    if (!this.analyserNode) return 0;
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteTimeDomainData(dataArray);
    const rms = calculateRMS(dataArray);
    return Math.min(rms * 10, 1);
  }

  /**
   * Get frequency data from analyser for visualization
   */
  getFrequencyData() {
    if (!this.analyserNode) {
      return new Uint8Array(0);
    }
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteFrequencyData(dataArray);
    return dataArray;
  }

  /**
   * Get time-domain data from analyser for waveform visualization
   */
  getWaveformData() {
    if (!this.analyserNode) {
      return new Uint8Array(0);
    }
    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
    this.analyserNode.getByteTimeDomainData(dataArray);
    return dataArray;
  }

  /**
   * Cleanup and close AudioContext
   *
   * Stops capture and playback, detaches the analyser and closes the
   * context. Afterwards `getState()` returns null and `init()` creates a
   * fresh context.
   */
  cleanup() {
    this.stopMicrophone();
    this.stopPlayback();
    if (this.analyserNode) {
      this.analyserNode.disconnect();
      this.analyserNode = null;
    }
    const context = this.audioContext;
    if (context) {
      this.audioContext = null;
      this.nextPlaybackTime = 0;
      if (context.state !== "closed") {
        // close() is asynchronous; cleanup stays synchronous, so a failure
        // is reported instead of being left as an unhandled rejection.
        Promise.resolve(context.close()).catch((error) => {
          console.warn("Failed to close AudioContext", error);
        });
      }
    }
  }

  /**
   * Get current audio context state
   */
  getState() {
    return this.audioContext?.state ?? null;
  }

  /**
   * Check if microphone is currently listening
   */
  isRecording() {
    return this.isListening;
  }
};
300
727
  export {
301
728
  AUDIO_CONFIG,
729
+ BrowserAudioManager,
302
730
  DEFAULT_URLS,
303
731
  Language,
732
+ StreamResampler,
304
733
  TTSClient,
305
734
  VoiceAgentClient,
306
735
  VoiceStyle,
736
+ applyLowPassFilter,
737
+ bytesToPcm16,
738
+ calculateRMS,
739
+ float32ToPcm16,
740
+ normalizeAudio,
741
+ pcm16ToBytes,
742
+ pcm16ToFloat32,
743
+ resample,
744
+ resampleWithAntiAliasing,
307
745
  simpleConversation,
308
746
  simpleTTS
309
747
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lokutor/sdk",
3
- "version": "1.1.2",
3
+ "version": "1.1.8",
4
4
  "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",