npm - @lokutor/sdk - Versions diffs - 1.1.2 → 1.1.7 - Mend

@lokutor/sdk 1.1.2 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.js CHANGED Viewed

@@ -21,11 +21,22 @@ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: tru
 // Public export table of the bundle. Every entry is a zero-argument arrow
 // function that returns the module-local binding of the same name.
 // NOTE(review): `__export` is defined above this hunk; presumably it installs
 // each thunk as a getter on `index_exports` - confirm against the helper.
 var index_exports = {};
 __export(index_exports, {
   AUDIO_CONFIG: () => AUDIO_CONFIG,
+  BrowserAudioManager: () => BrowserAudioManager,
   DEFAULT_URLS: () => DEFAULT_URLS,
   Language: () => Language,
+  StreamResampler: () => StreamResampler,
   TTSClient: () => TTSClient,
   VoiceAgentClient: () => VoiceAgentClient,
   VoiceStyle: () => VoiceStyle,
+  applyLowPassFilter: () => applyLowPassFilter,
+  bytesToPcm16: () => bytesToPcm16,
+  calculateRMS: () => calculateRMS,
+  float32ToPcm16: () => float32ToPcm16,
+  normalizeAudio: () => normalizeAudio,
+  pcm16ToBytes: () => pcm16ToBytes,
+  pcm16ToFloat32: () => pcm16ToFloat32,
+  resample: () => resample,
+  resampleWithAntiAliasing: () => resampleWithAntiAliasing,
   simpleConversation: () => simpleConversation,
   simpleTTS: () => simpleTTS
 });
@@ -54,8 +65,8 @@ var Language = /* @__PURE__ */ ((Language2) => {
   return Language2;
 })(Language || {});
 var AUDIO_CONFIG = {
-  SAMPLE_RATE: 16e3,
-  SPEAKER_SAMPLE_RATE: 44100,
+  SAMPLE_RATE: 44100,
+  SPEAKER_SAMPLE_RATE: 16e3,
   CHANNELS: 1,
   CHUNK_DURATION_MS: 20,
   get CHUNK_SIZE() {
@@ -86,10 +97,12 @@ var VoiceAgentClient = class {
   onTranscription;
   onResponse;
   onAudioCallback;
+  onVisemesCallback;
   onStatus;
   onError;
   isConnected = false;
   messages = [];
+  visemeListeners = [];
   constructor(config) {
     this.apiKey = config.apiKey;
     this.prompt = config.prompt;
@@ -98,6 +111,7 @@ var VoiceAgentClient = class {
     this.onTranscription = config.onTranscription;
     this.onResponse = config.onResponse;
     this.onAudioCallback = config.onAudio;
+    this.onVisemesCallback = config.onVisemes;
     this.onStatus = config.onStatus;
     this.onError = config.onError;
   }
@@ -206,6 +220,11 @@ var VoiceAgentClient = class {
           };
           console.log(`${icons[msg.data] || ""} Status: ${msg.data}`);
           break;
+        case "visemes":
+          if (Array.isArray(msg.data) && msg.data.length > 0) {
+            this.emit("visemes", msg.data);
+          }
+          break;
         case "error":
           if (this.onError) this.onError(msg.data);
           console.error(`\u274C Server error: ${msg.data}`);
@@ -219,11 +238,17 @@ var VoiceAgentClient = class {
     if (event === "audio") {
       if (this.onAudioCallback) this.onAudioCallback(data);
       this.audioListeners.forEach((l) => l(data));
+    } else if (event === "visemes") {
+      if (this.onVisemesCallback) this.onVisemesCallback(data);
+      this.visemeListeners.forEach((l) => l(data));
     }
   }
   /**
    * Register an extra listener for the "audio" event.
    * Listeners are invoked with the event payload, after the `onAudio`
    * callback supplied in the constructor config (if any).
    * @param {Function} callback Receives the audio payload.
    */
   onAudio(callback) {
     const { audioListeners } = this;
     audioListeners.push(callback);
   }
+  /**
+   * Register an extra listener for the "visemes" event.
+   * Listeners are invoked with the event payload, after the `onVisemes`
+   * callback supplied in the constructor config (if any).
+   * @param {Function} callback Receives the viseme payload.
+   */
+  onVisemes(callback) {
+    const { visemeListeners } = this;
+    visemeListeners.push(callback);
+  }
   /**
    * Disconnect from the server
    */
@@ -330,14 +355,435 @@ async function simpleTTS(options) {
   const client = new TTSClient({ apiKey: options.apiKey });
   return client.synthesize(options);
 }
+// src/audio-utils.ts
+/**
+ * Convert 16-bit PCM samples to floating-point samples.
+ * Every sample is divided by 32768, so Int16 input maps into [-1, 1).
+ * @param {Int16Array} int16Data Source samples.
+ * @returns {Float32Array} Newly allocated array of the same length.
+ */
+function pcm16ToFloat32(int16Data) {
+  return Float32Array.from(int16Data, (sample) => sample / 32768);
+}
+/**
+ * Convert floating-point samples to 16-bit PCM.
+ * Samples are clamped to [-1, 1] first; negative values are scaled by 32768
+ * and non-negative values by 32767 so both extremes fit the Int16 range.
+ * @param {Float32Array} float32Data Source samples.
+ * @returns {Int16Array} Newly allocated array of the same length.
+ */
+function float32ToPcm16(float32Data) {
+  return Int16Array.from(float32Data, (sample) => {
+    const clamped = Math.max(-1, Math.min(1, sample));
+    if (clamped < 0) {
+      return clamped * 32768;
+    }
+    return clamped * 32767;
+  });
+}
+/**
+ * Resample a signal by linear interpolation between neighbouring samples.
+ * When both rates are equal a plain copy of the input is returned.
+ * @param {Float32Array} input Source samples.
+ * @param {number} inputRate Sample rate of `input`.
+ * @param {number} outputRate Desired sample rate.
+ * @returns {Float32Array} New array with round(input.length * outputRate / inputRate) samples.
+ */
+function resample(input, inputRate, outputRate) {
+  if (inputRate === outputRate) return new Float32Array(input);
+
+  const step = inputRate / outputRate;
+  const lastIndex = input.length - 1;
+  const result = new Float32Array(Math.round(input.length / step));
+
+  for (let outIdx = 0; outIdx < result.length; outIdx += 1) {
+    const srcPos = outIdx * step;
+    const lo = Math.floor(srcPos);
+    // The final output sample may have no right-hand neighbour; reuse the last input sample.
+    const hi = Math.min(lo + 1, lastIndex);
+    const frac = srcPos - lo;
+    result[outIdx] = input[lo] * (1 - frac) + input[hi] * frac;
+  }
+  return result;
+}
+/**
+ * Apply a single-pole (RC-style) low-pass filter:
+ * y[i] = y[i-1] + alpha * (x[i] - y[i-1]), with y[0] = x[0].
+ * @param {Float32Array} data Input samples.
+ * @param {number} cutoffFreq Cutoff frequency, in the same unit as `sampleRate`.
+ * @param {number} sampleRate Sample rate of `data`.
+ * @returns {Float32Array} New filtered array of the same length.
+ */
+function applyLowPassFilter(data, cutoffFreq, sampleRate) {
+  const samplePeriod = 1 / sampleRate;
+  const timeConstant = 1 / (2 * Math.PI * cutoffFreq);
+  const smoothing = samplePeriod / (timeConstant + samplePeriod);
+
+  const out = new Float32Array(data.length);
+  out[0] = data[0];
+  let idx = 1;
+  while (idx < data.length) {
+    // Read the previous value back from the Float32Array so rounding matches storage.
+    const previous = out[idx - 1];
+    out[idx] = previous + smoothing * (data[idx] - previous);
+    idx += 1;
+  }
+  return out;
+}
+/**
+ * Resample a signal, low-pass filtering it first when the rate is reduced.
+ * For downsampling the filter cutoff is 90% of the target Nyquist frequency
+ * (outputRate / 2); upsampling goes straight to linear interpolation.
+ * @param {Float32Array} input Source samples.
+ * @param {number} inputRate Sample rate of `input`.
+ * @param {number} outputRate Desired sample rate.
+ * @returns {Float32Array} Newly allocated resampled signal.
+ */
+function resampleWithAntiAliasing(input, inputRate, outputRate) {
+  if (inputRate === outputRate) return new Float32Array(input);
+
+  const isDownsampling = outputRate < inputRate;
+  const source = isDownsampling
+    ? applyLowPassFilter(input, (outputRate / 2) * 0.9, inputRate)
+    : input;
+  return resample(source, inputRate, outputRate);
+}
+/**
+ * Expose PCM16 samples as raw bytes without copying.
+ * The returned array is a view over the same underlying buffer as `data`.
+ * @param {Int16Array} data PCM16 samples.
+ * @returns {Uint8Array} Byte view covering exactly the bytes of `data`.
+ */
+function pcm16ToBytes(data) {
+  const { buffer, byteOffset, byteLength } = data;
+  return new Uint8Array(buffer, byteOffset, byteLength);
+}
+/**
+ * Interpret raw bytes as PCM16 samples (platform byte order).
+ *
+ * When `bytes` starts on an even byte offset the result is a zero-copy view
+ * over the same buffer. An Int16Array view cannot start on an odd offset (the
+ * constructor throws RangeError), so in that case the bytes are copied into a
+ * fresh, aligned buffer instead. A trailing odd byte is ignored.
+ * @param {Uint8Array} bytes Raw PCM16 bytes.
+ * @returns {Int16Array} Samples; shares memory with `bytes` only when aligned.
+ */
+function bytesToPcm16(bytes) {
+  const sampleCount = Math.floor(bytes.length / 2);
+  if (bytes.byteOffset % Int16Array.BYTES_PER_ELEMENT === 0) {
+    return new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount);
+  }
+  // Copy via set() rather than slice(): Node's Buffer overrides slice() to return a view.
+  const aligned = new Uint8Array(sampleCount * 2);
+  aligned.set(bytes.subarray(0, sampleCount * 2));
+  return new Int16Array(aligned.buffer);
+}
+/**
+ * Scale a signal so that its largest absolute sample equals `targetPeak`.
+ * An all-zero (or empty) signal is returned as an unscaled copy.
+ * @param {Float32Array} data Input samples.
+ * @param {number} [targetPeak=0.95] Desired peak magnitude.
+ * @returns {Float32Array} Newly allocated normalized signal.
+ */
+function normalizeAudio(data, targetPeak = 0.95) {
+  const peak = data.reduce(
+    (currentPeak, sample) => Math.max(currentPeak, Math.abs(sample)),
+    0
+  );
+  if (peak === 0) return new Float32Array(data);
+
+  const gain = targetPeak / peak;
+  return Float32Array.from(data, (sample) => sample * gain);
+}
+/**
+ * Compute the root-mean-square level of a signal.
+ *
+ * `Uint8Array` input is treated as unsigned 8-bit samples centred on 128
+ * (each value is mapped to (v - 128) / 128 first); any other input is used
+ * as-is. Empty input yields 0 rather than NaN (0 / 0).
+ * @param {Uint8Array|Float32Array} data Samples to measure.
+ * @returns {number} RMS value, 0 for empty input.
+ */
+function calculateRMS(data) {
+  const length = data.length;
+  if (length === 0) return 0;
+
+  const isUnsignedBytes = data instanceof Uint8Array;
+  let sumOfSquares = 0;
+  for (let i = 0; i < length; i++) {
+    const v = isUnsignedBytes ? (data[i] - 128) / 128 : data[i];
+    sumOfSquares += v * v;
+  }
+  return Math.sqrt(sumOfSquares / length);
+}
+/**
+ * Chunk-by-chunk linear-interpolation resampler.
+ *
+ * Samples that cannot be converted yet are kept in `inputBuffer`, and the
+ * fractional read position is carried over in `phase`, so the concatenated
+ * output of successive `process()` calls matches resampling the whole signal
+ * in one go (no restart of the interpolation grid at chunk boundaries).
+ */
+var StreamResampler = class {
+  inputBuffer = new Float32Array(0);
+  inputRate;
+  outputRate;
+  // Fractional read position of the next output sample, relative to inputBuffer[0].
+  phase = 0;
+  /**
+   * @param {number} inputRate Sample rate of the incoming chunks.
+   * @param {number} outputRate Sample rate of the produced audio.
+   */
+  constructor(inputRate, outputRate) {
+    this.inputRate = inputRate;
+    this.outputRate = outputRate;
+  }
+  /**
+   * Process a chunk of audio and return resampled data
+   * @param inputChunk Float32Array chunk to process
+   * @param flush If true, output remaining buffered samples (the final sample
+   *   is interpolated against itself) and clear the internal state
+   * @returns Resampled Float32Array (may be empty if more data needed)
+   */
+  process(inputChunk, flush = false) {
+    const combined = new Float32Array(this.inputBuffer.length + inputChunk.length);
+    combined.set(this.inputBuffer);
+    combined.set(inputChunk, this.inputBuffer.length);
+    const ratio = this.inputRate / this.outputRate;
+    if (ratio === 1) {
+      // Identical rates: pass everything through unchanged.
+      this.inputBuffer = new Float32Array(0);
+      this.phase = 0;
+      return combined;
+    }
+    const lastIndex = combined.length - 1;
+    // While streaming, an output sample needs its right-hand neighbour, so positions must stay
+    // below the last index; on flush every position inside the buffer is emitted.
+    const limit = flush ? combined.length : lastIndex;
+    const start = this.phase;
+    const outputLength = start < limit ? Math.ceil((limit - start) / ratio) : 0;
+    const output = new Float32Array(outputLength);
+    for (let i = 0; i < outputLength; i++) {
+      const pos = start + i * ratio;
+      // Clamp guards against floating-point overshoot of the last position.
+      const left = Math.min(Math.floor(pos), lastIndex);
+      const right = Math.min(left + 1, lastIndex);
+      const weight = pos - left;
+      output[i] = combined[left] * (1 - weight) + combined[right] * weight;
+    }
+    if (flush) {
+      this.inputBuffer = new Float32Array(0);
+      this.phase = 0;
+      return output;
+    }
+    // Drop fully consumed samples; keep the fractional remainder as the next phase.
+    // When downsampling by a large factor the next position may lie beyond this chunk,
+    // in which case the phase stays >= 1 and samples of the next chunk are skipped.
+    const nextPos = start + outputLength * ratio;
+    const consumed = Math.min(Math.floor(nextPos), combined.length);
+    this.inputBuffer = combined.slice(consumed);
+    this.phase = nextPos - consumed;
+    return output;
+  }
+  /**
+   * Discard buffered samples and the carried-over read position.
+   */
+  reset() {
+    this.inputBuffer = new Float32Array(0);
+    this.phase = 0;
+  }
+};
+// src/browser-audio.ts
+/**
+ * Browser audio helper built on the Web Audio API.
+ *
+ * Captures microphone audio through a ScriptProcessorNode, converts it to
+ * PCM16 bytes at `inputSampleRate` and hands it to a callback; plays back
+ * PCM16 audio by scheduling AudioBufferSourceNodes back to back; and exposes
+ * an optional AnalyserNode (fed by both microphone and playback) for
+ * amplitude / spectrum / waveform visualisation.
+ */
+var BrowserAudioManager = class {
+  audioContext = null;
+  mediaStreamAudioSourceNode = null;
+  scriptProcessor = null;
+  analyserNode = null;
+  mediaStream = null;
+  // Playback scheduling
+  nextPlaybackTime = 0;
+  activeSources = [];
+  playbackQueue = [];
+  // Configuration
+  inputSampleRate;
+  outputSampleRate;
+  autoGainControl;
+  echoCancellation;
+  noiseSuppression;
+  // Callbacks
+  onAudioInput;
+  onInputError;
+  // Audio processing state
+  isMuted = false;
+  isListening = false;
+  /**
+   * @param config Optional settings: `inputSampleRate` / `outputSampleRate`
+   *   (default to AUDIO_CONFIG.SAMPLE_RATE / AUDIO_CONFIG.SPEAKER_SAMPLE_RATE),
+   *   `autoGainControl`, `echoCancellation`, `noiseSuppression` (default true)
+   *   and `onInputError` (called when starting the microphone fails).
+   */
+  constructor(config = {}) {
+    this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
+    this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
+    this.autoGainControl = config.autoGainControl ?? true;
+    this.echoCancellation = config.echoCancellation ?? true;
+    this.noiseSuppression = config.noiseSuppression ?? true;
+    this.onInputError = config.onInputError;
+  }
+  /**
+   * Initialize the AudioContext and analyser
+   * Does nothing when a context already exists.
+   * @param analyserConfig Optional `{ enabled, fftSize }`; the analyser is
+   *   created unless `enabled === false`, with `fftSize` defaulting to 256.
+   * @throws Error when the browser exposes no AudioContext implementation.
+   */
+  async init(analyserConfig) {
+    if (this.audioContext) return;
+    const AudioContextClass = window.AudioContext || window.webkitAudioContext;
+    if (!AudioContextClass) {
+      throw new Error("Web Audio API not supported in this browser");
+    }
+    this.audioContext = new AudioContextClass();
+    if (this.audioContext.state === "suspended") {
+      await this.audioContext.resume();
+      console.log("\u{1F442} AudioContext resumed");
+    }
+    if (analyserConfig?.enabled !== false) {
+      this.analyserNode = this.audioContext.createAnalyser();
+      this.analyserNode.fftSize = analyserConfig?.fftSize ?? 256;
+    }
+  }
+  /**
+   * Start capturing audio from the microphone
+   * @param onAudioInput Called with a Uint8Array of PCM16 bytes per processed buffer.
+   * @throws The underlying error (wrapped in Error if needed) when capture cannot
+   *   be started; `onInputError` is notified first and no capture state is left behind.
+   */
+  async startMicrophone(onAudioInput) {
+    if (!this.audioContext) {
+      await this.init();
+    }
+    try {
+      // Starting again without stopMicrophone() would otherwise orphan the previous stream/nodes.
+      this._releaseMicrophone();
+      const mediaStream = await navigator.mediaDevices.getUserMedia({
+        audio: {
+          autoGainControl: this.autoGainControl,
+          echoCancellation: this.echoCancellation,
+          noiseSuppression: this.noiseSuppression
+        }
+      });
+      this.mediaStream = mediaStream;
+      this.mediaStreamAudioSourceNode = this.audioContext.createMediaStreamSource(mediaStream);
+      const bufferSize = 4096;
+      this.scriptProcessor = this.audioContext.createScriptProcessor(
+        bufferSize,
+        1,
+        // input channels
+        1
+        // output channels
+      );
+      this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
+      // The processor is wired to the destination so it keeps receiving audioprocess
+      // events; its output buffer is zeroed in _processAudioInput.
+      this.scriptProcessor.connect(this.audioContext.destination);
+      if (this.analyserNode) {
+        this.mediaStreamAudioSourceNode.connect(this.analyserNode);
+      }
+      this.scriptProcessor.onaudioprocess = (event) => {
+        this._processAudioInput(event);
+      };
+      // Only flag the manager as listening once the whole capture graph exists.
+      this.onAudioInput = onAudioInput;
+      this.isListening = true;
+      console.log("\u{1F3A4} Microphone started");
+    } catch (error) {
+      // Undo any partially built graph so isRecording() does not report a dead capture.
+      this._releaseMicrophone();
+      const err = error instanceof Error ? error : new Error(String(error));
+      if (this.onInputError) this.onInputError(err);
+      throw err;
+    }
+  }
+  /**
+   * Internal method to process microphone audio data
+   * Silences the processor output, resamples the captured buffer from the
+   * context's sample rate to `inputSampleRate` when they differ, converts it
+   * to PCM16 and passes the bytes to `onAudioInput`. Skipped while muted or
+   * not listening.
+   */
+  _processAudioInput(event) {
+    if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
+    if (this.isMuted) return;
+    const inputData = event.inputBuffer.getChannelData(0);
+    // Keep the microphone signal from being played through the speakers.
+    event.outputBuffer.getChannelData(0).fill(0);
+    const hardwareRate = this.audioContext.sampleRate;
+    let processedData = new Float32Array(inputData);
+    if (hardwareRate !== this.inputSampleRate) {
+      processedData = resampleWithAntiAliasing(
+        processedData,
+        hardwareRate,
+        this.inputSampleRate
+      );
+    }
+    const int16Data = float32ToPcm16(processedData);
+    const uint8Data = new Uint8Array(
+      int16Data.buffer,
+      int16Data.byteOffset,
+      int16Data.byteLength
+    );
+    this.onAudioInput(uint8Data);
+  }
+  /**
+   * Internal: stop the capture tracks, detach the capture nodes and clear the
+   * listening flag. Safe to call when nothing is active.
+   */
+  _releaseMicrophone() {
+    this.isListening = false;
+    if (this.mediaStream) {
+      this.mediaStream.getTracks().forEach((track) => track.stop());
+      this.mediaStream = null;
+    }
+    if (this.scriptProcessor) {
+      this.scriptProcessor.onaudioprocess = null;
+      this.scriptProcessor.disconnect();
+      this.scriptProcessor = null;
+    }
+    if (this.mediaStreamAudioSourceNode) {
+      this.mediaStreamAudioSourceNode.disconnect();
+      this.mediaStreamAudioSourceNode = null;
+    }
+  }
+  /**
+   * Stop capturing microphone input
+   */
+  stopMicrophone() {
+    this._releaseMicrophone();
+    console.log("\u{1F3A4} Microphone stopped");
+  }
+  /**
+   * Play back audio received from the server
+   * @param pcm16Data PCM16 audio at the configured output sample rate, either as
+   *   raw bytes (Uint8Array, two bytes per sample) or as an Int16Array of samples.
+   *   Empty input is ignored.
+   */
+  playAudio(pcm16Data) {
+    if (!this.audioContext) {
+      console.warn("AudioContext not initialized");
+      return;
+    }
+    const int16Array = this._toInt16Samples(pcm16Data);
+    // createBuffer() rejects a zero-frame buffer, so there is nothing to schedule.
+    if (int16Array.length === 0) return;
+    const float32Data = pcm16ToFloat32(int16Array);
+    const audioBuffer = this.audioContext.createBuffer(
+      1,
+      float32Data.length,
+      this.outputSampleRate
+    );
+    audioBuffer.getChannelData(0).set(float32Data);
+    this._schedulePlayback(audioBuffer);
+  }
+  /**
+   * Internal: view (or, when the byte offset is odd, copy) PCM16 bytes as samples.
+   * An Int16Array is returned unchanged. A trailing odd byte is ignored.
+   */
+  _toInt16Samples(pcm16Data) {
+    if (pcm16Data instanceof Int16Array) return pcm16Data;
+    const sampleCount = Math.floor(pcm16Data.length / 2);
+    if (pcm16Data.byteOffset % 2 === 0) {
+      return new Int16Array(pcm16Data.buffer, pcm16Data.byteOffset, sampleCount);
+    }
+    // An Int16Array view cannot start on an odd byte offset; copy to an aligned buffer.
+    const aligned = new Uint8Array(sampleCount * 2);
+    aligned.set(pcm16Data.subarray(0, sampleCount * 2));
+    return new Int16Array(aligned.buffer);
+  }
+  /**
+   * Internal method to schedule and play audio with sample-accurate timing
+   * Each buffer starts when the previous one ends, or 10ms from now if the
+   * schedule has fallen behind.
+   */
+  _schedulePlayback(audioBuffer) {
+    if (!this.audioContext) return;
+    const currentTime = this.audioContext.currentTime;
+    const duration = audioBuffer.length / this.outputSampleRate;
+    const startTime = Math.max(
+      currentTime + 0.01,
+      // Minimum 10ms delay
+      this.nextPlaybackTime
+    );
+    this.nextPlaybackTime = startTime + duration;
+    const source = this.audioContext.createBufferSource();
+    source.buffer = audioBuffer;
+    source.connect(this.audioContext.destination);
+    if (this.analyserNode) {
+      source.connect(this.analyserNode);
+    }
+    // Forget the source once it has finished so activeSources only holds live nodes.
+    source.onended = () => {
+      const index = this.activeSources.indexOf(source);
+      if (index > -1) {
+        this.activeSources.splice(index, 1);
+      }
+    };
+    this.activeSources.push(source);
+    source.start(startTime);
+  }
+  /**
+   * Stop all currently playing audio and clear the queue
+   */
+  stopPlayback() {
+    this.activeSources.forEach((source) => {
+      try {
+        source.stop();
+      } catch (e) {
+        // Best effort: a source that cannot be stopped any more needs no further handling.
+      }
+    });
+    this.activeSources = [];
+    this.playbackQueue = [];
+    this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
+    console.log("\u{1F507} Playback stopped");
+  }
+  /**
+   * Toggle mute state
+   * While muted, captured microphone buffers are dropped instead of forwarded.
+   */
+  setMuted(muted) {
+    this.isMuted = muted;
+  }
+  /**
+   * Get current mute state
+   */
+  isMicMuted() {
+    return this.isMuted;
+  }
+  /**
+   * Get current amplitude from analyser (for visualization)
+   * Returns value between 0 and 1 (RMS of the time-domain data, multiplied by
+   * 10 and capped at 1); 0 when no analyser exists.
+   */
+  getAmplitude() {
+    if (!this.analyserNode) return 0;
+    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
+    this.analyserNode.getByteTimeDomainData(dataArray);
+    const rms = calculateRMS(dataArray);
+    return Math.min(rms * 10, 1);
+  }
+  /**
+   * Get frequency data from analyser for visualization
+   * Returns an empty Uint8Array when no analyser exists.
+   */
+  getFrequencyData() {
+    if (!this.analyserNode) {
+      return new Uint8Array(0);
+    }
+    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
+    this.analyserNode.getByteFrequencyData(dataArray);
+    return dataArray;
+  }
+  /**
+   * Get time-domain data from analyser for waveform visualization
+   * Returns an empty Uint8Array when no analyser exists.
+   */
+  getWaveformData() {
+    if (!this.analyserNode) {
+      return new Uint8Array(0);
+    }
+    const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
+    this.analyserNode.getByteTimeDomainData(dataArray);
+    return dataArray;
+  }
+  /**
+   * Cleanup and close AudioContext
+   * Stops capture and playback, drops the analyser and closes the context so
+   * its resources are released. A later init() creates a fresh context.
+   */
+  cleanup() {
+    this.stopMicrophone();
+    this.stopPlayback();
+    if (this.analyserNode) {
+      this.analyserNode.disconnect();
+      this.analyserNode = null;
+    }
+    if (this.audioContext) {
+      const context = this.audioContext;
+      this.audioContext = null;
+      // A new context starts its clock at 0, so the playback schedule must restart too.
+      this.nextPlaybackTime = 0;
+      if (context.state !== "closed") {
+        // close() is asynchronous; cleanup() stays synchronous and only reports failures.
+        context.close().catch((err) => {
+          console.warn("Failed to close AudioContext", err);
+        });
+      }
+    }
+  }
+  /**
+   * Get current audio context state
+   * Returns null when no context exists.
+   */
+  getState() {
+    return this.audioContext?.state ?? null;
+  }
+  /**
+   * Check if microphone is currently listening
+   */
+  isRecording() {
+    return this.isListening;
+  }
+};
 // Annotate the CommonJS export names for ESM import in node:
 // The leading `0 &&` short-circuits, so this assignment is never executed at
 // runtime; the object literal only spells out the export names in source form.
 0 && (module.exports = {
   AUDIO_CONFIG,
+  BrowserAudioManager,
   DEFAULT_URLS,
   Language,
+  StreamResampler,
   TTSClient,
   VoiceAgentClient,
   VoiceStyle,
+  applyLowPassFilter,
+  bytesToPcm16,
+  calculateRMS,
+  float32ToPcm16,
+  normalizeAudio,
+  pcm16ToBytes,
+  pcm16ToFloat32,
+  resample,
+  resampleWithAntiAliasing,
   simpleConversation,
   simpleTTS
 });