npm - @lokutor/sdk - Versions diffs - 1.1.9 → 1.1.10 - Mend

@lokutor/sdk 1.1.9 → 1.1.10

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.d.mts CHANGED Viewed

@@ -119,13 +119,11 @@ declare class VoiceAgentClient {
     private messages;
     private visemeListeners;
     private wantVisemes;
-    private serverUrl;
     constructor(config: LokutorConfig & {
         prompt: string;
         voice?: VoiceStyle;
         language?: Language;
         visemes?: boolean;
-        serverUrl?: string;
         onVisemes?: (visemes: Viseme[]) => void;
     });
     /**
@@ -328,7 +326,7 @@ declare class BrowserAudioManager {
     private mediaStream;
     private nextPlaybackTime;
     private activeSources;
-    private audioClockOffset;
+    private playbackQueue;
     private inputSampleRate;
     private outputSampleRate;
     private autoGainControl;
@@ -338,7 +336,6 @@ declare class BrowserAudioManager {
     private onInputError?;
     private isMuted;
     private isListening;
-    private resampler;
     constructor(config?: BrowserAudioConfig);
     /**
      * Initialize the AudioContext and analyser
@@ -348,77 +345,60 @@ declare class BrowserAudioManager {
      * Start capturing audio from the microphone
      */
     startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
+    /**
+     * Internal method to process microphone audio data
+     */
     private _processAudioInput;
+    /**
+     * Stop capturing microphone input
+     */
     stopMicrophone(): void;
     /**
      * Play back audio received from the server
+     * @param pcm16Data Int16 PCM audio data at SPEAKER_SAMPLE_RATE
      */
     playAudio(pcm16Data: Uint8Array): void;
-    private _schedulePlayback;
     /**
-     * Get the current high-precision audio clock offset for viseme synchronization.
-     * Total stream time (in ms) = (audioContext.currentTime - audioClockOffset) * 1000
+     * Internal method to schedule and play audio with sample-accurate timing
      */
-    getAudioClockOffset(): number | null;
+    private _schedulePlayback;
     /**
-     * Reset the audio clock offset (call when a response is interrupted or finished)
+     * Stop all currently playing audio and clear the queue
      */
-    resetAudioClock(): void;
     stopPlayback(): void;
+    /**
+     * Toggle mute state
+     */
     setMuted(muted: boolean): void;
-    isMicMuted(): boolean;
-    getAmplitude(): number;
-    getFrequencyData(): Uint8Array;
-    getWaveformData(): Uint8Array;
-    cleanup(): void;
-    getAudioContext(): AudioContext | null;
-}
-/**
- * High-level AI Voice Agent for browser-based conversations.
- *
- * This class orchestrates microphone input, AI processing, and
- * speaker output, providing a simple interface for building
- * voice assistants with lip-sync support.
- */
-declare class VoiceAgent {
-    private client;
-    private audioManager;
-    private options;
-    private isConnected;
-    private visemeQueue;
-    constructor(options: VoiceAgentOptions & {
-        apiKey: string;
-    });
     /**
-     * Initialize hardware and connect to the AI server.
-     * This must be called in response to a user guesture (like a click)
-     * to satisfy browser AudioContext requirements.
+     * Get current mute state
      */
-    connect(): Promise<boolean>;
+    isMicMuted(): boolean;
     /**
-     * Get the current amplitude/volume of the microphone or output audio.
-     * Useful for voice activity visualization.
-     * @returns value between 0 and 1
+     * Get current amplitude from analyser (for visualization)
+     * Returns value between 0 and 1
      */
     getAmplitude(): number;
     /**
-     * Mute or unmute the microphone.
+     * Get frequency data from analyser for visualization
      */
-    toggleMute(): boolean;
+    getFrequencyData(): Uint8Array;
     /**
-     * High-precision method to get visemes that should be active
-     * at the current playback frame. Use this in a requestAnimationFrame loop.
+     * Get time-domain data from analyser for waveform visualization
      */
-    getFrameVisemes(): Viseme[];
+    getWaveformData(): Uint8Array;
     /**
-     * Change the system prompt mid-conversation.
+     * Cleanup and close AudioContext
      */
-    updatePrompt(newPrompt: string): void;
+    cleanup(): void;
     /**
-     * Disconnect and release audio resources.
+     * Get current audio context state
      */
-    disconnect(): void;
+    getState(): 'running' | 'suspended' | 'closed' | 'interrupted' | null;
+    /**
+     * Check if microphone is currently listening
+     */
+    isRecording(): boolean;
 }
-export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgent, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
+export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };

package/dist/index.d.ts CHANGED Viewed

@@ -119,13 +119,11 @@ declare class VoiceAgentClient {
     private messages;
     private visemeListeners;
     private wantVisemes;
-    private serverUrl;
     constructor(config: LokutorConfig & {
         prompt: string;
         voice?: VoiceStyle;
         language?: Language;
         visemes?: boolean;
-        serverUrl?: string;
         onVisemes?: (visemes: Viseme[]) => void;
     });
     /**
@@ -328,7 +326,7 @@ declare class BrowserAudioManager {
     private mediaStream;
     private nextPlaybackTime;
     private activeSources;
-    private audioClockOffset;
+    private playbackQueue;
     private inputSampleRate;
     private outputSampleRate;
     private autoGainControl;
@@ -338,7 +336,6 @@ declare class BrowserAudioManager {
     private onInputError?;
     private isMuted;
     private isListening;
-    private resampler;
     constructor(config?: BrowserAudioConfig);
     /**
      * Initialize the AudioContext and analyser
@@ -348,77 +345,60 @@ declare class BrowserAudioManager {
      * Start capturing audio from the microphone
      */
     startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
+    /**
+     * Internal method to process microphone audio data
+     */
     private _processAudioInput;
+    /**
+     * Stop capturing microphone input
+     */
     stopMicrophone(): void;
     /**
      * Play back audio received from the server
+     * @param pcm16Data Int16 PCM audio data at SPEAKER_SAMPLE_RATE
      */
     playAudio(pcm16Data: Uint8Array): void;
-    private _schedulePlayback;
     /**
-     * Get the current high-precision audio clock offset for viseme synchronization.
-     * Total stream time (in ms) = (audioContext.currentTime - audioClockOffset) * 1000
+     * Internal method to schedule and play audio with sample-accurate timing
      */
-    getAudioClockOffset(): number | null;
+    private _schedulePlayback;
     /**
-     * Reset the audio clock offset (call when a response is interrupted or finished)
+     * Stop all currently playing audio and clear the queue
      */
-    resetAudioClock(): void;
     stopPlayback(): void;
+    /**
+     * Toggle mute state
+     */
     setMuted(muted: boolean): void;
-    isMicMuted(): boolean;
-    getAmplitude(): number;
-    getFrequencyData(): Uint8Array;
-    getWaveformData(): Uint8Array;
-    cleanup(): void;
-    getAudioContext(): AudioContext | null;
-}
-/**
- * High-level AI Voice Agent for browser-based conversations.
- *
- * This class orchestrates microphone input, AI processing, and
- * speaker output, providing a simple interface for building
- * voice assistants with lip-sync support.
- */
-declare class VoiceAgent {
-    private client;
-    private audioManager;
-    private options;
-    private isConnected;
-    private visemeQueue;
-    constructor(options: VoiceAgentOptions & {
-        apiKey: string;
-    });
     /**
-     * Initialize hardware and connect to the AI server.
-     * This must be called in response to a user guesture (like a click)
-     * to satisfy browser AudioContext requirements.
+     * Get current mute state
      */
-    connect(): Promise<boolean>;
+    isMicMuted(): boolean;
     /**
-     * Get the current amplitude/volume of the microphone or output audio.
-     * Useful for voice activity visualization.
-     * @returns value between 0 and 1
+     * Get current amplitude from analyser (for visualization)
+     * Returns value between 0 and 1
      */
     getAmplitude(): number;
     /**
-     * Mute or unmute the microphone.
+     * Get frequency data from analyser for visualization
      */
-    toggleMute(): boolean;
+    getFrequencyData(): Uint8Array;
     /**
-     * High-precision method to get visemes that should be active
-     * at the current playback frame. Use this in a requestAnimationFrame loop.
+     * Get time-domain data from analyser for waveform visualization
      */
-    getFrameVisemes(): Viseme[];
+    getWaveformData(): Uint8Array;
     /**
-     * Change the system prompt mid-conversation.
+     * Cleanup and close AudioContext
      */
-    updatePrompt(newPrompt: string): void;
+    cleanup(): void;
     /**
-     * Disconnect and release audio resources.
+     * Get current audio context state
      */
-    disconnect(): void;
+    getState(): 'running' | 'suspended' | 'closed' | 'interrupted' | null;
+    /**
+     * Check if microphone is currently listening
+     */
+    isRecording(): boolean;
 }
-export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgent, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
+export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };

package/dist/index.js CHANGED Viewed

@@ -26,7 +26,6 @@ __export(index_exports, {
   Language: () => Language,
   StreamResampler: () => StreamResampler,
   TTSClient: () => TTSClient,
-  VoiceAgent: () => VoiceAgent,
   VoiceAgentClient: () => VoiceAgentClient,
   VoiceStyle: () => VoiceStyle,
   applyLowPassFilter: () => applyLowPassFilter,
@@ -105,13 +104,11 @@ var VoiceAgentClient = class {
   messages = [];
   visemeListeners = [];
   wantVisemes = false;
-  serverUrl = null;
   constructor(config) {
     this.apiKey = config.apiKey;
     this.prompt = config.prompt;
     this.voice = config.voice || "F1" /* F1 */;
     this.language = config.language || "en" /* ENGLISH */;
-    this.serverUrl = config.serverUrl || null;
     this.onTranscription = config.onTranscription;
     this.onResponse = config.onResponse;
     this.onAudioCallback = config.onAudio;
@@ -126,12 +123,12 @@ var VoiceAgentClient = class {
   async connect() {
     return new Promise((resolve, reject) => {
       try {
-        let url = this.serverUrl || DEFAULT_URLS.VOICE_AGENT;
+        let url = DEFAULT_URLS.VOICE_AGENT;
         if (this.apiKey) {
           const separator = url.includes("?") ? "&" : "?";
           url += `${separator}api_key=${this.apiKey}`;
         }
-        console.log(`\u{1F517} Connecting to ${url}...`);
+        console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
         this.ws = new WebSocket(url);
         this.ws.binaryType = "arraybuffer";
         this.ws.onopen = () => {
@@ -504,8 +501,7 @@ var BrowserAudioManager = class {
   // Playback scheduling
   nextPlaybackTime = 0;
   activeSources = [];
-  // High-precision clock anchor for viseme sync
-  audioClockOffset = null;
+  playbackQueue = [];
   // Configuration
   inputSampleRate;
   outputSampleRate;
@@ -518,7 +514,6 @@ var BrowserAudioManager = class {
   // Audio processing state
   isMuted = false;
   isListening = false;
-  resampler = null;
   constructor(config = {}) {
     this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
     this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
@@ -542,6 +537,7 @@ var BrowserAudioManager = class {
     }
     if (this.audioContext.state === "suspended") {
       await this.audioContext.resume();
+      console.log("\u{1F442} AudioContext resumed");
     }
     if (analyserConfig?.enabled !== false) {
       this.analyserNode = this.audioContext.createAnalyser();
@@ -555,7 +551,6 @@ var BrowserAudioManager = class {
     if (!this.audioContext) {
       await this.init();
     }
-    this.resampler = new StreamResampler(this.audioContext.sampleRate, this.inputSampleRate);
     try {
       this.onAudioInput = onAudioInput;
       this.isListening = true;
@@ -571,7 +566,9 @@ var BrowserAudioManager = class {
       this.scriptProcessor = this.audioContext.createScriptProcessor(
         bufferSize,
         1,
+        // input channels
         1
+        // output channels
       );
       this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
       this.scriptProcessor.connect(this.audioContext.destination);
@@ -588,19 +585,40 @@ var BrowserAudioManager = class {
       throw err;
     }
   }
+  /**
+   * Internal method to process microphone audio data
+   */
   _processAudioInput(event) {
-    if (!this.onAudioInput || !this.audioContext || !this.isListening || this.isMuted) return;
-    const inputData = event.inputBuffer.getChannelData(0);
-    event.outputBuffer.getChannelData(0).fill(0);
-    const resampled = this.resampler ? this.resampler.process(inputData) : inputData;
-    if (resampled && resampled.length > 0) {
-      const int16Data = float32ToPcm16(resampled);
-      this.onAudioInput(new Uint8Array(int16Data.buffer, int16Data.byteOffset, int16Data.byteLength));
+    if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
+    if (this.isMuted) return;
+    const inputBuffer = event.inputBuffer;
+    const inputData = inputBuffer.getChannelData(0);
+    const outputBuffer = event.outputBuffer;
+    for (let i = 0; i < outputBuffer.getChannelData(0).length; i++) {
+      outputBuffer.getChannelData(0)[i] = 0;
+    }
+    const hardwareRate = this.audioContext.sampleRate;
+    let processedData = new Float32Array(inputData);
+    if (hardwareRate !== this.inputSampleRate) {
+      processedData = resampleWithAntiAliasing(
+        processedData,
+        hardwareRate,
+        this.inputSampleRate
+      );
     }
+    const int16Data = float32ToPcm16(processedData);
+    const uint8Data = new Uint8Array(
+      int16Data.buffer,
+      int16Data.byteOffset,
+      int16Data.byteLength
+    );
+    this.onAudioInput(uint8Data);
   }
+  /**
+   * Stop capturing microphone input
+   */
   stopMicrophone() {
     this.isListening = false;
-    this.resampler = null;
     if (this.mediaStream) {
       this.mediaStream.getTracks().forEach((track) => track.stop());
       this.mediaStream = null;
@@ -613,12 +631,17 @@ var BrowserAudioManager = class {
       this.mediaStreamAudioSourceNode.disconnect();
       this.mediaStreamAudioSourceNode = null;
     }
+    console.log("\u{1F3A4} Microphone stopped");
   }
   /**
    * Play back audio received from the server
+   * @param pcm16Data Int16 PCM audio data at SPEAKER_SAMPLE_RATE
    */
   playAudio(pcm16Data) {
-    if (!this.audioContext) return;
+    if (!this.audioContext) {
+      console.warn("AudioContext not initialized");
+      return;
+    }
     const int16Array = new Int16Array(
       pcm16Data.buffer,
       pcm16Data.byteOffset,
@@ -633,17 +656,18 @@ var BrowserAudioManager = class {
     audioBuffer.getChannelData(0).set(float32Data);
     this._schedulePlayback(audioBuffer);
   }
+  /**
+   * Internal method to schedule and play audio with sample-accurate timing
+   */
   _schedulePlayback(audioBuffer) {
     if (!this.audioContext) return;
     const currentTime = this.audioContext.currentTime;
     const duration = audioBuffer.length / this.outputSampleRate;
     const startTime = Math.max(
       currentTime + 0.01,
+      // Minimum 10ms delay
       this.nextPlaybackTime
     );
-    if (this.audioClockOffset === null) {
-      this.audioClockOffset = startTime;
-    }
     this.nextPlaybackTime = startTime + duration;
     const source = this.audioContext.createBufferSource();
     source.buffer = audioBuffer;
@@ -661,18 +685,8 @@ var BrowserAudioManager = class {
     };
   }
   /**
-   * Get the current high-precision audio clock offset for viseme synchronization.
-   * Total stream time (in ms) = (audioContext.currentTime - audioClockOffset) * 1000
-   */
-  getAudioClockOffset() {
-    return this.audioClockOffset;
-  }
-  /**
-   * Reset the audio clock offset (call when a response is interrupted or finished)
+   * Stop all currently playing audio and clear the queue
    */
-  resetAudioClock() {
-    this.audioClockOffset = null;
-  }
   stopPlayback() {
     this.activeSources.forEach((source) => {
       try {
@@ -681,15 +695,26 @@ var BrowserAudioManager = class {
       }
     });
     this.activeSources = [];
-    this.nextPlaybackTime = 0;
-    this.resetAudioClock();
+    this.playbackQueue = [];
+    this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
+    console.log("\u{1F507} Playback stopped");
   }
+  /**
+   * Toggle mute state
+   */
   setMuted(muted) {
     this.isMuted = muted;
   }
+  /**
+   * Get current mute state
+   */
   isMicMuted() {
     return this.isMuted;
   }
+  /**
+   * Get current amplitude from analyser (for visualization)
+   * Returns value between 0 and 1
+   */
   getAmplitude() {
     if (!this.analyserNode) return 0;
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
@@ -697,18 +722,31 @@ var BrowserAudioManager = class {
     const rms = calculateRMS(dataArray);
     return Math.min(rms * 10, 1);
   }
+  /**
+   * Get frequency data from analyser for visualization
+   */
   getFrequencyData() {
-    if (!this.analyserNode) return new Uint8Array(0);
+    if (!this.analyserNode) {
+      return new Uint8Array(0);
+    }
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
     this.analyserNode.getByteFrequencyData(dataArray);
     return dataArray;
   }
+  /**
+   * Get time-domain data from analyser for waveform visualization
+   */
   getWaveformData() {
-    if (!this.analyserNode) return new Uint8Array(0);
+    if (!this.analyserNode) {
+      return new Uint8Array(0);
+    }
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
     this.analyserNode.getByteTimeDomainData(dataArray);
     return dataArray;
   }
+  /**
+   * Cleanup and close AudioContext
+   */
   cleanup() {
     this.stopMicrophone();
     this.stopPlayback();
@@ -717,124 +755,17 @@ var BrowserAudioManager = class {
       this.analyserNode = null;
     }
   }
-  getAudioContext() {
-    return this.audioContext;
-  }
-};
-// src/voice-agent.ts
-var VoiceAgent = class {
-  client;
-  audioManager;
-  options;
-  isConnected = false;
-  visemeQueue = [];
-  constructor(options) {
-    this.options = options;
-    this.client = new VoiceAgentClient({
-      apiKey: options.apiKey,
-      prompt: options.prompt || "You are a helpful and friendly AI assistant.",
-      voice: options.voice || "F1" /* F1 */,
-      language: options.language || "en" /* ENGLISH */,
-      visemes: options.visemes ?? true,
-      serverUrl: options.serverUrl,
-      onTranscription: (text) => {
-        if (options.onTranscription) options.onTranscription(text, true);
-      },
-      onResponse: (text) => {
-        if (options.onTranscription) options.onTranscription(text, false);
-      },
-      onAudio: (data) => {
-        this.audioManager.playAudio(data);
-      },
-      onVisemes: (visemes) => {
-        this.visemeQueue.push(...visemes);
-        if (options.onVisemes) options.onVisemes(visemes);
-      },
-      onStatus: (status) => {
-        if (options.onStatusChange) options.onStatusChange(status);
-        if (status === "interrupted" || status === "thinking") {
-          this.audioManager.stopPlayback();
-          this.visemeQueue = [];
-        }
-      },
-      onError: (err) => {
-        if (options.onError) options.onError(err);
-      }
-    });
-    this.audioManager = new BrowserAudioManager({
-      autoGainControl: true,
-      echoCancellation: true,
-      noiseSuppression: true
-    });
-  }
   /**
-   * Initialize hardware and connect to the AI server.
-   * This must be called in response to a user guesture (like a click)
-   * to satisfy browser AudioContext requirements.
+   * Get current audio context state
    */
-  async connect() {
-    try {
-      await this.audioManager.init();
-      const connected = await this.client.connect();
-      if (!connected) return false;
-      this.isConnected = true;
-      await this.audioManager.startMicrophone((pcm16Data) => {
-        if (this.isConnected) {
-          this.client.sendAudio(pcm16Data);
-        }
-      });
-      return true;
-    } catch (err) {
-      if (this.options.onError) this.options.onError(err);
-      return false;
-    }
+  getState() {
+    return this.audioContext?.state ?? null;
   }
   /**
-   * Get the current amplitude/volume of the microphone or output audio.
-   * Useful for voice activity visualization.
-   * @returns value between 0 and 1
+   * Check if microphone is currently listening
    */
-  getAmplitude() {
-    return this.audioManager.getAmplitude();
-  }
-  /**
-   * Mute or unmute the microphone.
-   */
-  toggleMute() {
-    const currentState = this.audioManager.isMicMuted();
-    this.audioManager.setMuted(!currentState);
-    return !currentState;
-  }
-  /**
-   * High-precision method to get visemes that should be active
-   * at the current playback frame. Use this in a requestAnimationFrame loop.
-   */
-  getFrameVisemes() {
-    const offset = this.audioManager.getAudioClockOffset();
-    const audioCtx = this.audioManager.getAudioContext();
-    if (offset === null || !audioCtx) return [];
-    const streamTime = (audioCtx.currentTime - offset) * 1e3;
-    const currentBatch = [];
-    while (this.visemeQueue.length > 0 && this.visemeQueue[0].t * 1e3 <= streamTime) {
-      currentBatch.push(this.visemeQueue.shift());
-    }
-    return currentBatch;
-  }
-  /**
-   * Change the system prompt mid-conversation.
-   */
-  updatePrompt(newPrompt) {
-    this.client.updatePrompt(newPrompt);
-  }
-  /**
-   * Disconnect and release audio resources.
-   */
-  disconnect() {
-    this.isConnected = false;
-    this.client.disconnect();
-    this.audioManager.cleanup();
-    this.visemeQueue = [];
+  isRecording() {
+    return this.isListening;
   }
 };
 // Annotate the CommonJS export names for ESM import in node:
@@ -845,7 +776,6 @@ var VoiceAgent = class {
   Language,
   StreamResampler,
   TTSClient,
-  VoiceAgent,
   VoiceAgentClient,
   VoiceStyle,
   applyLowPassFilter,

package/dist/index.mjs CHANGED Viewed

@@ -60,13 +60,11 @@ var VoiceAgentClient = class {
   messages = [];
   visemeListeners = [];
   wantVisemes = false;
-  serverUrl = null;
   constructor(config) {
     this.apiKey = config.apiKey;
     this.prompt = config.prompt;
     this.voice = config.voice || "F1" /* F1 */;
     this.language = config.language || "en" /* ENGLISH */;
-    this.serverUrl = config.serverUrl || null;
     this.onTranscription = config.onTranscription;
     this.onResponse = config.onResponse;
     this.onAudioCallback = config.onAudio;
@@ -81,12 +79,12 @@ var VoiceAgentClient = class {
   async connect() {
     return new Promise((resolve, reject) => {
       try {
-        let url = this.serverUrl || DEFAULT_URLS.VOICE_AGENT;
+        let url = DEFAULT_URLS.VOICE_AGENT;
         if (this.apiKey) {
           const separator = url.includes("?") ? "&" : "?";
           url += `${separator}api_key=${this.apiKey}`;
         }
-        console.log(`\u{1F517} Connecting to ${url}...`);
+        console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
         this.ws = new WebSocket(url);
         this.ws.binaryType = "arraybuffer";
         this.ws.onopen = () => {
@@ -459,8 +457,7 @@ var BrowserAudioManager = class {
   // Playback scheduling
   nextPlaybackTime = 0;
   activeSources = [];
-  // High-precision clock anchor for viseme sync
-  audioClockOffset = null;
+  playbackQueue = [];
   // Configuration
   inputSampleRate;
   outputSampleRate;
@@ -473,7 +470,6 @@ var BrowserAudioManager = class {
   // Audio processing state
   isMuted = false;
   isListening = false;
-  resampler = null;
   constructor(config = {}) {
     this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
     this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
@@ -497,6 +493,7 @@ var BrowserAudioManager = class {
     }
     if (this.audioContext.state === "suspended") {
       await this.audioContext.resume();
+      console.log("\u{1F442} AudioContext resumed");
     }
     if (analyserConfig?.enabled !== false) {
       this.analyserNode = this.audioContext.createAnalyser();
@@ -510,7 +507,6 @@ var BrowserAudioManager = class {
     if (!this.audioContext) {
       await this.init();
     }
-    this.resampler = new StreamResampler(this.audioContext.sampleRate, this.inputSampleRate);
     try {
       this.onAudioInput = onAudioInput;
       this.isListening = true;
@@ -526,7 +522,9 @@ var BrowserAudioManager = class {
       this.scriptProcessor = this.audioContext.createScriptProcessor(
         bufferSize,
         1,
+        // input channels
         1
+        // output channels
       );
       this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
       this.scriptProcessor.connect(this.audioContext.destination);
@@ -543,19 +541,40 @@ var BrowserAudioManager = class {
       throw err;
     }
   }
+  /**
+   * Internal method to process microphone audio data
+   */
   _processAudioInput(event) {
-    if (!this.onAudioInput || !this.audioContext || !this.isListening || this.isMuted) return;
-    const inputData = event.inputBuffer.getChannelData(0);
-    event.outputBuffer.getChannelData(0).fill(0);
-    const resampled = this.resampler ? this.resampler.process(inputData) : inputData;
-    if (resampled && resampled.length > 0) {
-      const int16Data = float32ToPcm16(resampled);
-      this.onAudioInput(new Uint8Array(int16Data.buffer, int16Data.byteOffset, int16Data.byteLength));
+    if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
+    if (this.isMuted) return;
+    const inputBuffer = event.inputBuffer;
+    const inputData = inputBuffer.getChannelData(0);
+    const outputBuffer = event.outputBuffer;
+    for (let i = 0; i < outputBuffer.getChannelData(0).length; i++) {
+      outputBuffer.getChannelData(0)[i] = 0;
+    }
+    const hardwareRate = this.audioContext.sampleRate;
+    let processedData = new Float32Array(inputData);
+    if (hardwareRate !== this.inputSampleRate) {
+      processedData = resampleWithAntiAliasing(
+        processedData,
+        hardwareRate,
+        this.inputSampleRate
+      );
     }
+    const int16Data = float32ToPcm16(processedData);
+    const uint8Data = new Uint8Array(
+      int16Data.buffer,
+      int16Data.byteOffset,
+      int16Data.byteLength
+    );
+    this.onAudioInput(uint8Data);
   }
+  /**
+   * Stop capturing microphone input
+   */
   stopMicrophone() {
     this.isListening = false;
-    this.resampler = null;
     if (this.mediaStream) {
       this.mediaStream.getTracks().forEach((track) => track.stop());
       this.mediaStream = null;
@@ -568,12 +587,17 @@ var BrowserAudioManager = class {
       this.mediaStreamAudioSourceNode.disconnect();
       this.mediaStreamAudioSourceNode = null;
     }
+    console.log("\u{1F3A4} Microphone stopped");
   }
   /**
    * Play back audio received from the server
+   * @param pcm16Data Int16 PCM audio data at SPEAKER_SAMPLE_RATE
    */
   playAudio(pcm16Data) {
-    if (!this.audioContext) return;
+    if (!this.audioContext) {
+      console.warn("AudioContext not initialized");
+      return;
+    }
     const int16Array = new Int16Array(
       pcm16Data.buffer,
       pcm16Data.byteOffset,
@@ -588,17 +612,18 @@ var BrowserAudioManager = class {
     audioBuffer.getChannelData(0).set(float32Data);
     this._schedulePlayback(audioBuffer);
   }
+  /**
+   * Internal method to schedule and play audio with sample-accurate timing
+   */
   _schedulePlayback(audioBuffer) {
     if (!this.audioContext) return;
     const currentTime = this.audioContext.currentTime;
     const duration = audioBuffer.length / this.outputSampleRate;
     const startTime = Math.max(
       currentTime + 0.01,
+      // Minimum 10ms delay
       this.nextPlaybackTime
     );
-    if (this.audioClockOffset === null) {
-      this.audioClockOffset = startTime;
-    }
     this.nextPlaybackTime = startTime + duration;
     const source = this.audioContext.createBufferSource();
     source.buffer = audioBuffer;
@@ -616,18 +641,8 @@ var BrowserAudioManager = class {
     };
   }
   /**
-   * Get the current high-precision audio clock offset for viseme synchronization.
-   * Total stream time (in ms) = (audioContext.currentTime - audioClockOffset) * 1000
-   */
-  getAudioClockOffset() {
-    return this.audioClockOffset;
-  }
-  /**
-   * Reset the audio clock offset (call when a response is interrupted or finished)
+   * Stop all currently playing audio and clear the queue
    */
-  resetAudioClock() {
-    this.audioClockOffset = null;
-  }
   stopPlayback() {
     this.activeSources.forEach((source) => {
       try {
@@ -636,15 +651,26 @@ var BrowserAudioManager = class {
       }
     });
     this.activeSources = [];
-    this.nextPlaybackTime = 0;
-    this.resetAudioClock();
+    this.playbackQueue = [];
+    this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
+    console.log("\u{1F507} Playback stopped");
   }
+  /**
+   * Toggle mute state
+   */
   setMuted(muted) {
     this.isMuted = muted;
   }
+  /**
+   * Get current mute state
+   */
   isMicMuted() {
     return this.isMuted;
   }
+  /**
+   * Get current amplitude from analyser (for visualization)
+   * Returns value between 0 and 1
+   */
   getAmplitude() {
     if (!this.analyserNode) return 0;
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
@@ -652,18 +678,31 @@ var BrowserAudioManager = class {
     const rms = calculateRMS(dataArray);
     return Math.min(rms * 10, 1);
   }
+  /**
+   * Get frequency data from analyser for visualization
+   */
   getFrequencyData() {
-    if (!this.analyserNode) return new Uint8Array(0);
+    if (!this.analyserNode) {
+      return new Uint8Array(0);
+    }
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
     this.analyserNode.getByteFrequencyData(dataArray);
     return dataArray;
   }
+  /**
+   * Get time-domain data from analyser for waveform visualization
+   */
   getWaveformData() {
-    if (!this.analyserNode) return new Uint8Array(0);
+    if (!this.analyserNode) {
+      return new Uint8Array(0);
+    }
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
     this.analyserNode.getByteTimeDomainData(dataArray);
     return dataArray;
   }
+  /**
+   * Cleanup and close AudioContext
+   */
   cleanup() {
     this.stopMicrophone();
     this.stopPlayback();
@@ -672,124 +711,17 @@ var BrowserAudioManager = class {
       this.analyserNode = null;
     }
   }
-  getAudioContext() {
-    return this.audioContext;
-  }
-};
-// src/voice-agent.ts
-var VoiceAgent = class {
-  client;
-  audioManager;
-  options;
-  isConnected = false;
-  visemeQueue = [];
-  constructor(options) {
-    this.options = options;
-    this.client = new VoiceAgentClient({
-      apiKey: options.apiKey,
-      prompt: options.prompt || "You are a helpful and friendly AI assistant.",
-      voice: options.voice || "F1" /* F1 */,
-      language: options.language || "en" /* ENGLISH */,
-      visemes: options.visemes ?? true,
-      serverUrl: options.serverUrl,
-      onTranscription: (text) => {
-        if (options.onTranscription) options.onTranscription(text, true);
-      },
-      onResponse: (text) => {
-        if (options.onTranscription) options.onTranscription(text, false);
-      },
-      onAudio: (data) => {
-        this.audioManager.playAudio(data);
-      },
-      onVisemes: (visemes) => {
-        this.visemeQueue.push(...visemes);
-        if (options.onVisemes) options.onVisemes(visemes);
-      },
-      onStatus: (status) => {
-        if (options.onStatusChange) options.onStatusChange(status);
-        if (status === "interrupted" || status === "thinking") {
-          this.audioManager.stopPlayback();
-          this.visemeQueue = [];
-        }
-      },
-      onError: (err) => {
-        if (options.onError) options.onError(err);
-      }
-    });
-    this.audioManager = new BrowserAudioManager({
-      autoGainControl: true,
-      echoCancellation: true,
-      noiseSuppression: true
-    });
-  }
   /**
-   * Initialize hardware and connect to the AI server.
-   * This must be called in response to a user guesture (like a click)
-   * to satisfy browser AudioContext requirements.
+   * Get current audio context state
    */
-  async connect() {
-    try {
-      await this.audioManager.init();
-      const connected = await this.client.connect();
-      if (!connected) return false;
-      this.isConnected = true;
-      await this.audioManager.startMicrophone((pcm16Data) => {
-        if (this.isConnected) {
-          this.client.sendAudio(pcm16Data);
-        }
-      });
-      return true;
-    } catch (err) {
-      if (this.options.onError) this.options.onError(err);
-      return false;
-    }
+  getState() {
+    return this.audioContext?.state ?? null;
   }
   /**
-   * Get the current amplitude/volume of the microphone or output audio.
-   * Useful for voice activity visualization.
-   * @returns value between 0 and 1
+   * Check if microphone is currently listening
    */
-  getAmplitude() {
-    return this.audioManager.getAmplitude();
-  }
-  /**
-   * Mute or unmute the microphone.
-   */
-  toggleMute() {
-    const currentState = this.audioManager.isMicMuted();
-    this.audioManager.setMuted(!currentState);
-    return !currentState;
-  }
-  /**
-   * High-precision method to get visemes that should be active
-   * at the current playback frame. Use this in a requestAnimationFrame loop.
-   */
-  getFrameVisemes() {
-    const offset = this.audioManager.getAudioClockOffset();
-    const audioCtx = this.audioManager.getAudioContext();
-    if (offset === null || !audioCtx) return [];
-    const streamTime = (audioCtx.currentTime - offset) * 1e3;
-    const currentBatch = [];
-    while (this.visemeQueue.length > 0 && this.visemeQueue[0].t * 1e3 <= streamTime) {
-      currentBatch.push(this.visemeQueue.shift());
-    }
-    return currentBatch;
-  }
-  /**
-   * Change the system prompt mid-conversation.
-   */
-  updatePrompt(newPrompt) {
-    this.client.updatePrompt(newPrompt);
-  }
-  /**
-   * Disconnect and release audio resources.
-   */
-  disconnect() {
-    this.isConnected = false;
-    this.client.disconnect();
-    this.audioManager.cleanup();
-    this.visemeQueue = [];
+  isRecording() {
+    return this.isListening;
   }
 };
 export {
@@ -799,7 +731,6 @@ export {
   Language,
   StreamResampler,
   TTSClient,
-  VoiceAgent,
   VoiceAgentClient,
   VoiceStyle,
   applyLowPassFilter,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lokutor/sdk",
-  "version": "1.1.9",
+  "version": "1.1.10",
   "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",