npm - @lokutor/sdk - Versions diffs - 1.1.7 → 1.1.9 - Mend

@lokutor/sdk 1.1.7 → 1.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/index.d.mts CHANGED Viewed

@@ -90,11 +90,12 @@ interface VoiceAgentOptions {
 }
 /**
  * Viseme data for lip-sync animation
+ * Format: {"v": index, "c": character, "t": timestamp}
  */
 interface Viseme {
-    id: number;
-    char: string;
-    timestamp: number;
+    v: number;
+    c: string;
+    t: number;
 }
 /**
@@ -117,10 +118,14 @@ declare class VoiceAgentClient {
     private isConnected;
     private messages;
     private visemeListeners;
+    private wantVisemes;
+    private serverUrl;
     constructor(config: LokutorConfig & {
         prompt: string;
         voice?: VoiceStyle;
         language?: Language;
+        visemes?: boolean;
+        serverUrl?: string;
         onVisemes?: (visemes: Viseme[]) => void;
     });
     /**
@@ -323,7 +328,7 @@ declare class BrowserAudioManager {
     private mediaStream;
     private nextPlaybackTime;
     private activeSources;
-    private playbackQueue;
+    private audioClockOffset;
     private inputSampleRate;
     private outputSampleRate;
     private autoGainControl;
@@ -333,6 +338,7 @@ declare class BrowserAudioManager {
     private onInputError?;
     private isMuted;
     private isListening;
+    private resampler;
     constructor(config?: BrowserAudioConfig);
     /**
      * Initialize the AudioContext and analyser
@@ -342,60 +348,77 @@ declare class BrowserAudioManager {
      * Start capturing audio from the microphone
      */
     startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
-    /**
-     * Internal method to process microphone audio data
-     */
     private _processAudioInput;
-    /**
-     * Stop capturing microphone input
-     */
     stopMicrophone(): void;
     /**
      * Play back audio received from the server
-     * @param pcm16Data Int16 PCM audio data at SPEAKER_SAMPLE_RATE
      */
     playAudio(pcm16Data: Uint8Array): void;
-    /**
-     * Internal method to schedule and play audio with sample-accurate timing
-     */
     private _schedulePlayback;
     /**
-     * Stop all currently playing audio and clear the queue
+     * Get the current high-precision audio clock offset for viseme synchronization.
+     * Total stream time (in ms) = (audioContext.currentTime - audioClockOffset) * 1000
      */
-    stopPlayback(): void;
+    getAudioClockOffset(): number | null;
     /**
-     * Toggle mute state
+     * Reset the audio clock offset (call when a response is interrupted or finished)
      */
+    resetAudioClock(): void;
+    stopPlayback(): void;
     setMuted(muted: boolean): void;
-    /**
-     * Get current mute state
-     */
     isMicMuted(): boolean;
+    getAmplitude(): number;
+    getFrequencyData(): Uint8Array;
+    getWaveformData(): Uint8Array;
+    cleanup(): void;
+    getAudioContext(): AudioContext | null;
+}
+/**
+ * High-level AI Voice Agent for browser-based conversations.
+ *
+ * This class orchestrates microphone input, AI processing, and
+ * speaker output, providing a simple interface for building
+ * voice assistants with lip-sync support.
+ */
+declare class VoiceAgent {
+    private client;
+    private audioManager;
+    private options;
+    private isConnected;
+    private visemeQueue;
+    constructor(options: VoiceAgentOptions & {
+        apiKey: string;
+    });
     /**
-     * Get current amplitude from analyser (for visualization)
-     * Returns value between 0 and 1
+     * Initialize hardware and connect to the AI server.
+     * This must be called in response to a user gesture (like a click)
+     * to satisfy browser AudioContext requirements.
      */
-    getAmplitude(): number;
+    connect(): Promise<boolean>;
     /**
-     * Get frequency data from analyser for visualization
+     * Get the current amplitude/volume of the microphone or output audio.
+     * Useful for voice activity visualization.
+     * @returns value between 0 and 1
      */
-    getFrequencyData(): Uint8Array;
+    getAmplitude(): number;
     /**
-     * Get time-domain data from analyser for waveform visualization
+     * Mute or unmute the microphone.
      */
-    getWaveformData(): Uint8Array;
+    toggleMute(): boolean;
     /**
-     * Cleanup and close AudioContext
+     * High-precision method to get visemes that should be active
+     * at the current playback frame. Use this in a requestAnimationFrame loop.
      */
-    cleanup(): void;
+    getFrameVisemes(): Viseme[];
     /**
-     * Get current audio context state
+     * Change the system prompt mid-conversation.
      */
-    getState(): 'running' | 'suspended' | 'closed' | 'interrupted' | null;
+    updatePrompt(newPrompt: string): void;
     /**
-     * Check if microphone is currently listening
+     * Disconnect and release audio resources.
      */
-    isRecording(): boolean;
+    disconnect(): void;
 }
-export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
+export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgent, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };

package/dist/index.d.ts CHANGED Viewed

@@ -90,11 +90,12 @@ interface VoiceAgentOptions {
 }
 /**
  * Viseme data for lip-sync animation
+ * Format: {"v": index, "c": character, "t": timestamp}
  */
 interface Viseme {
-    id: number;
-    char: string;
-    timestamp: number;
+    v: number;
+    c: string;
+    t: number;
 }
 /**
@@ -117,10 +118,14 @@ declare class VoiceAgentClient {
     private isConnected;
     private messages;
     private visemeListeners;
+    private wantVisemes;
+    private serverUrl;
     constructor(config: LokutorConfig & {
         prompt: string;
         voice?: VoiceStyle;
         language?: Language;
+        visemes?: boolean;
+        serverUrl?: string;
         onVisemes?: (visemes: Viseme[]) => void;
     });
     /**
@@ -323,7 +328,7 @@ declare class BrowserAudioManager {
     private mediaStream;
     private nextPlaybackTime;
     private activeSources;
-    private playbackQueue;
+    private audioClockOffset;
     private inputSampleRate;
     private outputSampleRate;
     private autoGainControl;
@@ -333,6 +338,7 @@ declare class BrowserAudioManager {
     private onInputError?;
     private isMuted;
     private isListening;
+    private resampler;
     constructor(config?: BrowserAudioConfig);
     /**
      * Initialize the AudioContext and analyser
@@ -342,60 +348,77 @@ declare class BrowserAudioManager {
      * Start capturing audio from the microphone
      */
     startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
-    /**
-     * Internal method to process microphone audio data
-     */
     private _processAudioInput;
-    /**
-     * Stop capturing microphone input
-     */
     stopMicrophone(): void;
     /**
      * Play back audio received from the server
-     * @param pcm16Data Int16 PCM audio data at SPEAKER_SAMPLE_RATE
      */
     playAudio(pcm16Data: Uint8Array): void;
-    /**
-     * Internal method to schedule and play audio with sample-accurate timing
-     */
     private _schedulePlayback;
     /**
-     * Stop all currently playing audio and clear the queue
+     * Get the current high-precision audio clock offset for viseme synchronization.
+     * Total stream time (in ms) = (audioContext.currentTime - audioClockOffset) * 1000
      */
-    stopPlayback(): void;
+    getAudioClockOffset(): number | null;
     /**
-     * Toggle mute state
+     * Reset the audio clock offset (call when a response is interrupted or finished)
      */
+    resetAudioClock(): void;
+    stopPlayback(): void;
     setMuted(muted: boolean): void;
-    /**
-     * Get current mute state
-     */
     isMicMuted(): boolean;
+    getAmplitude(): number;
+    getFrequencyData(): Uint8Array;
+    getWaveformData(): Uint8Array;
+    cleanup(): void;
+    getAudioContext(): AudioContext | null;
+}
+/**
+ * High-level AI Voice Agent for browser-based conversations.
+ *
+ * This class orchestrates microphone input, AI processing, and
+ * speaker output, providing a simple interface for building
+ * voice assistants with lip-sync support.
+ */
+declare class VoiceAgent {
+    private client;
+    private audioManager;
+    private options;
+    private isConnected;
+    private visemeQueue;
+    constructor(options: VoiceAgentOptions & {
+        apiKey: string;
+    });
     /**
-     * Get current amplitude from analyser (for visualization)
-     * Returns value between 0 and 1
+     * Initialize hardware and connect to the AI server.
+     * This must be called in response to a user gesture (like a click)
+     * to satisfy browser AudioContext requirements.
      */
-    getAmplitude(): number;
+    connect(): Promise<boolean>;
     /**
-     * Get frequency data from analyser for visualization
+     * Get the current amplitude/volume of the microphone or output audio.
+     * Useful for voice activity visualization.
+     * @returns value between 0 and 1
      */
-    getFrequencyData(): Uint8Array;
+    getAmplitude(): number;
     /**
-     * Get time-domain data from analyser for waveform visualization
+     * Mute or unmute the microphone.
      */
-    getWaveformData(): Uint8Array;
+    toggleMute(): boolean;
     /**
-     * Cleanup and close AudioContext
+     * High-precision method to get visemes that should be active
+     * at the current playback frame. Use this in a requestAnimationFrame loop.
      */
-    cleanup(): void;
+    getFrameVisemes(): Viseme[];
     /**
-     * Get current audio context state
+     * Change the system prompt mid-conversation.
      */
-    getState(): 'running' | 'suspended' | 'closed' | 'interrupted' | null;
+    updatePrompt(newPrompt: string): void;
     /**
-     * Check if microphone is currently listening
+     * Disconnect and release audio resources.
      */
-    isRecording(): boolean;
+    disconnect(): void;
 }
-export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
+export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgent, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };

package/dist/index.js CHANGED Viewed

@@ -26,6 +26,7 @@ __export(index_exports, {
   Language: () => Language,
   StreamResampler: () => StreamResampler,
   TTSClient: () => TTSClient,
+  VoiceAgent: () => VoiceAgent,
   VoiceAgentClient: () => VoiceAgentClient,
   VoiceStyle: () => VoiceStyle,
   applyLowPassFilter: () => applyLowPassFilter,
@@ -65,8 +66,8 @@ var Language = /* @__PURE__ */ ((Language2) => {
   return Language2;
 })(Language || {});
 var AUDIO_CONFIG = {
-  SAMPLE_RATE: 44100,
-  SPEAKER_SAMPLE_RATE: 16e3,
+  SAMPLE_RATE: 16e3,
+  SPEAKER_SAMPLE_RATE: 44100,
   CHANNELS: 1,
   CHUNK_DURATION_MS: 20,
   get CHUNK_SIZE() {
@@ -103,17 +104,21 @@ var VoiceAgentClient = class {
   isConnected = false;
   messages = [];
   visemeListeners = [];
+  wantVisemes = false;
+  serverUrl = null;
   constructor(config) {
     this.apiKey = config.apiKey;
     this.prompt = config.prompt;
     this.voice = config.voice || "F1" /* F1 */;
     this.language = config.language || "en" /* ENGLISH */;
+    this.serverUrl = config.serverUrl || null;
     this.onTranscription = config.onTranscription;
     this.onResponse = config.onResponse;
     this.onAudioCallback = config.onAudio;
     this.onVisemesCallback = config.onVisemes;
     this.onStatus = config.onStatus;
     this.onError = config.onError;
+    this.wantVisemes = config.visemes || false;
   }
   /**
    * Connect to the Lokutor Voice Agent server
@@ -121,12 +126,12 @@ var VoiceAgentClient = class {
   async connect() {
     return new Promise((resolve, reject) => {
       try {
-        let url = DEFAULT_URLS.VOICE_AGENT;
+        let url = this.serverUrl || DEFAULT_URLS.VOICE_AGENT;
         if (this.apiKey) {
           const separator = url.includes("?") ? "&" : "?";
           url += `${separator}api_key=${this.apiKey}`;
         }
-        console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
+        console.log(`\u{1F517} Connecting to ${url}...`);
         this.ws = new WebSocket(url);
         this.ws.binaryType = "arraybuffer";
         this.ws.onopen = () => {
@@ -165,7 +170,8 @@ var VoiceAgentClient = class {
     this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
     this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
     this.ws.send(JSON.stringify({ type: "language", data: this.language }));
-    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}`);
+    this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
+    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
   }
   /**
    * Send raw PCM audio data to the server
@@ -498,7 +504,8 @@ var BrowserAudioManager = class {
   // Playback scheduling
   nextPlaybackTime = 0;
   activeSources = [];
-  playbackQueue = [];
+  // High-precision clock anchor for viseme sync
+  audioClockOffset = null;
   // Configuration
   inputSampleRate;
   outputSampleRate;
@@ -511,6 +518,7 @@ var BrowserAudioManager = class {
   // Audio processing state
   isMuted = false;
   isListening = false;
+  resampler = null;
   constructor(config = {}) {
     this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
     this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
@@ -534,7 +542,6 @@ var BrowserAudioManager = class {
     }
     if (this.audioContext.state === "suspended") {
       await this.audioContext.resume();
-      console.log("\u{1F442} AudioContext resumed");
     }
     if (analyserConfig?.enabled !== false) {
       this.analyserNode = this.audioContext.createAnalyser();
@@ -548,6 +555,7 @@ var BrowserAudioManager = class {
     if (!this.audioContext) {
       await this.init();
     }
+    this.resampler = new StreamResampler(this.audioContext.sampleRate, this.inputSampleRate);
     try {
       this.onAudioInput = onAudioInput;
       this.isListening = true;
@@ -563,9 +571,7 @@ var BrowserAudioManager = class {
       this.scriptProcessor = this.audioContext.createScriptProcessor(
         bufferSize,
         1,
-        // input channels
         1
-        // output channels
       );
       this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
       this.scriptProcessor.connect(this.audioContext.destination);
@@ -582,40 +588,19 @@ var BrowserAudioManager = class {
       throw err;
     }
   }
-  /**
-   * Internal method to process microphone audio data
-   */
   _processAudioInput(event) {
-    if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
-    if (this.isMuted) return;
-    const inputBuffer = event.inputBuffer;
-    const inputData = inputBuffer.getChannelData(0);
-    const outputBuffer = event.outputBuffer;
-    for (let i = 0; i < outputBuffer.getChannelData(0).length; i++) {
-      outputBuffer.getChannelData(0)[i] = 0;
-    }
-    const hardwareRate = this.audioContext.sampleRate;
-    let processedData = new Float32Array(inputData);
-    if (hardwareRate !== this.inputSampleRate) {
-      processedData = resampleWithAntiAliasing(
-        processedData,
-        hardwareRate,
-        this.inputSampleRate
-      );
+    if (!this.onAudioInput || !this.audioContext || !this.isListening || this.isMuted) return;
+    const inputData = event.inputBuffer.getChannelData(0);
+    event.outputBuffer.getChannelData(0).fill(0);
+    const resampled = this.resampler ? this.resampler.process(inputData) : inputData;
+    if (resampled && resampled.length > 0) {
+      const int16Data = float32ToPcm16(resampled);
+      this.onAudioInput(new Uint8Array(int16Data.buffer, int16Data.byteOffset, int16Data.byteLength));
     }
-    const int16Data = float32ToPcm16(processedData);
-    const uint8Data = new Uint8Array(
-      int16Data.buffer,
-      int16Data.byteOffset,
-      int16Data.byteLength
-    );
-    this.onAudioInput(uint8Data);
   }
-  /**
-   * Stop capturing microphone input
-   */
   stopMicrophone() {
     this.isListening = false;
+    this.resampler = null;
     if (this.mediaStream) {
       this.mediaStream.getTracks().forEach((track) => track.stop());
       this.mediaStream = null;
@@ -628,17 +613,12 @@ var BrowserAudioManager = class {
       this.mediaStreamAudioSourceNode.disconnect();
       this.mediaStreamAudioSourceNode = null;
     }
-    console.log("\u{1F3A4} Microphone stopped");
   }
   /**
    * Play back audio received from the server
-   * @param pcm16Data Int16 PCM audio data at SPEAKER_SAMPLE_RATE
    */
   playAudio(pcm16Data) {
-    if (!this.audioContext) {
-      console.warn("AudioContext not initialized");
-      return;
-    }
+    if (!this.audioContext) return;
     const int16Array = new Int16Array(
       pcm16Data.buffer,
       pcm16Data.byteOffset,
@@ -653,18 +633,17 @@ var BrowserAudioManager = class {
     audioBuffer.getChannelData(0).set(float32Data);
     this._schedulePlayback(audioBuffer);
   }
-  /**
-   * Internal method to schedule and play audio with sample-accurate timing
-   */
   _schedulePlayback(audioBuffer) {
     if (!this.audioContext) return;
     const currentTime = this.audioContext.currentTime;
     const duration = audioBuffer.length / this.outputSampleRate;
     const startTime = Math.max(
       currentTime + 0.01,
-      // Minimum 10ms delay
       this.nextPlaybackTime
     );
+    if (this.audioClockOffset === null) {
+      this.audioClockOffset = startTime;
+    }
     this.nextPlaybackTime = startTime + duration;
     const source = this.audioContext.createBufferSource();
     source.buffer = audioBuffer;
@@ -682,8 +661,18 @@ var BrowserAudioManager = class {
     };
   }
   /**
-   * Stop all currently playing audio and clear the queue
+   * Get the current high-precision audio clock offset for viseme synchronization.
+   * Total stream time (in ms) = (audioContext.currentTime - audioClockOffset) * 1000
+   */
+  getAudioClockOffset() {
+    return this.audioClockOffset;
+  }
+  /**
+   * Reset the audio clock offset (call when a response is interrupted or finished)
    */
+  resetAudioClock() {
+    this.audioClockOffset = null;
+  }
   stopPlayback() {
     this.activeSources.forEach((source) => {
       try {
@@ -692,26 +681,15 @@ var BrowserAudioManager = class {
       }
     });
     this.activeSources = [];
-    this.playbackQueue = [];
-    this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
-    console.log("\u{1F507} Playback stopped");
+    this.nextPlaybackTime = 0;
+    this.resetAudioClock();
   }
-  /**
-   * Toggle mute state
-   */
   setMuted(muted) {
     this.isMuted = muted;
   }
-  /**
-   * Get current mute state
-   */
   isMicMuted() {
     return this.isMuted;
   }
-  /**
-   * Get current amplitude from analyser (for visualization)
-   * Returns value between 0 and 1
-   */
   getAmplitude() {
     if (!this.analyserNode) return 0;
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
@@ -719,31 +697,18 @@ var BrowserAudioManager = class {
     const rms = calculateRMS(dataArray);
     return Math.min(rms * 10, 1);
   }
-  /**
-   * Get frequency data from analyser for visualization
-   */
   getFrequencyData() {
-    if (!this.analyserNode) {
-      return new Uint8Array(0);
-    }
+    if (!this.analyserNode) return new Uint8Array(0);
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
     this.analyserNode.getByteFrequencyData(dataArray);
     return dataArray;
   }
-  /**
-   * Get time-domain data from analyser for waveform visualization
-   */
   getWaveformData() {
-    if (!this.analyserNode) {
-      return new Uint8Array(0);
-    }
+    if (!this.analyserNode) return new Uint8Array(0);
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
     this.analyserNode.getByteTimeDomainData(dataArray);
     return dataArray;
   }
-  /**
-   * Cleanup and close AudioContext
-   */
   cleanup() {
     this.stopMicrophone();
     this.stopPlayback();
@@ -752,17 +717,124 @@ var BrowserAudioManager = class {
       this.analyserNode = null;
     }
   }
+  getAudioContext() {
+    return this.audioContext;
+  }
+};
+// src/voice-agent.ts
+var VoiceAgent = class {
+  client;
+  audioManager;
+  options;
+  isConnected = false;
+  visemeQueue = [];
+  constructor(options) {
+    this.options = options;
+    this.client = new VoiceAgentClient({
+      apiKey: options.apiKey,
+      prompt: options.prompt || "You are a helpful and friendly AI assistant.",
+      voice: options.voice || "F1" /* F1 */,
+      language: options.language || "en" /* ENGLISH */,
+      visemes: options.visemes ?? true,
+      serverUrl: options.serverUrl,
+      onTranscription: (text) => {
+        if (options.onTranscription) options.onTranscription(text, true);
+      },
+      onResponse: (text) => {
+        if (options.onTranscription) options.onTranscription(text, false);
+      },
+      onAudio: (data) => {
+        this.audioManager.playAudio(data);
+      },
+      onVisemes: (visemes) => {
+        this.visemeQueue.push(...visemes);
+        if (options.onVisemes) options.onVisemes(visemes);
+      },
+      onStatus: (status) => {
+        if (options.onStatusChange) options.onStatusChange(status);
+        if (status === "interrupted" || status === "thinking") {
+          this.audioManager.stopPlayback();
+          this.visemeQueue = [];
+        }
+      },
+      onError: (err) => {
+        if (options.onError) options.onError(err);
+      }
+    });
+    this.audioManager = new BrowserAudioManager({
+      autoGainControl: true,
+      echoCancellation: true,
+      noiseSuppression: true
+    });
+  }
   /**
-   * Get current audio context state
+   * Initialize hardware and connect to the AI server.
+   * This must be called in response to a user gesture (like a click)
+   * to satisfy browser AudioContext requirements.
    */
-  getState() {
-    return this.audioContext?.state ?? null;
+  async connect() {
+    try {
+      await this.audioManager.init();
+      const connected = await this.client.connect();
+      if (!connected) return false;
+      this.isConnected = true;
+      await this.audioManager.startMicrophone((pcm16Data) => {
+        if (this.isConnected) {
+          this.client.sendAudio(pcm16Data);
+        }
+      });
+      return true;
+    } catch (err) {
+      if (this.options.onError) this.options.onError(err);
+      return false;
+    }
   }
   /**
-   * Check if microphone is currently listening
+   * Get the current amplitude/volume of the microphone or output audio.
+   * Useful for voice activity visualization.
+   * @returns value between 0 and 1
    */
-  isRecording() {
-    return this.isListening;
+  getAmplitude() {
+    return this.audioManager.getAmplitude();
+  }
+  /**
+   * Mute or unmute the microphone.
+   */
+  toggleMute() {
+    const currentState = this.audioManager.isMicMuted();
+    this.audioManager.setMuted(!currentState);
+    return !currentState;
+  }
+  /**
+   * High-precision method to get visemes that should be active
+   * at the current playback frame. Use this in a requestAnimationFrame loop.
+   */
+  getFrameVisemes() {
+    const offset = this.audioManager.getAudioClockOffset();
+    const audioCtx = this.audioManager.getAudioContext();
+    if (offset === null || !audioCtx) return [];
+    const streamTime = (audioCtx.currentTime - offset) * 1e3;
+    const currentBatch = [];
+    while (this.visemeQueue.length > 0 && this.visemeQueue[0].t * 1e3 <= streamTime) {
+      currentBatch.push(this.visemeQueue.shift());
+    }
+    return currentBatch;
+  }
+  /**
+   * Change the system prompt mid-conversation.
+   */
+  updatePrompt(newPrompt) {
+    this.client.updatePrompt(newPrompt);
+  }
+  /**
+   * Disconnect and release audio resources.
+   */
+  disconnect() {
+    this.isConnected = false;
+    this.client.disconnect();
+    this.audioManager.cleanup();
+    this.visemeQueue = [];
   }
 };
 // Annotate the CommonJS export names for ESM import in node:
@@ -773,6 +845,7 @@ var BrowserAudioManager = class {
   Language,
   StreamResampler,
   TTSClient,
+  VoiceAgent,
   VoiceAgentClient,
   VoiceStyle,
   applyLowPassFilter,

package/dist/index.mjs CHANGED Viewed

@@ -21,8 +21,8 @@ var Language = /* @__PURE__ */ ((Language2) => {
   return Language2;
 })(Language || {});
 var AUDIO_CONFIG = {
-  SAMPLE_RATE: 44100,
-  SPEAKER_SAMPLE_RATE: 16e3,
+  SAMPLE_RATE: 16e3,
+  SPEAKER_SAMPLE_RATE: 44100,
   CHANNELS: 1,
   CHUNK_DURATION_MS: 20,
   get CHUNK_SIZE() {
@@ -59,17 +59,21 @@ var VoiceAgentClient = class {
   isConnected = false;
   messages = [];
   visemeListeners = [];
+  wantVisemes = false;
+  serverUrl = null;
   constructor(config) {
     this.apiKey = config.apiKey;
     this.prompt = config.prompt;
     this.voice = config.voice || "F1" /* F1 */;
     this.language = config.language || "en" /* ENGLISH */;
+    this.serverUrl = config.serverUrl || null;
     this.onTranscription = config.onTranscription;
     this.onResponse = config.onResponse;
     this.onAudioCallback = config.onAudio;
     this.onVisemesCallback = config.onVisemes;
     this.onStatus = config.onStatus;
     this.onError = config.onError;
+    this.wantVisemes = config.visemes || false;
   }
   /**
    * Connect to the Lokutor Voice Agent server
@@ -77,12 +81,12 @@ var VoiceAgentClient = class {
   async connect() {
     return new Promise((resolve, reject) => {
       try {
-        let url = DEFAULT_URLS.VOICE_AGENT;
+        let url = this.serverUrl || DEFAULT_URLS.VOICE_AGENT;
         if (this.apiKey) {
           const separator = url.includes("?") ? "&" : "?";
           url += `${separator}api_key=${this.apiKey}`;
         }
-        console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
+        console.log(`\u{1F517} Connecting to ${url}...`);
         this.ws = new WebSocket(url);
         this.ws.binaryType = "arraybuffer";
         this.ws.onopen = () => {
@@ -121,7 +125,8 @@ var VoiceAgentClient = class {
     this.ws.send(JSON.stringify({ type: "prompt", data: this.prompt }));
     this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
     this.ws.send(JSON.stringify({ type: "language", data: this.language }));
-    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}`);
+    this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
+    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
   }
   /**
    * Send raw PCM audio data to the server
@@ -454,7 +459,8 @@ var BrowserAudioManager = class {
   // Playback scheduling
   nextPlaybackTime = 0;
   activeSources = [];
-  playbackQueue = [];
+  // High-precision clock anchor for viseme sync
+  audioClockOffset = null;
   // Configuration
   inputSampleRate;
   outputSampleRate;
@@ -467,6 +473,7 @@ var BrowserAudioManager = class {
   // Audio processing state
   isMuted = false;
   isListening = false;
+  resampler = null;
   constructor(config = {}) {
     this.inputSampleRate = config.inputSampleRate ?? AUDIO_CONFIG.SAMPLE_RATE;
     this.outputSampleRate = config.outputSampleRate ?? AUDIO_CONFIG.SPEAKER_SAMPLE_RATE;
@@ -490,7 +497,6 @@ var BrowserAudioManager = class {
     }
     if (this.audioContext.state === "suspended") {
       await this.audioContext.resume();
-      console.log("\u{1F442} AudioContext resumed");
     }
     if (analyserConfig?.enabled !== false) {
       this.analyserNode = this.audioContext.createAnalyser();
@@ -504,6 +510,7 @@ var BrowserAudioManager = class {
     if (!this.audioContext) {
       await this.init();
     }
+    this.resampler = new StreamResampler(this.audioContext.sampleRate, this.inputSampleRate);
     try {
       this.onAudioInput = onAudioInput;
       this.isListening = true;
@@ -519,9 +526,7 @@ var BrowserAudioManager = class {
       this.scriptProcessor = this.audioContext.createScriptProcessor(
         bufferSize,
         1,
-        // input channels
         1
-        // output channels
       );
       this.mediaStreamAudioSourceNode.connect(this.scriptProcessor);
       this.scriptProcessor.connect(this.audioContext.destination);
@@ -538,40 +543,19 @@ var BrowserAudioManager = class {
       throw err;
     }
   }
-  /**
-   * Internal method to process microphone audio data
-   */
   _processAudioInput(event) {
-    if (!this.onAudioInput || !this.audioContext || !this.isListening) return;
-    if (this.isMuted) return;
-    const inputBuffer = event.inputBuffer;
-    const inputData = inputBuffer.getChannelData(0);
-    const outputBuffer = event.outputBuffer;
-    for (let i = 0; i < outputBuffer.getChannelData(0).length; i++) {
-      outputBuffer.getChannelData(0)[i] = 0;
-    }
-    const hardwareRate = this.audioContext.sampleRate;
-    let processedData = new Float32Array(inputData);
-    if (hardwareRate !== this.inputSampleRate) {
-      processedData = resampleWithAntiAliasing(
-        processedData,
-        hardwareRate,
-        this.inputSampleRate
-      );
+    if (!this.onAudioInput || !this.audioContext || !this.isListening || this.isMuted) return;
+    const inputData = event.inputBuffer.getChannelData(0);
+    event.outputBuffer.getChannelData(0).fill(0);
+    const resampled = this.resampler ? this.resampler.process(inputData) : inputData;
+    if (resampled && resampled.length > 0) {
+      const int16Data = float32ToPcm16(resampled);
+      this.onAudioInput(new Uint8Array(int16Data.buffer, int16Data.byteOffset, int16Data.byteLength));
     }
-    const int16Data = float32ToPcm16(processedData);
-    const uint8Data = new Uint8Array(
-      int16Data.buffer,
-      int16Data.byteOffset,
-      int16Data.byteLength
-    );
-    this.onAudioInput(uint8Data);
   }
-  /**
-   * Stop capturing microphone input
-   */
   stopMicrophone() {
     this.isListening = false;
+    this.resampler = null;
     if (this.mediaStream) {
       this.mediaStream.getTracks().forEach((track) => track.stop());
       this.mediaStream = null;
@@ -584,17 +568,12 @@ var BrowserAudioManager = class {
       this.mediaStreamAudioSourceNode.disconnect();
       this.mediaStreamAudioSourceNode = null;
     }
-    console.log("\u{1F3A4} Microphone stopped");
   }
   /**
    * Play back audio received from the server
-   * @param pcm16Data Int16 PCM audio data at SPEAKER_SAMPLE_RATE
    */
   playAudio(pcm16Data) {
-    if (!this.audioContext) {
-      console.warn("AudioContext not initialized");
-      return;
-    }
+    if (!this.audioContext) return;
     const int16Array = new Int16Array(
       pcm16Data.buffer,
       pcm16Data.byteOffset,
@@ -609,18 +588,17 @@ var BrowserAudioManager = class {
     audioBuffer.getChannelData(0).set(float32Data);
     this._schedulePlayback(audioBuffer);
   }
-  /**
-   * Internal method to schedule and play audio with sample-accurate timing
-   */
   _schedulePlayback(audioBuffer) {
     if (!this.audioContext) return;
     const currentTime = this.audioContext.currentTime;
     const duration = audioBuffer.length / this.outputSampleRate;
     const startTime = Math.max(
       currentTime + 0.01,
-      // Minimum 10ms delay
       this.nextPlaybackTime
     );
+    if (this.audioClockOffset === null) {
+      this.audioClockOffset = startTime;
+    }
     this.nextPlaybackTime = startTime + duration;
     const source = this.audioContext.createBufferSource();
     source.buffer = audioBuffer;
@@ -638,8 +616,18 @@ var BrowserAudioManager = class {
     };
   }
   /**
-   * Stop all currently playing audio and clear the queue
+   * Get the current high-precision audio clock offset for viseme synchronization.
+   * Total stream time (in ms) = (audioContext.currentTime - audioClockOffset) * 1000
+   */
+  getAudioClockOffset() {
+    return this.audioClockOffset;
+  }
+  /**
+   * Reset the audio clock offset (call when a response is interrupted or finished)
    */
+  resetAudioClock() {
+    this.audioClockOffset = null;
+  }
   stopPlayback() {
     this.activeSources.forEach((source) => {
       try {
@@ -648,26 +636,15 @@ var BrowserAudioManager = class {
       }
     });
     this.activeSources = [];
-    this.playbackQueue = [];
-    this.nextPlaybackTime = this.audioContext?.currentTime ?? 0;
-    console.log("\u{1F507} Playback stopped");
+    this.nextPlaybackTime = 0;
+    this.resetAudioClock();
   }
-  /**
-   * Toggle mute state
-   */
   setMuted(muted) {
     this.isMuted = muted;
   }
-  /**
-   * Get current mute state
-   */
   isMicMuted() {
     return this.isMuted;
   }
-  /**
-   * Get current amplitude from analyser (for visualization)
-   * Returns value between 0 and 1
-   */
   getAmplitude() {
     if (!this.analyserNode) return 0;
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
@@ -675,31 +652,18 @@ var BrowserAudioManager = class {
     const rms = calculateRMS(dataArray);
     return Math.min(rms * 10, 1);
   }
-  /**
-   * Get frequency data from analyser for visualization
-   */
   getFrequencyData() {
-    if (!this.analyserNode) {
-      return new Uint8Array(0);
-    }
+    if (!this.analyserNode) return new Uint8Array(0);
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
     this.analyserNode.getByteFrequencyData(dataArray);
     return dataArray;
   }
-  /**
-   * Get time-domain data from analyser for waveform visualization
-   */
   getWaveformData() {
-    if (!this.analyserNode) {
-      return new Uint8Array(0);
-    }
+    if (!this.analyserNode) return new Uint8Array(0);
     const dataArray = new Uint8Array(this.analyserNode.frequencyBinCount);
     this.analyserNode.getByteTimeDomainData(dataArray);
     return dataArray;
   }
-  /**
-   * Cleanup and close AudioContext
-   */
   cleanup() {
     this.stopMicrophone();
     this.stopPlayback();
@@ -708,17 +672,124 @@ var BrowserAudioManager = class {
       this.analyserNode = null;
     }
   }
+  getAudioContext() {
+    return this.audioContext;
+  }
+};
+// src/voice-agent.ts
+var VoiceAgent = class {
+  client;
+  audioManager;
+  options;
+  isConnected = false;
+  visemeQueue = [];
+  constructor(options) {
+    this.options = options;
+    this.client = new VoiceAgentClient({
+      apiKey: options.apiKey,
+      prompt: options.prompt || "You are a helpful and friendly AI assistant.",
+      voice: options.voice || "F1" /* F1 */,
+      language: options.language || "en" /* ENGLISH */,
+      visemes: options.visemes ?? true,
+      serverUrl: options.serverUrl,
+      onTranscription: (text) => {
+        if (options.onTranscription) options.onTranscription(text, true);
+      },
+      onResponse: (text) => {
+        if (options.onTranscription) options.onTranscription(text, false);
+      },
+      onAudio: (data) => {
+        this.audioManager.playAudio(data);
+      },
+      onVisemes: (visemes) => {
+        this.visemeQueue.push(...visemes);
+        if (options.onVisemes) options.onVisemes(visemes);
+      },
+      onStatus: (status) => {
+        if (options.onStatusChange) options.onStatusChange(status);
+        if (status === "interrupted" || status === "thinking") {
+          this.audioManager.stopPlayback();
+          this.visemeQueue = [];
+        }
+      },
+      onError: (err) => {
+        if (options.onError) options.onError(err);
+      }
+    });
+    this.audioManager = new BrowserAudioManager({
+      autoGainControl: true,
+      echoCancellation: true,
+      noiseSuppression: true
+    });
+  }
   /**
-   * Get current audio context state
+   * Initialize hardware and connect to the AI server.
+   * This must be called in response to a user gesture (like a click)
+   * to satisfy browser AudioContext requirements.
    */
-  getState() {
-    return this.audioContext?.state ?? null;
+  async connect() {
+    try {
+      await this.audioManager.init();
+      const connected = await this.client.connect();
+      if (!connected) return false;
+      this.isConnected = true;
+      await this.audioManager.startMicrophone((pcm16Data) => {
+        if (this.isConnected) {
+          this.client.sendAudio(pcm16Data);
+        }
+      });
+      return true;
+    } catch (err) {
+      if (this.options.onError) this.options.onError(err);
+      return false;
+    }
   }
   /**
-   * Check if microphone is currently listening
+   * Get the current amplitude/volume of the microphone or output audio.
+   * Useful for voice activity visualization.
+   * @returns value between 0 and 1
    */
-  isRecording() {
-    return this.isListening;
+  getAmplitude() {
+    return this.audioManager.getAmplitude();
+  }
+  /**
+   * Mute or unmute the microphone.
+   */
+  toggleMute() {
+    const currentState = this.audioManager.isMicMuted();
+    this.audioManager.setMuted(!currentState);
+    return !currentState;
+  }
+  /**
+   * High-precision method to get visemes that should be active
+   * at the current playback frame. Use this in a requestAnimationFrame loop.
+   */
+  getFrameVisemes() {
+    const offset = this.audioManager.getAudioClockOffset();
+    const audioCtx = this.audioManager.getAudioContext();
+    if (offset === null || !audioCtx) return [];
+    const streamTime = (audioCtx.currentTime - offset) * 1e3;
+    const currentBatch = [];
+    while (this.visemeQueue.length > 0 && this.visemeQueue[0].t * 1e3 <= streamTime) {
+      currentBatch.push(this.visemeQueue.shift());
+    }
+    return currentBatch;
+  }
+  /**
+   * Change the system prompt mid-conversation.
+   */
+  updatePrompt(newPrompt) {
+    this.client.updatePrompt(newPrompt);
+  }
+  /**
+   * Disconnect and release audio resources.
+   */
+  disconnect() {
+    this.isConnected = false;
+    this.client.disconnect();
+    this.audioManager.cleanup();
+    this.visemeQueue = [];
   }
 };
 export {
@@ -728,6 +799,7 @@ export {
   Language,
   StreamResampler,
   TTSClient,
+  VoiceAgent,
   VoiceAgentClient,
   VoiceStyle,
   applyLowPassFilter,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lokutor/sdk",
-  "version": "1.1.7",
+  "version": "1.1.9",
   "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",