npm - @lokutor/sdk - Versions diffs - 1.1.15 → 1.1.17 - Mend

@lokutor/sdk 1.1.15 → 1.1.17

This diff shows the changes between two publicly available versions of the package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Files changed (10) hide show

package/dist/{chunk-SNNPJP5R.mjs → chunk-UI24THO7.mjs} +2 -0
package/dist/index.d.mts +35 -3
package/dist/index.d.ts +35 -3
package/dist/index.js +277 -51
package/dist/index.mjs +116 -17
package/dist/{node-audio.mjs → node-audio-5HOWE6MC.mjs} +25 -19
package/package.json +3 -17
package/dist/node-audio.d.mts +0 -25
package/dist/node-audio.d.ts +0 -25
package/dist/node-audio.js +0 -132

package/dist/{chunk-SNNPJP5R.mjs → chunk-UI24THO7.mjs} RENAMED Viewed

@@ -22,7 +22,9 @@ var Language = /* @__PURE__ */ ((Language2) => {
 })(Language || {});
 var AUDIO_CONFIG = {
   SAMPLE_RATE: 16e3,
+  SAMPLE_RATE_INPUT: 16e3,
   SPEAKER_SAMPLE_RATE: 44100,
+  SAMPLE_RATE_OUTPUT: 44100,
   CHANNELS: 1,
   CHUNK_DURATION_MS: 20,
   get CHUNK_SIZE() {

package/dist/index.d.mts CHANGED Viewed

@@ -28,7 +28,9 @@ declare enum Language {
  */
 declare const AUDIO_CONFIG: {
     SAMPLE_RATE: number;
+    SAMPLE_RATE_INPUT: number;
     SPEAKER_SAMPLE_RATE: number;
+    SAMPLE_RATE_OUTPUT: number;
     CHANNELS: number;
     CHUNK_DURATION_MS: number;
     readonly CHUNK_SIZE: number;
@@ -120,6 +122,20 @@ interface ToolCall {
     arguments: string;
 }
+/**
+ * Interface for audio hardware management (Browser/Node parity)
+ */
+interface AudioManager {
+    init(): Promise<void>;
+    startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
+    stopMicrophone(): void;
+    playAudio(pcm16Data: Uint8Array): void;
+    stopPlayback(): void;
+    cleanup(): void;
+    isMicMuted(): boolean;
+    setMuted(muted: boolean): void;
+    getAmplitude(): number;
+}
 /**
  * Main client for Lokutor Voice Agent SDK
  *
@@ -145,6 +161,7 @@ declare class VoiceAgentClient {
     private audioManager;
     private enableAudio;
     private currentGeneration;
+    private listeners;
     private isUserDisconnect;
     private reconnecting;
     private reconnectAttempts;
@@ -160,8 +177,16 @@ declare class VoiceAgentClient {
     });
     /**
      * Connect to the Lokutor Voice Agent server
+     * @param customAudioManager Optional replacement for the default audio hardware handler
+     */
+    connect(customAudioManager?: AudioManager): Promise<boolean>;
+    /**
+     * The "Golden Path" - Starts a managed session with hardware handled automatically.
+     * This is the recommended way to start a conversation in both Browser and Node.js.
      */
-    connect(): Promise<boolean>;
+    startManaged(config?: {
+        audioManager?: AudioManager;
+    }): Promise<this>;
     /**
      * Send initial configuration to the server
      */
@@ -179,7 +204,13 @@ declare class VoiceAgentClient {
      * Handle incoming text messages (metadata/transcriptions)
      */
     private handleTextMessage;
-    private audioListeners;
+    /**
+     * Register an event listener (for Python parity)
+     */
+    on(event: string, callback: Function): this;
+    /**
+     * Internal emitter for all events
+     */
     private emit;
     onAudio(callback: (data: Uint8Array) => void): void;
     onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -236,6 +267,7 @@ declare class TTSClient {
         visemes?: boolean;
         onAudio?: (data: Uint8Array) => void;
         onVisemes?: (visemes: any[]) => void;
+        onTTFB?: (ms: number) => void;
         onError?: (error: any) => void;
     }): Promise<void>;
 }
@@ -443,4 +475,4 @@ declare class BrowserAudioManager {
     isRecording(): boolean;
 }
-export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
+export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };

package/dist/index.d.ts CHANGED Viewed

@@ -28,7 +28,9 @@ declare enum Language {
  */
 declare const AUDIO_CONFIG: {
     SAMPLE_RATE: number;
+    SAMPLE_RATE_INPUT: number;
     SPEAKER_SAMPLE_RATE: number;
+    SAMPLE_RATE_OUTPUT: number;
     CHANNELS: number;
     CHUNK_DURATION_MS: number;
     readonly CHUNK_SIZE: number;
@@ -120,6 +122,20 @@ interface ToolCall {
     arguments: string;
 }
+/**
+ * Interface for audio hardware management (Browser/Node parity)
+ */
+interface AudioManager {
+    init(): Promise<void>;
+    startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
+    stopMicrophone(): void;
+    playAudio(pcm16Data: Uint8Array): void;
+    stopPlayback(): void;
+    cleanup(): void;
+    isMicMuted(): boolean;
+    setMuted(muted: boolean): void;
+    getAmplitude(): number;
+}
 /**
  * Main client for Lokutor Voice Agent SDK
  *
@@ -145,6 +161,7 @@ declare class VoiceAgentClient {
     private audioManager;
     private enableAudio;
     private currentGeneration;
+    private listeners;
     private isUserDisconnect;
     private reconnecting;
     private reconnectAttempts;
@@ -160,8 +177,16 @@ declare class VoiceAgentClient {
     });
     /**
      * Connect to the Lokutor Voice Agent server
+     * @param customAudioManager Optional replacement for the default audio hardware handler
+     */
+    connect(customAudioManager?: AudioManager): Promise<boolean>;
+    /**
+     * The "Golden Path" - Starts a managed session with hardware handled automatically.
+     * This is the recommended way to start a conversation in both Browser and Node.js.
      */
-    connect(): Promise<boolean>;
+    startManaged(config?: {
+        audioManager?: AudioManager;
+    }): Promise<this>;
     /**
      * Send initial configuration to the server
      */
@@ -179,7 +204,13 @@ declare class VoiceAgentClient {
      * Handle incoming text messages (metadata/transcriptions)
      */
     private handleTextMessage;
-    private audioListeners;
+    /**
+     * Register an event listener (for Python parity)
+     */
+    on(event: string, callback: Function): this;
+    /**
+     * Internal emitter for all events
+     */
     private emit;
     onAudio(callback: (data: Uint8Array) => void): void;
     onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -236,6 +267,7 @@ declare class TTSClient {
         visemes?: boolean;
         onAudio?: (data: Uint8Array) => void;
         onVisemes?: (visemes: any[]) => void;
+        onTTFB?: (ms: number) => void;
         onError?: (error: any) => void;
     }): Promise<void>;
 }
@@ -443,4 +475,4 @@ declare class BrowserAudioManager {
     isRecording(): boolean;
 }
-export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
+export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };

package/dist/index.js CHANGED Viewed

@@ -1,8 +1,13 @@
 "use strict";
+var __create = Object.create;
 var __defProp = Object.defineProperty;
 var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __esm = (fn, res) => function __init() {
+  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
+};
 var __export = (target, all) => {
   for (var name in all)
     __defProp(target, name, { get: all[name], enumerable: true });
@@ -15,8 +20,159 @@ var __copyProps = (to, from, except, desc) => {
   }
   return to;
 };
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
 var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+// src/types.ts
+var VoiceStyle, Language, AUDIO_CONFIG, DEFAULT_URLS;
+var init_types = __esm({
+  "src/types.ts"() {
+    "use strict";
+    VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
+      VoiceStyle2["F1"] = "F1";
+      VoiceStyle2["F2"] = "F2";
+      VoiceStyle2["F3"] = "F3";
+      VoiceStyle2["F4"] = "F4";
+      VoiceStyle2["F5"] = "F5";
+      VoiceStyle2["M1"] = "M1";
+      VoiceStyle2["M2"] = "M2";
+      VoiceStyle2["M3"] = "M3";
+      VoiceStyle2["M4"] = "M4";
+      VoiceStyle2["M5"] = "M5";
+      return VoiceStyle2;
+    })(VoiceStyle || {});
+    Language = /* @__PURE__ */ ((Language2) => {
+      Language2["ENGLISH"] = "en";
+      Language2["SPANISH"] = "es";
+      Language2["FRENCH"] = "fr";
+      Language2["PORTUGUESE"] = "pt";
+      Language2["KOREAN"] = "ko";
+      return Language2;
+    })(Language || {});
+    AUDIO_CONFIG = {
+      SAMPLE_RATE: 16e3,
+      SAMPLE_RATE_INPUT: 16e3,
+      SPEAKER_SAMPLE_RATE: 44100,
+      SAMPLE_RATE_OUTPUT: 44100,
+      CHANNELS: 1,
+      CHUNK_DURATION_MS: 20,
+      get CHUNK_SIZE() {
+        return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
+      }
+    };
+    DEFAULT_URLS = {
+      VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
+      TTS: "wss://api.lokutor.com/ws/tts"
+    };
+  }
+});
+// src/node-audio.ts
+var node_audio_exports = {};
+__export(node_audio_exports, {
+  NodeAudioManager: () => NodeAudioManager
+});
+var NodeAudioManager;
+var init_node_audio = __esm({
+  "src/node-audio.ts"() {
+    "use strict";
+    init_types();
+    NodeAudioManager = class {
+      speaker = null;
+      recorder = null;
+      recordingStream = null;
+      isMuted = false;
+      isListening = false;
+      constructor() {
+      }
+      async init() {
+        try {
+          const Speaker = await import("speaker").catch(() => null);
+          if (!Speaker) {
+            console.warn('\u26A0\uFE0F  Package "speaker" is missing. Hardware output will be disabled.');
+            console.warn("\u{1F449} Run: npm install speaker");
+          }
+        } catch (e) {
+          console.error("Error initializing Node audio:", e);
+        }
+      }
+      async startMicrophone(onAudioInput) {
+        if (this.isListening) return;
+        try {
+          const recorder = await import("node-record-lpcm16").catch(() => null);
+          if (!recorder) {
+            throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
+          }
+          console.log("\u{1F3A4} Starting microphone (Node.js)...");
+          this.recordingStream = recorder.record({
+            sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
+            threshold: 0,
+            verbose: false,
+            recordProgram: "sox"
+            // default
+          });
+          this.recordingStream.stream().on("data", (chunk) => {
+            if (!this.isMuted && onAudioInput) {
+              onAudioInput(new Uint8Array(chunk));
+            }
+          });
+          this.isListening = true;
+        } catch (e) {
+          console.error("Failed to start microphone:", e.message);
+          throw e;
+        }
+      }
+      stopMicrophone() {
+        if (this.recordingStream) {
+          this.recordingStream.stop();
+          this.recordingStream = null;
+        }
+        this.isListening = false;
+      }
+      async playAudio(pcm16Data) {
+        try {
+          if (!this.speaker) {
+            const Speaker = (await import("speaker")).default;
+            this.speaker = new Speaker({
+              channels: AUDIO_CONFIG.CHANNELS,
+              bitDepth: 16,
+              sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
+            });
+          }
+          this.speaker.write(Buffer.from(pcm16Data));
+        } catch (e) {
+        }
+      }
+      stopPlayback() {
+        if (this.speaker) {
+          this.speaker.end();
+          this.speaker = null;
+        }
+      }
+      cleanup() {
+        this.stopMicrophone();
+        this.stopPlayback();
+      }
+      isMicMuted() {
+        return this.isMuted;
+      }
+      setMuted(muted) {
+        this.isMuted = muted;
+      }
+      getAmplitude() {
+        return 0;
+      }
+    };
+  }
+});
 // src/index.ts
 var index_exports = {};
 __export(index_exports, {
@@ -41,42 +197,13 @@ __export(index_exports, {
   simpleTTS: () => simpleTTS
 });
 module.exports = __toCommonJS(index_exports);
+init_types();
-// src/types.ts
-var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
-  VoiceStyle2["F1"] = "F1";
-  VoiceStyle2["F2"] = "F2";
-  VoiceStyle2["F3"] = "F3";
-  VoiceStyle2["F4"] = "F4";
-  VoiceStyle2["F5"] = "F5";
-  VoiceStyle2["M1"] = "M1";
-  VoiceStyle2["M2"] = "M2";
-  VoiceStyle2["M3"] = "M3";
-  VoiceStyle2["M4"] = "M4";
-  VoiceStyle2["M5"] = "M5";
-  return VoiceStyle2;
-})(VoiceStyle || {});
-var Language = /* @__PURE__ */ ((Language2) => {
-  Language2["ENGLISH"] = "en";
-  Language2["SPANISH"] = "es";
-  Language2["FRENCH"] = "fr";
-  Language2["PORTUGUESE"] = "pt";
-  Language2["KOREAN"] = "ko";
-  return Language2;
-})(Language || {});
-var AUDIO_CONFIG = {
-  SAMPLE_RATE: 16e3,
-  SPEAKER_SAMPLE_RATE: 44100,
-  CHANNELS: 1,
-  CHUNK_DURATION_MS: 20,
-  get CHUNK_SIZE() {
-    return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
-  }
-};
-var DEFAULT_URLS = {
-  VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
-  TTS: "wss://api.lokutor.com/ws/tts"
-};
+// src/client.ts
+init_types();
+// src/browser-audio.ts
+init_types();
 // src/audio-utils.ts
 function pcm16ToFloat32(int16Data) {
@@ -521,6 +648,7 @@ var VoiceAgentClient = class {
   audioManager = null;
   enableAudio = false;
   currentGeneration = 0;
+  listeners = {};
   // Connection resilience
   isUserDisconnect = false;
   reconnecting = false;
@@ -543,14 +671,19 @@ var VoiceAgentClient = class {
   }
   /**
    * Connect to the Lokutor Voice Agent server
+   * @param customAudioManager Optional replacement for the default audio hardware handler
    */
-  async connect() {
+  async connect(customAudioManager) {
     this.isUserDisconnect = false;
-    if (this.enableAudio) {
-      if (!this.audioManager) {
+    if (this.enableAudio || customAudioManager) {
+      if (customAudioManager) {
+        this.audioManager = customAudioManager;
+      } else if (!this.audioManager && typeof window !== "undefined") {
         this.audioManager = new BrowserAudioManager();
       }
-      await this.audioManager.init();
+      if (this.audioManager) {
+        await this.audioManager.init();
+      }
     }
     return new Promise((resolve, reject) => {
       try {
@@ -611,6 +744,34 @@ var VoiceAgentClient = class {
       }
     });
   }
+  /**
+   * The "Golden Path" - Starts a managed session with hardware handled automatically.
+   * This is the recommended way to start a conversation in both Browser and Node.js.
+   */
+  async startManaged(config) {
+    this.enableAudio = true;
+    if (config?.audioManager) {
+      this.audioManager = config.audioManager;
+    } else if (!this.audioManager) {
+      if (typeof window !== "undefined") {
+        this.audioManager = new BrowserAudioManager();
+      } else {
+        try {
+          const { NodeAudioManager: NodeAudioManager2 } = await Promise.resolve().then(() => (init_node_audio(), node_audio_exports));
+          this.audioManager = new NodeAudioManager2();
+        } catch (e) {
+          console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
+        }
+      }
+    }
+    await this.connect();
+    if (this.audioManager && this.isConnected) {
+      await this.audioManager.startMicrophone((data) => {
+        this.sendAudio(data);
+      });
+    }
+    return this;
+  }
   /**
    * Send initial configuration to the server
    */
@@ -712,21 +873,51 @@ var VoiceAgentClient = class {
     } catch (e) {
     }
   }
-  audioListeners = [];
-  emit(event, data) {
-    if (event === "audio") {
-      if (this.onAudioCallback) this.onAudioCallback(data);
-      this.audioListeners.forEach((l) => l(data));
-    } else if (event === "visemes") {
-      if (this.onVisemesCallback) this.onVisemesCallback(data);
-      this.visemeListeners.forEach((l) => l(data));
+  /**
+   * Register an event listener (for Python parity)
+   */
+  on(event, callback) {
+    if (!this.listeners[event]) {
+      this.listeners[event] = [];
+    }
+    this.listeners[event].push(callback);
+    return this;
+  }
+  /**
+   * Internal emitter for all events
+   */
+  emit(event, ...args) {
+    const legacyMap = {
+      "transcription": "onTranscription",
+      "response": "onResponse",
+      "audio": "onAudioCallback",
+      "visemes": "onVisemesCallback",
+      "status": "onStatus",
+      "error": "onError"
+    };
+    const legacyKey = legacyMap[event];
+    if (legacyKey && this[legacyKey]) {
+      try {
+        this[legacyKey](...args);
+      } catch (e) {
+        console.error(`Error in legacy callback ${legacyKey}:`, e);
+      }
+    }
+    if (this.listeners[event]) {
+      this.listeners[event].forEach((cb) => {
+        try {
+          cb(...args);
+        } catch (e) {
+          console.error(`Error in listener for ${event}:`, e);
+        }
+      });
     }
   }
   onAudio(callback) {
-    this.audioListeners.push(callback);
+    this.on("audio", callback);
   }
   onVisemes(callback) {
-    this.visemeListeners.push(callback);
+    this.on("visemes", callback);
   }
   /**
    * Disconnect from the server
@@ -805,15 +996,28 @@ var TTSClient = class {
    */
   synthesize(options) {
     return new Promise((resolve, reject) => {
+      let activityTimeout;
+      let ws;
+      let startTime;
+      let firstByteReceived = false;
+      const refreshTimeout = () => {
+        if (activityTimeout) clearTimeout(activityTimeout);
+        activityTimeout = setTimeout(() => {
+          console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
+          if (ws) ws.close();
+          resolve();
+        }, 2e3);
+      };
       try {
         let url = DEFAULT_URLS.TTS;
         if (this.apiKey) {
           const separator = url.includes("?") ? "&" : "?";
           url += `${separator}api_key=${this.apiKey}`;
         }
-        const ws = new WebSocket(url);
+        ws = new WebSocket(url);
         ws.binaryType = "arraybuffer";
         ws.onopen = () => {
+          refreshTimeout();
           const req = {
             text: options.text,
             voice: options.voice || "F1" /* F1 */,
@@ -823,28 +1027,50 @@ var TTSClient = class {
             visemes: options.visemes || false
           };
           ws.send(JSON.stringify(req));
+          startTime = Date.now();
         };
         ws.onmessage = async (event) => {
+          refreshTimeout();
           if (event.data instanceof ArrayBuffer) {
+            if (!firstByteReceived) {
+              const ttfb = Date.now() - startTime;
+              if (options.onTTFB) options.onTTFB(ttfb);
+              firstByteReceived = true;
+            }
             if (options.onAudio) options.onAudio(new Uint8Array(event.data));
           } else {
+            const text = event.data.toString();
+            if (text === "EOS") {
+              if (activityTimeout) clearTimeout(activityTimeout);
+              ws.close();
+              resolve();
+              return;
+            }
             try {
-              const msg = JSON.parse(event.data.toString());
+              const msg = JSON.parse(text);
               if (Array.isArray(msg) && options.onVisemes) {
                 options.onVisemes(msg);
               }
+              if (msg.type === "eos") {
+                if (activityTimeout) clearTimeout(activityTimeout);
+                ws.close();
+                resolve();
+              }
             } catch (e) {
             }
           }
         };
         ws.onerror = (err) => {
+          if (activityTimeout) clearTimeout(activityTimeout);
           if (options.onError) options.onError(err);
           reject(err);
         };
         ws.onclose = () => {
+          if (activityTimeout) clearTimeout(activityTimeout);
           resolve();
         };
       } catch (err) {
+        if (activityTimeout) clearTimeout(activityTimeout);
         if (options.onError) options.onError(err);
         reject(err);
       }

package/dist/index.mjs CHANGED Viewed

@@ -3,7 +3,7 @@ import {
   DEFAULT_URLS,
   Language,
   VoiceStyle
-} from "./chunk-SNNPJP5R.mjs";
+} from "./chunk-UI24THO7.mjs";
 // src/audio-utils.ts
 function pcm16ToFloat32(int16Data) {
@@ -448,6 +448,7 @@ var VoiceAgentClient = class {
   audioManager = null;
   enableAudio = false;
   currentGeneration = 0;
+  listeners = {};
   // Connection resilience
   isUserDisconnect = false;
   reconnecting = false;
@@ -470,14 +471,19 @@ var VoiceAgentClient = class {
   }
   /**
    * Connect to the Lokutor Voice Agent server
+   * @param customAudioManager Optional replacement for the default audio hardware handler
    */
-  async connect() {
+  async connect(customAudioManager) {
     this.isUserDisconnect = false;
-    if (this.enableAudio) {
-      if (!this.audioManager) {
+    if (this.enableAudio || customAudioManager) {
+      if (customAudioManager) {
+        this.audioManager = customAudioManager;
+      } else if (!this.audioManager && typeof window !== "undefined") {
         this.audioManager = new BrowserAudioManager();
       }
-      await this.audioManager.init();
+      if (this.audioManager) {
+        await this.audioManager.init();
+      }
     }
     return new Promise((resolve, reject) => {
       try {
@@ -538,6 +544,34 @@ var VoiceAgentClient = class {
       }
     });
   }
+  /**
+   * The "Golden Path" - Starts a managed session with hardware handled automatically.
+   * This is the recommended way to start a conversation in both Browser and Node.js.
+   */
+  async startManaged(config) {
+    this.enableAudio = true;
+    if (config?.audioManager) {
+      this.audioManager = config.audioManager;
+    } else if (!this.audioManager) {
+      if (typeof window !== "undefined") {
+        this.audioManager = new BrowserAudioManager();
+      } else {
+        try {
+          const { NodeAudioManager } = await import("./node-audio-5HOWE6MC.mjs");
+          this.audioManager = new NodeAudioManager();
+        } catch (e) {
+          console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
+        }
+      }
+    }
+    await this.connect();
+    if (this.audioManager && this.isConnected) {
+      await this.audioManager.startMicrophone((data) => {
+        this.sendAudio(data);
+      });
+    }
+    return this;
+  }
   /**
    * Send initial configuration to the server
    */
@@ -639,21 +673,51 @@ var VoiceAgentClient = class {
     } catch (e) {
     }
   }
-  audioListeners = [];
-  emit(event, data) {
-    if (event === "audio") {
-      if (this.onAudioCallback) this.onAudioCallback(data);
-      this.audioListeners.forEach((l) => l(data));
-    } else if (event === "visemes") {
-      if (this.onVisemesCallback) this.onVisemesCallback(data);
-      this.visemeListeners.forEach((l) => l(data));
+  /**
+   * Register an event listener (for Python parity)
+   */
+  on(event, callback) {
+    if (!this.listeners[event]) {
+      this.listeners[event] = [];
+    }
+    this.listeners[event].push(callback);
+    return this;
+  }
+  /**
+   * Internal emitter for all events
+   */
+  emit(event, ...args) {
+    const legacyMap = {
+      "transcription": "onTranscription",
+      "response": "onResponse",
+      "audio": "onAudioCallback",
+      "visemes": "onVisemesCallback",
+      "status": "onStatus",
+      "error": "onError"
+    };
+    const legacyKey = legacyMap[event];
+    if (legacyKey && this[legacyKey]) {
+      try {
+        this[legacyKey](...args);
+      } catch (e) {
+        console.error(`Error in legacy callback ${legacyKey}:`, e);
+      }
+    }
+    if (this.listeners[event]) {
+      this.listeners[event].forEach((cb) => {
+        try {
+          cb(...args);
+        } catch (e) {
+          console.error(`Error in listener for ${event}:`, e);
+        }
+      });
     }
   }
   onAudio(callback) {
-    this.audioListeners.push(callback);
+    this.on("audio", callback);
   }
   onVisemes(callback) {
-    this.visemeListeners.push(callback);
+    this.on("visemes", callback);
   }
   /**
    * Disconnect from the server
@@ -732,15 +796,28 @@ var TTSClient = class {
    */
   synthesize(options) {
     return new Promise((resolve, reject) => {
+      let activityTimeout;
+      let ws;
+      let startTime;
+      let firstByteReceived = false;
+      const refreshTimeout = () => {
+        if (activityTimeout) clearTimeout(activityTimeout);
+        activityTimeout = setTimeout(() => {
+          console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
+          if (ws) ws.close();
+          resolve();
+        }, 2e3);
+      };
       try {
         let url = DEFAULT_URLS.TTS;
         if (this.apiKey) {
           const separator = url.includes("?") ? "&" : "?";
           url += `${separator}api_key=${this.apiKey}`;
         }
-        const ws = new WebSocket(url);
+        ws = new WebSocket(url);
         ws.binaryType = "arraybuffer";
         ws.onopen = () => {
+          refreshTimeout();
           const req = {
             text: options.text,
             voice: options.voice || "F1" /* F1 */,
@@ -750,28 +827,50 @@ var TTSClient = class {
             visemes: options.visemes || false
           };
           ws.send(JSON.stringify(req));
+          startTime = Date.now();
         };
         ws.onmessage = async (event) => {
+          refreshTimeout();
           if (event.data instanceof ArrayBuffer) {
+            if (!firstByteReceived) {
+              const ttfb = Date.now() - startTime;
+              if (options.onTTFB) options.onTTFB(ttfb);
+              firstByteReceived = true;
+            }
             if (options.onAudio) options.onAudio(new Uint8Array(event.data));
           } else {
+            const text = event.data.toString();
+            if (text === "EOS") {
+              if (activityTimeout) clearTimeout(activityTimeout);
+              ws.close();
+              resolve();
+              return;
+            }
             try {
-              const msg = JSON.parse(event.data.toString());
+              const msg = JSON.parse(text);
               if (Array.isArray(msg) && options.onVisemes) {
                 options.onVisemes(msg);
               }
+              if (msg.type === "eos") {
+                if (activityTimeout) clearTimeout(activityTimeout);
+                ws.close();
+                resolve();
+              }
             } catch (e) {
             }
           }
         };
         ws.onerror = (err) => {
+          if (activityTimeout) clearTimeout(activityTimeout);
           if (options.onError) options.onError(err);
           reject(err);
         };
         ws.onclose = () => {
+          if (activityTimeout) clearTimeout(activityTimeout);
           resolve();
         };
       } catch (err) {
+        if (activityTimeout) clearTimeout(activityTimeout);
         if (options.onError) options.onError(err);
         reject(err);
       }

package/dist/{node-audio.mjs → node-audio-5HOWE6MC.mjs} RENAMED Viewed

@@ -1,6 +1,6 @@
 import {
   AUDIO_CONFIG
-} from "./chunk-SNNPJP5R.mjs";
+} from "./chunk-UI24THO7.mjs";
 // src/node-audio.ts
 var NodeAudioManager = class {
@@ -16,7 +16,7 @@ var NodeAudioManager = class {
       const Speaker = await import("speaker").catch(() => null);
       if (!Speaker) {
         console.warn('\u26A0\uFE0F  Package "speaker" is missing. Hardware output will be disabled.');
-        console.warn("\u{1F449}  Run: npm install speaker");
+        console.warn("\u{1F449} Run: npm install speaker");
       }
     } catch (e) {
       console.error("Error initializing Node audio:", e);
@@ -24,23 +24,29 @@ var NodeAudioManager = class {
   }
   async startMicrophone(onAudioInput) {
     if (this.isListening) return;
-    const recorder = await import("node-record-lpcm16").catch(() => null);
-    if (!recorder) {
-      throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
-    }
-    this.recorder = recorder;
-    this.recordingStream = recorder.record({
-      sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
-      threshold: 0,
-      verbose: false,
-      recordProgram: "sox"
-    });
-    this.recordingStream.stream().on("data", (chunk) => {
-      if (!this.isMuted && onAudioInput) {
-        onAudioInput(new Uint8Array(chunk));
+    try {
+      const recorder = await import("node-record-lpcm16").catch(() => null);
+      if (!recorder) {
+        throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
       }
-    });
-    this.isListening = true;
+      console.log("\u{1F3A4} Starting microphone (Node.js)...");
+      this.recordingStream = recorder.record({
+        sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
+        threshold: 0,
+        verbose: false,
+        recordProgram: "sox"
+        // default
+      });
+      this.recordingStream.stream().on("data", (chunk) => {
+        if (!this.isMuted && onAudioInput) {
+          onAudioInput(new Uint8Array(chunk));
+        }
+      });
+      this.isListening = true;
+    } catch (e) {
+      console.error("Failed to start microphone:", e.message);
+      throw e;
+    }
   }
   stopMicrophone() {
     if (this.recordingStream) {
@@ -60,7 +66,7 @@ var NodeAudioManager = class {
         });
       }
       this.speaker.write(Buffer.from(pcm16Data));
-    } catch {
+    } catch (e) {
     }
   }
   stopPlayback() {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lokutor/sdk",
-  "version": "1.1.15",
+  "version": "1.1.17",
   "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",
@@ -8,23 +8,9 @@
   "files": [
     "dist"
   ],
-  "exports": {
-    ".": {
-      "import": "./dist/index.mjs",
-      "require": "./dist/index.js"
-    },
-    "./node-audio": {
-      "import": "./dist/node-audio.mjs",
-      "require": "./dist/node-audio.js"
-    }
-  },
-  "browser": {
-    "speaker": false,
-    "node-record-lpcm16": false
-  },
   "scripts": {
-    "build": "tsup src/index.ts src/node-audio.ts --format cjs,esm --dts --clean",
-    "dev": "tsup src/index.ts src/node-audio.ts --format cjs,esm --watch --dts --clean",
+    "build": "tsup src/index.ts --format cjs,esm --dts",
+    "dev": "tsup src/index.ts --format cjs,esm --watch --dts",
     "test": "vitest run",
     "test:watch": "vitest",
     "lint": "eslint src --ext .ts",

package/dist/node-audio.d.mts DELETED Viewed

@@ -1,25 +0,0 @@
-/**
- * Node.js-only audio manager.
- *
- * This module is intentionally separate, and is not exported from the browser default
- * entrypoint, so browser bundlers do not include Node-only dependencies.
- */
-declare class NodeAudioManager {
-    private speaker;
-    private recorder;
-    private recordingStream;
-    private isMuted;
-    private isListening;
-    constructor();
-    init(): Promise<void>;
-    startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
-    stopMicrophone(): void;
-    playAudio(pcm16Data: Uint8Array): Promise<void>;
-    stopPlayback(): void;
-    cleanup(): void;
-    isMicMuted(): boolean;
-    setMuted(muted: boolean): void;
-    getAmplitude(): number;
-}
-export { NodeAudioManager };

package/dist/node-audio.d.ts DELETED Viewed

@@ -1,25 +0,0 @@
-/**
- * Node.js-only audio manager.
- *
- * This module is intentionally separate, and is not exported from the browser default
- * entrypoint, so browser bundlers do not include Node-only dependencies.
- */
-declare class NodeAudioManager {
-    private speaker;
-    private recorder;
-    private recordingStream;
-    private isMuted;
-    private isListening;
-    constructor();
-    init(): Promise<void>;
-    startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
-    stopMicrophone(): void;
-    playAudio(pcm16Data: Uint8Array): Promise<void>;
-    stopPlayback(): void;
-    cleanup(): void;
-    isMicMuted(): boolean;
-    setMuted(muted: boolean): void;
-    getAmplitude(): number;
-}
-export { NodeAudioManager };

package/dist/node-audio.js DELETED Viewed

@@ -1,132 +0,0 @@
-"use strict";
-var __create = Object.create;
-var __defProp = Object.defineProperty;
-var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
-var __getOwnPropNames = Object.getOwnPropertyNames;
-var __getProtoOf = Object.getPrototypeOf;
-var __hasOwnProp = Object.prototype.hasOwnProperty;
-var __export = (target, all) => {
-  for (var name in all)
-    __defProp(target, name, { get: all[name], enumerable: true });
-};
-var __copyProps = (to, from, except, desc) => {
-  if (from && typeof from === "object" || typeof from === "function") {
-    for (let key of __getOwnPropNames(from))
-      if (!__hasOwnProp.call(to, key) && key !== except)
-        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
-  }
-  return to;
-};
-var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
-  // If the importer is in node compatibility mode or this is not an ESM
-  // file that has been converted to a CommonJS file using a Babel-
-  // compatible transform (i.e. "__esModule" has not been set), then set
-  // "default" to the CommonJS "module.exports" for node compatibility.
-  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
-  mod
-));
-var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
-// src/node-audio.ts
-var node_audio_exports = {};
-__export(node_audio_exports, {
-  NodeAudioManager: () => NodeAudioManager
-});
-module.exports = __toCommonJS(node_audio_exports);
-// src/types.ts
-var AUDIO_CONFIG = {
-  SAMPLE_RATE: 16e3,
-  SPEAKER_SAMPLE_RATE: 44100,
-  CHANNELS: 1,
-  CHUNK_DURATION_MS: 20,
-  get CHUNK_SIZE() {
-    return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
-  }
-};
-// src/node-audio.ts
-var NodeAudioManager = class {
-  speaker = null;
-  recorder = null;
-  recordingStream = null;
-  isMuted = false;
-  isListening = false;
-  constructor() {
-  }
-  async init() {
-    try {
-      const Speaker = await import("speaker").catch(() => null);
-      if (!Speaker) {
-        console.warn('\u26A0\uFE0F  Package "speaker" is missing. Hardware output will be disabled.');
-        console.warn("\u{1F449}  Run: npm install speaker");
-      }
-    } catch (e) {
-      console.error("Error initializing Node audio:", e);
-    }
-  }
-  async startMicrophone(onAudioInput) {
-    if (this.isListening) return;
-    const recorder = await import("node-record-lpcm16").catch(() => null);
-    if (!recorder) {
-      throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
-    }
-    this.recorder = recorder;
-    this.recordingStream = recorder.record({
-      sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
-      threshold: 0,
-      verbose: false,
-      recordProgram: "sox"
-    });
-    this.recordingStream.stream().on("data", (chunk) => {
-      if (!this.isMuted && onAudioInput) {
-        onAudioInput(new Uint8Array(chunk));
-      }
-    });
-    this.isListening = true;
-  }
-  stopMicrophone() {
-    if (this.recordingStream) {
-      this.recordingStream.stop();
-      this.recordingStream = null;
-    }
-    this.isListening = false;
-  }
-  async playAudio(pcm16Data) {
-    try {
-      if (!this.speaker) {
-        const Speaker = (await import("speaker")).default;
-        this.speaker = new Speaker({
-          channels: AUDIO_CONFIG.CHANNELS,
-          bitDepth: 16,
-          sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
-        });
-      }
-      this.speaker.write(Buffer.from(pcm16Data));
-    } catch {
-    }
-  }
-  stopPlayback() {
-    if (this.speaker) {
-      this.speaker.end();
-      this.speaker = null;
-    }
-  }
-  cleanup() {
-    this.stopMicrophone();
-    this.stopPlayback();
-  }
-  isMicMuted() {
-    return this.isMuted;
-  }
-  setMuted(muted) {
-    this.isMuted = muted;
-  }
-  getAmplitude() {
-    return 0;
-  }
-};
-// Annotate the CommonJS export names for ESM import in node:
-0 && (module.exports = {
-  NodeAudioManager
-});