npm - @lokutor/sdk - Versions diffs - 1.1.11 → 1.1.13 - Mend

@lokutor/sdk 1.1.11 → 1.1.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/dist/chunk-UI24THO7.mjs +44 -0
package/dist/index.d.mts +60 -3
package/dist/index.d.ts +60 -3
package/dist/index.js +293 -53
package/dist/index.mjs +137 -53
package/dist/node-audio-5HOWE6MC.mjs +94 -0
package/package.json +1 -1

package/dist/chunk-UI24THO7.mjs ADDED

@@ -0,0 +1,44 @@
+// src/types.ts
+var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
+  VoiceStyle2["F1"] = "F1";
+  VoiceStyle2["F2"] = "F2";
+  VoiceStyle2["F3"] = "F3";
+  VoiceStyle2["F4"] = "F4";
+  VoiceStyle2["F5"] = "F5";
+  VoiceStyle2["M1"] = "M1";
+  VoiceStyle2["M2"] = "M2";
+  VoiceStyle2["M3"] = "M3";
+  VoiceStyle2["M4"] = "M4";
+  VoiceStyle2["M5"] = "M5";
+  return VoiceStyle2;
+})(VoiceStyle || {});
+var Language = /* @__PURE__ */ ((Language2) => {
+  Language2["ENGLISH"] = "en";
+  Language2["SPANISH"] = "es";
+  Language2["FRENCH"] = "fr";
+  Language2["PORTUGUESE"] = "pt";
+  Language2["KOREAN"] = "ko";
+  return Language2;
+})(Language || {});
+var AUDIO_CONFIG = {
+  SAMPLE_RATE: 16e3,
+  SAMPLE_RATE_INPUT: 16e3,
+  SPEAKER_SAMPLE_RATE: 44100,
+  SAMPLE_RATE_OUTPUT: 44100,
+  CHANNELS: 1,
+  CHUNK_DURATION_MS: 20,
+  get CHUNK_SIZE() {
+    return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
+  }
+};
+var DEFAULT_URLS = {
+  VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
+  TTS: "wss://api.lokutor.com/ws/tts"
+};
+export {
+  VoiceStyle,
+  Language,
+  AUDIO_CONFIG,
+  DEFAULT_URLS
+};

package/dist/index.d.mts CHANGED

@@ -28,7 +28,9 @@ declare enum Language {
  */
 declare const AUDIO_CONFIG: {
     SAMPLE_RATE: number;
+    SAMPLE_RATE_INPUT: number;
     SPEAKER_SAMPLE_RATE: number;
+    SAMPLE_RATE_OUTPUT: number;
     CHANNELS: number;
     CHUNK_DURATION_MS: number;
     readonly CHUNK_SIZE: number;
@@ -97,7 +99,43 @@ interface Viseme {
     c: string;
     t: number;
 }
+/**
+ * Tool definition for LLM function calling (OpenAI format)
+ */
+interface ToolDefinition {
+    type: 'function';
+    function: {
+        name: string;
+        description: string;
+        parameters: {
+            type: 'object';
+            properties: Record<string, any>;
+            required?: string[];
+        };
+    };
+}
+/**
+ * Event data for tool execution
+ */
+interface ToolCall {
+    name: string;
+    arguments: string;
+}
+/**
+ * Interface for audio hardware management (Browser/Node parity)
+ */
+interface AudioManager {
+    init(): Promise<void>;
+    startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
+    stopMicrophone(): void;
+    playAudio(pcm16Data: Uint8Array): void;
+    stopPlayback(): void;
+    cleanup(): void;
+    isMicMuted(): boolean;
+    setMuted(muted: boolean): void;
+    getAmplitude(): number;
+}
 /**
  * Main client for Lokutor Voice Agent SDK
  *
@@ -109,6 +147,7 @@ declare class VoiceAgentClient {
     prompt: string;
     voice: VoiceStyle;
     language: Language;
+    tools: ToolDefinition[];
     private onTranscription?;
     private onResponse?;
     private onAudioCallback?;
@@ -121,6 +160,8 @@ declare class VoiceAgentClient {
     private wantVisemes;
     private audioManager;
     private enableAudio;
+    private currentGeneration;
+    private listeners;
     private isUserDisconnect;
     private reconnecting;
     private reconnectAttempts;
@@ -132,11 +173,20 @@ declare class VoiceAgentClient {
         visemes?: boolean;
         onVisemes?: (visemes: Viseme[]) => void;
         enableAudio?: boolean;
+        tools?: ToolDefinition[];
     });
     /**
      * Connect to the Lokutor Voice Agent server
+     * @param customAudioManager Optional replacement for the default audio hardware handler
      */
-    connect(): Promise<boolean>;
+    connect(customAudioManager?: AudioManager): Promise<boolean>;
+    /**
+     * The "Golden Path" - Starts a managed session with hardware handled automatically.
+     * This is the recommended way to start a conversation in both Browser and Node.js.
+     */
+    startManaged(config?: {
+        audioManager?: AudioManager;
+    }): Promise<this>;
     /**
      * Send initial configuration to the server
      */
@@ -154,7 +204,13 @@ declare class VoiceAgentClient {
      * Handle incoming text messages (metadata/transcriptions)
      */
     private handleTextMessage;
-    private audioListeners;
+    /**
+     * Register an event listener (for Python parity)
+     */
+    on(event: string, callback: Function): this;
+    /**
+     * Internal emitter for all events
+     */
     private emit;
     onAudio(callback: (data: Uint8Array) => void): void;
     onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -211,6 +267,7 @@ declare class TTSClient {
         visemes?: boolean;
         onAudio?: (data: Uint8Array) => void;
         onVisemes?: (visemes: any[]) => void;
+        onTTFB?: (ms: number) => void;
         onError?: (error: any) => void;
     }): Promise<void>;
 }
@@ -418,4 +475,4 @@ declare class BrowserAudioManager {
     isRecording(): boolean;
 }
-export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
+export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };

package/dist/index.d.ts CHANGED

@@ -28,7 +28,9 @@ declare enum Language {
  */
 declare const AUDIO_CONFIG: {
     SAMPLE_RATE: number;
+    SAMPLE_RATE_INPUT: number;
     SPEAKER_SAMPLE_RATE: number;
+    SAMPLE_RATE_OUTPUT: number;
     CHANNELS: number;
     CHUNK_DURATION_MS: number;
     readonly CHUNK_SIZE: number;
@@ -97,7 +99,43 @@ interface Viseme {
     c: string;
     t: number;
 }
+/**
+ * Tool definition for LLM function calling (OpenAI format)
+ */
+interface ToolDefinition {
+    type: 'function';
+    function: {
+        name: string;
+        description: string;
+        parameters: {
+            type: 'object';
+            properties: Record<string, any>;
+            required?: string[];
+        };
+    };
+}
+/**
+ * Event data for tool execution
+ */
+interface ToolCall {
+    name: string;
+    arguments: string;
+}
+/**
+ * Interface for audio hardware management (Browser/Node parity)
+ */
+interface AudioManager {
+    init(): Promise<void>;
+    startMicrophone(onAudioInput: (pcm16Data: Uint8Array) => void): Promise<void>;
+    stopMicrophone(): void;
+    playAudio(pcm16Data: Uint8Array): void;
+    stopPlayback(): void;
+    cleanup(): void;
+    isMicMuted(): boolean;
+    setMuted(muted: boolean): void;
+    getAmplitude(): number;
+}
 /**
  * Main client for Lokutor Voice Agent SDK
  *
@@ -109,6 +147,7 @@ declare class VoiceAgentClient {
     prompt: string;
     voice: VoiceStyle;
     language: Language;
+    tools: ToolDefinition[];
     private onTranscription?;
     private onResponse?;
     private onAudioCallback?;
@@ -121,6 +160,8 @@ declare class VoiceAgentClient {
     private wantVisemes;
     private audioManager;
     private enableAudio;
+    private currentGeneration;
+    private listeners;
     private isUserDisconnect;
     private reconnecting;
     private reconnectAttempts;
@@ -132,11 +173,20 @@ declare class VoiceAgentClient {
         visemes?: boolean;
         onVisemes?: (visemes: Viseme[]) => void;
         enableAudio?: boolean;
+        tools?: ToolDefinition[];
     });
     /**
      * Connect to the Lokutor Voice Agent server
+     * @param customAudioManager Optional replacement for the default audio hardware handler
      */
-    connect(): Promise<boolean>;
+    connect(customAudioManager?: AudioManager): Promise<boolean>;
+    /**
+     * The "Golden Path" - Starts a managed session with hardware handled automatically.
+     * This is the recommended way to start a conversation in both Browser and Node.js.
+     */
+    startManaged(config?: {
+        audioManager?: AudioManager;
+    }): Promise<this>;
     /**
      * Send initial configuration to the server
      */
@@ -154,7 +204,13 @@ declare class VoiceAgentClient {
      * Handle incoming text messages (metadata/transcriptions)
      */
     private handleTextMessage;
-    private audioListeners;
+    /**
+     * Register an event listener (for Python parity)
+     */
+    on(event: string, callback: Function): this;
+    /**
+     * Internal emitter for all events
+     */
     private emit;
     onAudio(callback: (data: Uint8Array) => void): void;
     onVisemes(callback: (visemes: Viseme[]) => void): void;
@@ -211,6 +267,7 @@ declare class TTSClient {
         visemes?: boolean;
         onAudio?: (data: Uint8Array) => void;
         onVisemes?: (visemes: any[]) => void;
+        onTTFB?: (ms: number) => void;
         onError?: (error: any) => void;
     }): Promise<void>;
 }
@@ -418,4 +475,4 @@ declare class BrowserAudioManager {
     isRecording(): boolean;
 }
-export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
+export { AUDIO_CONFIG, type AnalyserConfig, type AudioManager, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };

package/dist/index.js CHANGED

@@ -1,8 +1,13 @@
 "use strict";
+var __create = Object.create;
 var __defProp = Object.defineProperty;
 var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
 var __getOwnPropNames = Object.getOwnPropertyNames;
+var __getProtoOf = Object.getPrototypeOf;
 var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __esm = (fn, res) => function __init() {
+  return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
+};
 var __export = (target, all) => {
   for (var name in all)
     __defProp(target, name, { get: all[name], enumerable: true });
@@ -15,8 +20,159 @@ var __copyProps = (to, from, except, desc) => {
   }
   return to;
 };
+var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
+  // If the importer is in node compatibility mode or this is not an ESM
+  // file that has been converted to a CommonJS file using a Babel-
+  // compatible transform (i.e. "__esModule" has not been set), then set
+  // "default" to the CommonJS "module.exports" for node compatibility.
+  isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
+  mod
+));
 var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+// src/types.ts
+var VoiceStyle, Language, AUDIO_CONFIG, DEFAULT_URLS;
+var init_types = __esm({
+  "src/types.ts"() {
+    "use strict";
+    VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
+      VoiceStyle2["F1"] = "F1";
+      VoiceStyle2["F2"] = "F2";
+      VoiceStyle2["F3"] = "F3";
+      VoiceStyle2["F4"] = "F4";
+      VoiceStyle2["F5"] = "F5";
+      VoiceStyle2["M1"] = "M1";
+      VoiceStyle2["M2"] = "M2";
+      VoiceStyle2["M3"] = "M3";
+      VoiceStyle2["M4"] = "M4";
+      VoiceStyle2["M5"] = "M5";
+      return VoiceStyle2;
+    })(VoiceStyle || {});
+    Language = /* @__PURE__ */ ((Language2) => {
+      Language2["ENGLISH"] = "en";
+      Language2["SPANISH"] = "es";
+      Language2["FRENCH"] = "fr";
+      Language2["PORTUGUESE"] = "pt";
+      Language2["KOREAN"] = "ko";
+      return Language2;
+    })(Language || {});
+    AUDIO_CONFIG = {
+      SAMPLE_RATE: 16e3,
+      SAMPLE_RATE_INPUT: 16e3,
+      SPEAKER_SAMPLE_RATE: 44100,
+      SAMPLE_RATE_OUTPUT: 44100,
+      CHANNELS: 1,
+      CHUNK_DURATION_MS: 20,
+      get CHUNK_SIZE() {
+        return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
+      }
+    };
+    DEFAULT_URLS = {
+      VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
+      TTS: "wss://api.lokutor.com/ws/tts"
+    };
+  }
+});
+// src/node-audio.ts
+var node_audio_exports = {};
+__export(node_audio_exports, {
+  NodeAudioManager: () => NodeAudioManager
+});
+var NodeAudioManager;
+var init_node_audio = __esm({
+  "src/node-audio.ts"() {
+    "use strict";
+    init_types();
+    NodeAudioManager = class {
+      speaker = null;
+      recorder = null;
+      recordingStream = null;
+      isMuted = false;
+      isListening = false;
+      constructor() {
+      }
+      async init() {
+        try {
+          const Speaker = await import("speaker").catch(() => null);
+          if (!Speaker) {
+            console.warn('\u26A0\uFE0F  Package "speaker" is missing. Hardware output will be disabled.');
+            console.warn("\u{1F449} Run: npm install speaker");
+          }
+        } catch (e) {
+          console.error("Error initializing Node audio:", e);
+        }
+      }
+      async startMicrophone(onAudioInput) {
+        if (this.isListening) return;
+        try {
+          const recorder = await import("node-record-lpcm16").catch(() => null);
+          if (!recorder) {
+            throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
+          }
+          console.log("\u{1F3A4} Starting microphone (Node.js)...");
+          this.recordingStream = recorder.record({
+            sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
+            threshold: 0,
+            verbose: false,
+            recordProgram: "sox"
+            // default
+          });
+          this.recordingStream.stream().on("data", (chunk) => {
+            if (!this.isMuted && onAudioInput) {
+              onAudioInput(new Uint8Array(chunk));
+            }
+          });
+          this.isListening = true;
+        } catch (e) {
+          console.error("Failed to start microphone:", e.message);
+          throw e;
+        }
+      }
+      stopMicrophone() {
+        if (this.recordingStream) {
+          this.recordingStream.stop();
+          this.recordingStream = null;
+        }
+        this.isListening = false;
+      }
+      async playAudio(pcm16Data) {
+        try {
+          if (!this.speaker) {
+            const Speaker = (await import("speaker")).default;
+            this.speaker = new Speaker({
+              channels: AUDIO_CONFIG.CHANNELS,
+              bitDepth: 16,
+              sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
+            });
+          }
+          this.speaker.write(Buffer.from(pcm16Data));
+        } catch (e) {
+        }
+      }
+      stopPlayback() {
+        if (this.speaker) {
+          this.speaker.end();
+          this.speaker = null;
+        }
+      }
+      cleanup() {
+        this.stopMicrophone();
+        this.stopPlayback();
+      }
+      isMicMuted() {
+        return this.isMuted;
+      }
+      setMuted(muted) {
+        this.isMuted = muted;
+      }
+      getAmplitude() {
+        return 0;
+      }
+    };
+  }
+});
 // src/index.ts
 var index_exports = {};
 __export(index_exports, {
@@ -41,42 +197,13 @@ __export(index_exports, {
   simpleTTS: () => simpleTTS
 });
 module.exports = __toCommonJS(index_exports);
+init_types();
-// src/types.ts
-var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
-  VoiceStyle2["F1"] = "F1";
-  VoiceStyle2["F2"] = "F2";
-  VoiceStyle2["F3"] = "F3";
-  VoiceStyle2["F4"] = "F4";
-  VoiceStyle2["F5"] = "F5";
-  VoiceStyle2["M1"] = "M1";
-  VoiceStyle2["M2"] = "M2";
-  VoiceStyle2["M3"] = "M3";
-  VoiceStyle2["M4"] = "M4";
-  VoiceStyle2["M5"] = "M5";
-  return VoiceStyle2;
-})(VoiceStyle || {});
-var Language = /* @__PURE__ */ ((Language2) => {
-  Language2["ENGLISH"] = "en";
-  Language2["SPANISH"] = "es";
-  Language2["FRENCH"] = "fr";
-  Language2["PORTUGUESE"] = "pt";
-  Language2["KOREAN"] = "ko";
-  return Language2;
-})(Language || {});
-var AUDIO_CONFIG = {
-  SAMPLE_RATE: 16e3,
-  SPEAKER_SAMPLE_RATE: 44100,
-  CHANNELS: 1,
-  CHUNK_DURATION_MS: 20,
-  get CHUNK_SIZE() {
-    return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
-  }
-};
-var DEFAULT_URLS = {
-  VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
-  TTS: "wss://api.lokutor.com/ws/tts"
-};
+// src/client.ts
+init_types();
+// src/browser-audio.ts
+init_types();
 // src/audio-utils.ts
 function pcm16ToFloat32(int16Data) {
@@ -506,6 +633,7 @@ var VoiceAgentClient = class {
   prompt;
   voice;
   language;
+  tools = [];
   // Callbacks
   onTranscription;
   onResponse;
@@ -519,6 +647,8 @@ var VoiceAgentClient = class {
   wantVisemes = false;
   audioManager = null;
   enableAudio = false;
+  currentGeneration = 0;
+  listeners = {};
   // Connection resilience
   isUserDisconnect = false;
   reconnecting = false;
@@ -537,17 +667,23 @@ var VoiceAgentClient = class {
     this.onError = config.onError;
     this.wantVisemes = config.visemes || false;
     this.enableAudio = config.enableAudio ?? false;
+    this.tools = config.tools || [];
   }
   /**
    * Connect to the Lokutor Voice Agent server
+   * @param customAudioManager Optional replacement for the default audio hardware handler
    */
-  async connect() {
+  async connect(customAudioManager) {
     this.isUserDisconnect = false;
-    if (this.enableAudio) {
-      if (!this.audioManager) {
+    if (this.enableAudio || customAudioManager) {
+      if (customAudioManager) {
+        this.audioManager = customAudioManager;
+      } else if (!this.audioManager && typeof window !== "undefined") {
         this.audioManager = new BrowserAudioManager();
       }
-      await this.audioManager.init();
+      if (this.audioManager) {
+        await this.audioManager.init();
+      }
     }
     return new Promise((resolve, reject) => {
       try {
@@ -608,6 +744,34 @@ var VoiceAgentClient = class {
       }
     });
   }
+  /**
+   * The "Golden Path" - Starts a managed session with hardware handled automatically.
+   * This is the recommended way to start a conversation in both Browser and Node.js.
+   */
+  async startManaged(config) {
+    this.enableAudio = true;
+    if (config?.audioManager) {
+      this.audioManager = config.audioManager;
+    } else if (!this.audioManager) {
+      if (typeof window !== "undefined") {
+        this.audioManager = new BrowserAudioManager();
+      } else {
+        try {
+          const { NodeAudioManager: NodeAudioManager2 } = await Promise.resolve().then(() => (init_node_audio(), node_audio_exports));
+          this.audioManager = new NodeAudioManager2();
+        } catch (e) {
+          console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
+        }
+      }
+    }
+    await this.connect();
+    if (this.audioManager && this.isConnected) {
+      await this.audioManager.startMicrophone((data) => {
+        this.sendAudio(data);
+      });
+    }
+    return this;
+  }
   /**
    * Send initial configuration to the server
    */
@@ -617,7 +781,10 @@ var VoiceAgentClient = class {
     this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
     this.ws.send(JSON.stringify({ type: "language", data: this.language }));
     this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
-    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
+    if (this.tools && this.tools.length > 0) {
+      this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
+    }
+    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
   }
   /**
    * Send raw PCM audio data to the server
@@ -631,7 +798,11 @@ var VoiceAgentClient = class {
   /**
    * Handle incoming binary data (audio response)
    */
-  handleBinaryMessage(data) {
+  handleBinaryMessage(data, generation) {
+    if (generation !== void 0 && generation < this.currentGeneration) {
+      console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
+      return;
+    }
     if (this.audioManager) {
       this.audioManager.playAudio(data);
     }
@@ -647,7 +818,7 @@ var VoiceAgentClient = class {
         case "audio":
           if (msg.data) {
             const buffer = base64ToUint8Array(msg.data);
-            this.handleBinaryMessage(buffer);
+            this.handleBinaryMessage(buffer, msg.generation);
           }
           break;
         case "transcript":
@@ -666,6 +837,14 @@ var VoiceAgentClient = class {
           }
           break;
         case "status":
+          if (msg.data === "thinking") {
+            const newGen = msg.generation || 0;
+            if (newGen > this.currentGeneration) {
+              console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
+              this.currentGeneration = newGen;
+              if (this.audioManager) this.audioManager.stopPlayback();
+            }
+          }
           if (msg.data === "interrupted" && this.audioManager) {
             this.audioManager.stopPlayback();
           }
@@ -687,25 +866,58 @@ var VoiceAgentClient = class {
           if (this.onError) this.onError(msg.data);
           console.error(`\u274C Server error: ${msg.data}`);
           break;
+        case "tool_call":
+          console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
+          break;
       }
     } catch (e) {
     }
   }
-  audioListeners = [];
-  emit(event, data) {
-    if (event === "audio") {
-      if (this.onAudioCallback) this.onAudioCallback(data);
-      this.audioListeners.forEach((l) => l(data));
-    } else if (event === "visemes") {
-      if (this.onVisemesCallback) this.onVisemesCallback(data);
-      this.visemeListeners.forEach((l) => l(data));
+  /**
+   * Register an event listener (for Python parity)
+   */
+  on(event, callback) {
+    if (!this.listeners[event]) {
+      this.listeners[event] = [];
+    }
+    this.listeners[event].push(callback);
+    return this;
+  }
+  /**
+   * Internal emitter for all events
+   */
+  emit(event, ...args) {
+    const legacyMap = {
+      "transcription": "onTranscription",
+      "response": "onResponse",
+      "audio": "onAudioCallback",
+      "visemes": "onVisemesCallback",
+      "status": "onStatus",
+      "error": "onError"
+    };
+    const legacyKey = legacyMap[event];
+    if (legacyKey && this[legacyKey]) {
+      try {
+        this[legacyKey](...args);
+      } catch (e) {
+        console.error(`Error in legacy callback ${legacyKey}:`, e);
+      }
+    }
+    if (this.listeners[event]) {
+      this.listeners[event].forEach((cb) => {
+        try {
+          cb(...args);
+        } catch (e) {
+          console.error(`Error in listener for ${event}:`, e);
+        }
+      });
     }
   }
   onAudio(callback) {
-    this.audioListeners.push(callback);
+    this.on("audio", callback);
   }
   onVisemes(callback) {
-    this.visemeListeners.push(callback);
+    this.on("visemes", callback);
   }
   /**
    * Disconnect from the server
@@ -784,15 +996,28 @@ var TTSClient = class {
    */
   synthesize(options) {
     return new Promise((resolve, reject) => {
+      let activityTimeout;
+      let ws;
+      let startTime;
+      let firstByteReceived = false;
+      const refreshTimeout = () => {
+        if (activityTimeout) clearTimeout(activityTimeout);
+        activityTimeout = setTimeout(() => {
+          console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
+          if (ws) ws.close();
+          resolve();
+        }, 2e3);
+      };
       try {
         let url = DEFAULT_URLS.TTS;
         if (this.apiKey) {
           const separator = url.includes("?") ? "&" : "?";
           url += `${separator}api_key=${this.apiKey}`;
         }
-        const ws = new WebSocket(url);
+        ws = new WebSocket(url);
         ws.binaryType = "arraybuffer";
         ws.onopen = () => {
+          refreshTimeout();
           const req = {
             text: options.text,
             voice: options.voice || "F1" /* F1 */,
@@ -802,9 +1027,16 @@ var TTSClient = class {
             visemes: options.visemes || false
           };
           ws.send(JSON.stringify(req));
+          startTime = Date.now();
         };
         ws.onmessage = async (event) => {
+          refreshTimeout();
           if (event.data instanceof ArrayBuffer) {
+            if (!firstByteReceived) {
+              const ttfb = Date.now() - startTime;
+              if (options.onTTFB) options.onTTFB(ttfb);
+              firstByteReceived = true;
+            }
             if (options.onAudio) options.onAudio(new Uint8Array(event.data));
           } else {
             try {
@@ -812,18 +1044,26 @@ var TTSClient = class {
               if (Array.isArray(msg) && options.onVisemes) {
                 options.onVisemes(msg);
               }
+              if (msg.type === "eos") {
+                if (activityTimeout) clearTimeout(activityTimeout);
+                ws.close();
+                resolve();
+              }
             } catch (e) {
             }
           }
         };
         ws.onerror = (err) => {
+          if (activityTimeout) clearTimeout(activityTimeout);
           if (options.onError) options.onError(err);
           reject(err);
         };
         ws.onclose = () => {
+          if (activityTimeout) clearTimeout(activityTimeout);
           resolve();
         };
       } catch (err) {
+        if (activityTimeout) clearTimeout(activityTimeout);
         if (options.onError) options.onError(err);
         reject(err);
       }

package/dist/index.mjs CHANGED

@@ -1,38 +1,9 @@
-// src/types.ts
-var VoiceStyle = /* @__PURE__ */ ((VoiceStyle2) => {
-  VoiceStyle2["F1"] = "F1";
-  VoiceStyle2["F2"] = "F2";
-  VoiceStyle2["F3"] = "F3";
-  VoiceStyle2["F4"] = "F4";
-  VoiceStyle2["F5"] = "F5";
-  VoiceStyle2["M1"] = "M1";
-  VoiceStyle2["M2"] = "M2";
-  VoiceStyle2["M3"] = "M3";
-  VoiceStyle2["M4"] = "M4";
-  VoiceStyle2["M5"] = "M5";
-  return VoiceStyle2;
-})(VoiceStyle || {});
-var Language = /* @__PURE__ */ ((Language2) => {
-  Language2["ENGLISH"] = "en";
-  Language2["SPANISH"] = "es";
-  Language2["FRENCH"] = "fr";
-  Language2["PORTUGUESE"] = "pt";
-  Language2["KOREAN"] = "ko";
-  return Language2;
-})(Language || {});
-var AUDIO_CONFIG = {
-  SAMPLE_RATE: 16e3,
-  SPEAKER_SAMPLE_RATE: 44100,
-  CHANNELS: 1,
-  CHUNK_DURATION_MS: 20,
-  get CHUNK_SIZE() {
-    return Math.floor(this.SAMPLE_RATE * this.CHUNK_DURATION_MS / 1e3);
-  }
-};
-var DEFAULT_URLS = {
-  VOICE_AGENT: "wss://api.lokutor.com/ws/agent",
-  TTS: "wss://api.lokutor.com/ws/tts"
-};
+import {
+  AUDIO_CONFIG,
+  DEFAULT_URLS,
+  Language,
+  VoiceStyle
+} from "./chunk-UI24THO7.mjs";
 // src/audio-utils.ts
 function pcm16ToFloat32(int16Data) {
@@ -462,6 +433,7 @@ var VoiceAgentClient = class {
   prompt;
   voice;
   language;
+  tools = [];
   // Callbacks
   onTranscription;
   onResponse;
@@ -475,6 +447,8 @@ var VoiceAgentClient = class {
   wantVisemes = false;
   audioManager = null;
   enableAudio = false;
+  currentGeneration = 0;
+  listeners = {};
   // Connection resilience
   isUserDisconnect = false;
   reconnecting = false;
@@ -493,17 +467,23 @@ var VoiceAgentClient = class {
     this.onError = config.onError;
     this.wantVisemes = config.visemes || false;
     this.enableAudio = config.enableAudio ?? false;
+    this.tools = config.tools || [];
   }
   /**
    * Connect to the Lokutor Voice Agent server
+   * @param customAudioManager Optional replacement for the default audio hardware handler
    */
-  async connect() {
+  async connect(customAudioManager) {
     this.isUserDisconnect = false;
-    if (this.enableAudio) {
-      if (!this.audioManager) {
+    if (this.enableAudio || customAudioManager) {
+      if (customAudioManager) {
+        this.audioManager = customAudioManager;
+      } else if (!this.audioManager && typeof window !== "undefined") {
         this.audioManager = new BrowserAudioManager();
       }
-      await this.audioManager.init();
+      if (this.audioManager) {
+        await this.audioManager.init();
+      }
     }
     return new Promise((resolve, reject) => {
       try {
@@ -564,6 +544,34 @@ var VoiceAgentClient = class {
       }
     });
   }
+  /**
+   * The "Golden Path" - Starts a managed session with hardware handled automatically.
+   * This is the recommended way to start a conversation in both Browser and Node.js.
+   */
+  async startManaged(config) {
+    this.enableAudio = true;
+    if (config?.audioManager) {
+      this.audioManager = config.audioManager;
+    } else if (!this.audioManager) {
+      if (typeof window !== "undefined") {
+        this.audioManager = new BrowserAudioManager();
+      } else {
+        try {
+          const { NodeAudioManager } = await import("./node-audio-5HOWE6MC.mjs");
+          this.audioManager = new NodeAudioManager();
+        } catch (e) {
+          console.error('\u274C Failed to load NodeAudioManager. Please ensure "speaker" and "node-record-lpcm16" are installed.');
+        }
+      }
+    }
+    await this.connect();
+    if (this.audioManager && this.isConnected) {
+      await this.audioManager.startMicrophone((data) => {
+        this.sendAudio(data);
+      });
+    }
+    return this;
+  }
   /**
    * Send initial configuration to the server
    */
@@ -573,7 +581,10 @@ var VoiceAgentClient = class {
     this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
     this.ws.send(JSON.stringify({ type: "language", data: this.language }));
     this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
-    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
+    if (this.tools && this.tools.length > 0) {
+      this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
+    }
+    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
   }
   /**
    * Send raw PCM audio data to the server
@@ -587,7 +598,11 @@ var VoiceAgentClient = class {
   /**
    * Handle incoming binary data (audio response)
    */
-  handleBinaryMessage(data) {
+  handleBinaryMessage(data, generation) {
+    if (generation !== void 0 && generation < this.currentGeneration) {
+      console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
+      return;
+    }
     if (this.audioManager) {
       this.audioManager.playAudio(data);
     }
@@ -603,7 +618,7 @@ var VoiceAgentClient = class {
         case "audio":
           if (msg.data) {
             const buffer = base64ToUint8Array(msg.data);
-            this.handleBinaryMessage(buffer);
+            this.handleBinaryMessage(buffer, msg.generation);
           }
           break;
         case "transcript":
@@ -622,6 +637,14 @@ var VoiceAgentClient = class {
           }
           break;
         case "status":
+          if (msg.data === "thinking") {
+            const newGen = msg.generation || 0;
+            if (newGen > this.currentGeneration) {
+              console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
+              this.currentGeneration = newGen;
+              if (this.audioManager) this.audioManager.stopPlayback();
+            }
+          }
           if (msg.data === "interrupted" && this.audioManager) {
             this.audioManager.stopPlayback();
           }
@@ -643,25 +666,58 @@ var VoiceAgentClient = class {
           if (this.onError) this.onError(msg.data);
           console.error(`\u274C Server error: ${msg.data}`);
           break;
+        case "tool_call":
+          console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
+          break;
       }
     } catch (e) {
     }
   }
-  audioListeners = [];
-  emit(event, data) {
-    if (event === "audio") {
-      if (this.onAudioCallback) this.onAudioCallback(data);
-      this.audioListeners.forEach((l) => l(data));
-    } else if (event === "visemes") {
-      if (this.onVisemesCallback) this.onVisemesCallback(data);
-      this.visemeListeners.forEach((l) => l(data));
+  /**
+   * Subscribe a callback to a named event (kept for parity with the Python client).
+   * The callback is appended to that event's listener list, which is created on
+   * first use. Returns this client so calls can be chained.
+   */
+  on(event, callback) {
+    const registered = this.listeners[event] || (this.listeners[event] = []);
+    registered.push(callback);
+    return this;
+  }
+  /**
+   * Internal dispatcher for every event.
+   * First calls the legacy single-callback property mapped to the event (if one
+   * is set), then every listener registered through on(). Each callback runs in
+   * its own try/catch, so a throwing handler is logged and does not stop the
+   * remaining handlers.
+   */
+  emit(event, ...args) {
+    // Event name -> name of the legacy callback property on this client.
+    const legacyKey = {
+      "transcription": "onTranscription",
+      "response": "onResponse",
+      "audio": "onAudioCallback",
+      "visemes": "onVisemesCallback",
+      "status": "onStatus",
+      "error": "onError"
+    }[event];
+    if (legacyKey && this[legacyKey]) {
+      try {
+        this[legacyKey](...args);
+      } catch (e) {
+        console.error(`Error in legacy callback ${legacyKey}:`, e);
+      }
+    }
+    const subscribers = this.listeners[event];
+    if (subscribers) {
+      const invokeSafely = (listener) => {
+        try {
+          listener(...args);
+        } catch (e) {
+          console.error(`Error in listener for ${event}:`, e);
+        }
+      };
+      subscribers.forEach((listener) => invokeSafely(listener));
     }
   }
+  /**
+   * Register a listener for "audio" events; shorthand for on("audio", callback).
+   * Unlike on(), this method returns nothing.
+   */
   onAudio(callback) {
-    this.audioListeners.push(callback);
+    this.on("audio", callback);
   }
+  /**
+   * Register a listener for "visemes" events; shorthand for on("visemes", callback).
+   * Unlike on(), this method returns nothing.
+   */
   onVisemes(callback) {
-    this.visemeListeners.push(callback);
+    this.on("visemes", callback);
   }
   /**
    * Disconnect from the server
@@ -740,15 +796,28 @@ var TTSClient = class {
    */
   synthesize(options) {
     return new Promise((resolve, reject) => {
+      let activityTimeout;
+      let ws;
+      let startTime;
+      let firstByteReceived = false;
+      const refreshTimeout = () => {
+        if (activityTimeout) clearTimeout(activityTimeout);
+        activityTimeout = setTimeout(() => {
+          console.log("\u23F1\uFE0F TTS synthesis reached inactivity timeout (2s) - resolving");
+          if (ws) ws.close();
+          resolve();
+        }, 2e3);
+      };
       try {
         let url = DEFAULT_URLS.TTS;
         if (this.apiKey) {
           const separator = url.includes("?") ? "&" : "?";
           url += `${separator}api_key=${this.apiKey}`;
         }
-        const ws = new WebSocket(url);
+        ws = new WebSocket(url);
         ws.binaryType = "arraybuffer";
         ws.onopen = () => {
+          refreshTimeout();
           const req = {
             text: options.text,
             voice: options.voice || "F1" /* F1 */,
@@ -758,9 +827,16 @@ var TTSClient = class {
             visemes: options.visemes || false
           };
           ws.send(JSON.stringify(req));
+          startTime = Date.now();
         };
         ws.onmessage = async (event) => {
+          refreshTimeout();
           if (event.data instanceof ArrayBuffer) {
+            if (!firstByteReceived) {
+              const ttfb = Date.now() - startTime;
+              if (options.onTTFB) options.onTTFB(ttfb);
+              firstByteReceived = true;
+            }
             if (options.onAudio) options.onAudio(new Uint8Array(event.data));
           } else {
             try {
@@ -768,18 +844,26 @@ var TTSClient = class {
               if (Array.isArray(msg) && options.onVisemes) {
                 options.onVisemes(msg);
               }
+              if (msg.type === "eos") {
+                if (activityTimeout) clearTimeout(activityTimeout);
+                ws.close();
+                resolve();
+              }
             } catch (e) {
             }
           }
         };
         ws.onerror = (err) => {
+          if (activityTimeout) clearTimeout(activityTimeout);
           if (options.onError) options.onError(err);
           reject(err);
         };
         ws.onclose = () => {
+          if (activityTimeout) clearTimeout(activityTimeout);
           resolve();
         };
       } catch (err) {
+        if (activityTimeout) clearTimeout(activityTimeout);
         if (options.onError) options.onError(err);
         reject(err);
       }

package/dist/node-audio-5HOWE6MC.mjs ADDED Viewed

@@ -0,0 +1,94 @@
+import {
+  AUDIO_CONFIG
+} from "./chunk-UI24THO7.mjs";
+// src/node-audio.ts
+/**
+ * Audio manager for Node.js. Captures microphone input through the optional
+ * "node-record-lpcm16" package and plays PCM audio through the optional
+ * "speaker" package. Both packages are loaded lazily with dynamic import().
+ */
+var NodeAudioManager = class {
+  speaker = null;
+  recorder = null;
+  recordingStream = null;
+  isMuted = false;
+  isListening = false;
+  // Set after the first playback failure so the warning is printed only once.
+  playbackFailureLogged = false;
+  constructor() {
+  }
+  /**
+   * Check whether the "speaker" package can be loaded and print install
+   * instructions when it cannot. Never throws.
+   */
+  async init() {
+    try {
+      const Speaker = await import("speaker").catch(() => null);
+      if (!Speaker) {
+        console.warn('\u26A0\uFE0F  Package "speaker" is missing. Hardware output will be disabled.');
+        console.warn("\u{1F449} Run: npm install speaker");
+      }
+    } catch (e) {
+      console.error("Error initializing Node audio:", e);
+    }
+  }
+  /**
+   * Start recording from the microphone and forward each captured chunk to
+   * onAudioInput as a Uint8Array. Chunks are dropped while muted. Does nothing
+   * if recording is already active.
+   *
+   * @param onAudioInput Callback invoked with every recorded chunk.
+   * @throws If "node-record-lpcm16" cannot be loaded or recording fails to start.
+   */
+  async startMicrophone(onAudioInput) {
+    if (this.isListening) return;
+    try {
+      const recorderModule = await import("node-record-lpcm16").catch(() => null);
+      if (!recorderModule) {
+        throw new Error('Package "node-record-lpcm16" is missing. Microphone input failed.\n\u{1F449} Run: npm install node-record-lpcm16');
+      }
+      // A CommonJS package loaded through import() may expose its API only on
+      // the namespace's `default` property, so fall back to it.
+      const recorder = typeof recorderModule.record === "function" ? recorderModule : recorderModule.default;
+      if (!recorder || typeof recorder.record !== "function") {
+        throw new Error('Package "node-record-lpcm16" does not expose a record() function.');
+      }
+      console.log("\u{1F3A4} Starting microphone (Node.js)...");
+      this.recordingStream = recorder.record({
+        sampleRate: AUDIO_CONFIG.SAMPLE_RATE,
+        threshold: 0,
+        verbose: false,
+        recordProgram: "sox"
+        // default
+      });
+      this.recordingStream.stream().on("data", (chunk) => {
+        if (!this.isMuted && onAudioInput) {
+          onAudioInput(new Uint8Array(chunk));
+        }
+      });
+      this.isListening = true;
+    } catch (e) {
+      console.error("Failed to start microphone:", e.message);
+      throw e;
+    }
+  }
+  /**
+   * Stop the active recording, if any, and mark the manager as not listening.
+   */
+  stopMicrophone() {
+    if (this.recordingStream) {
+      this.recordingStream.stop();
+      this.recordingStream = null;
+    }
+    this.isListening = false;
+  }
+  /**
+   * Write a chunk of 16-bit PCM audio to the speaker, creating the speaker on
+   * first use. Playback is best-effort: failures never propagate to the caller,
+   * but the first one is reported with a warning.
+   *
+   * @param pcm16Data Raw PCM bytes to play.
+   */
+  async playAudio(pcm16Data) {
+    try {
+      if (!this.speaker) {
+        const Speaker = (await import("speaker")).default;
+        // Re-check after the await: callers do not await playAudio(), so another
+        // chunk may have created the speaker while this call was suspended.
+        // Without this check each pending call would build its own Speaker.
+        if (!this.speaker) {
+          this.speaker = new Speaker({
+            channels: AUDIO_CONFIG.CHANNELS,
+            bitDepth: 16,
+            sampleRate: AUDIO_CONFIG.SPEAKER_SAMPLE_RATE
+          });
+        }
+      }
+      this.speaker.write(Buffer.from(pcm16Data));
+    } catch (e) {
+      // Report only the first failure; otherwise a missing "speaker" package
+      // would print one warning per audio chunk.
+      if (!this.playbackFailureLogged) {
+        this.playbackFailureLogged = true;
+        console.warn("\u26A0\uFE0F  Audio playback failed:", e instanceof Error ? e.message : e);
+      }
+    }
+  }
+  /**
+   * End the current speaker stream, if any, and drop the reference so the next
+   * playAudio() call creates a fresh one.
+   */
+  stopPlayback() {
+    if (this.speaker) {
+      this.speaker.end();
+      this.speaker = null;
+    }
+  }
+  /**
+   * Stop both microphone capture and playback.
+   */
+  cleanup() {
+    this.stopMicrophone();
+    this.stopPlayback();
+  }
+  /**
+   * @returns Whether microphone chunks are currently being dropped.
+   */
+  isMicMuted() {
+    return this.isMuted;
+  }
+  /**
+   * Mute or unmute the microphone. Recording keeps running while muted; the
+   * captured chunks are simply not forwarded.
+   */
+  setMuted(muted) {
+    this.isMuted = muted;
+  }
+  /**
+   * Amplitude metering is not implemented in this manager; always returns 0.
+   */
+  getAmplitude() {
+    return 0;
+  }
+};
+export {
+  NodeAudioManager
+};

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lokutor/sdk",
-  "version": "1.1.11",
+  "version": "1.1.13",
   "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",