npm - @lokutor/sdk - Versions diffs - 1.1.11 → 1.1.12 - Mend

@lokutor/sdk 1.1.11 → 1.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5)

package/dist/index.d.mts CHANGED Viewed

@@ -97,6 +97,28 @@ interface Viseme {
     c: string;
     t: number;
 }
+/**
+ * Tool definition for LLM function calling (OpenAI format)
+ */
+interface ToolDefinition {
+    type: 'function';
+    function: {
+        name: string;
+        description: string;
+        parameters: {
+            type: 'object';
+            properties: Record<string, any>;
+            required?: string[];
+        };
+    };
+}
+/**
+ * Event data for tool execution
+ */
+interface ToolCall {
+    name: string;
+    arguments: string;
+}
 /**
  * Main client for Lokutor Voice Agent SDK
@@ -109,6 +131,7 @@ declare class VoiceAgentClient {
     prompt: string;
     voice: VoiceStyle;
     language: Language;
+    tools: ToolDefinition[];
     private onTranscription?;
     private onResponse?;
     private onAudioCallback?;
@@ -121,6 +144,7 @@ declare class VoiceAgentClient {
     private wantVisemes;
     private audioManager;
     private enableAudio;
+    private currentGeneration;
     private isUserDisconnect;
     private reconnecting;
     private reconnectAttempts;
@@ -132,6 +156,7 @@ declare class VoiceAgentClient {
         visemes?: boolean;
         onVisemes?: (visemes: Viseme[]) => void;
         enableAudio?: boolean;
+        tools?: ToolDefinition[];
     });
     /**
      * Connect to the Lokutor Voice Agent server
@@ -418,4 +443,4 @@ declare class BrowserAudioManager {
     isRecording(): boolean;
 }
-export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
+export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };

package/dist/index.d.ts CHANGED Viewed

@@ -97,6 +97,28 @@ interface Viseme {
     c: string;
     t: number;
 }
+/**
+ * Tool definition for LLM function calling (OpenAI format)
+ */
+interface ToolDefinition {
+    type: 'function';
+    function: {
+        name: string;
+        description: string;
+        parameters: {
+            type: 'object';
+            properties: Record<string, any>;
+            required?: string[];
+        };
+    };
+}
+/**
+ * Event data for tool execution
+ */
+interface ToolCall {
+    name: string;
+    arguments: string;
+}
 /**
  * Main client for Lokutor Voice Agent SDK
@@ -109,6 +131,7 @@ declare class VoiceAgentClient {
     prompt: string;
     voice: VoiceStyle;
     language: Language;
+    tools: ToolDefinition[];
     private onTranscription?;
     private onResponse?;
     private onAudioCallback?;
@@ -121,6 +144,7 @@ declare class VoiceAgentClient {
     private wantVisemes;
     private audioManager;
     private enableAudio;
+    private currentGeneration;
     private isUserDisconnect;
     private reconnecting;
     private reconnectAttempts;
@@ -132,6 +156,7 @@ declare class VoiceAgentClient {
         visemes?: boolean;
         onVisemes?: (visemes: Viseme[]) => void;
         enableAudio?: boolean;
+        tools?: ToolDefinition[];
     });
     /**
      * Connect to the Lokutor Voice Agent server
@@ -418,4 +443,4 @@ declare class BrowserAudioManager {
     isRecording(): boolean;
 }
-export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
+export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };

package/dist/index.js CHANGED Viewed

@@ -506,6 +506,7 @@ var VoiceAgentClient = class {
   prompt;
   voice;
   language;
+  tools = [];
   // Callbacks
   onTranscription;
   onResponse;
@@ -519,6 +520,7 @@ var VoiceAgentClient = class {
   wantVisemes = false;
   audioManager = null;
   enableAudio = false;
+  currentGeneration = 0;
   // Connection resilience
   isUserDisconnect = false;
   reconnecting = false;
@@ -537,6 +539,7 @@ var VoiceAgentClient = class {
     this.onError = config.onError;
     this.wantVisemes = config.visemes || false;
     this.enableAudio = config.enableAudio ?? false;
+    this.tools = config.tools || [];
   }
   /**
    * Connect to the Lokutor Voice Agent server
@@ -617,7 +620,10 @@ var VoiceAgentClient = class {
     this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
     this.ws.send(JSON.stringify({ type: "language", data: this.language }));
     this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
-    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
+    if (this.tools && this.tools.length > 0) {
+      this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
+    }
+    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
   }
   /**
    * Send raw PCM audio data to the server
@@ -631,7 +637,11 @@ var VoiceAgentClient = class {
   /**
    * Handle incoming binary data (audio response)
    */
-  handleBinaryMessage(data) {
+  handleBinaryMessage(data, generation) {
+    if (generation !== void 0 && generation < this.currentGeneration) {
+      console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
+      return;
+    }
     if (this.audioManager) {
       this.audioManager.playAudio(data);
     }
@@ -647,7 +657,7 @@ var VoiceAgentClient = class {
         case "audio":
           if (msg.data) {
             const buffer = base64ToUint8Array(msg.data);
-            this.handleBinaryMessage(buffer);
+            this.handleBinaryMessage(buffer, msg.generation);
           }
           break;
         case "transcript":
@@ -666,6 +676,14 @@ var VoiceAgentClient = class {
           }
           break;
         case "status":
+          if (msg.data === "thinking") {
+            const newGen = msg.generation || 0;
+            if (newGen > this.currentGeneration) {
+              console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
+              this.currentGeneration = newGen;
+              if (this.audioManager) this.audioManager.stopPlayback();
+            }
+          }
           if (msg.data === "interrupted" && this.audioManager) {
             this.audioManager.stopPlayback();
           }
@@ -687,6 +705,9 @@ var VoiceAgentClient = class {
           if (this.onError) this.onError(msg.data);
           console.error(`\u274C Server error: ${msg.data}`);
           break;
+        case "tool_call":
+          console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
+          break;
       }
     } catch (e) {
     }

package/dist/index.mjs CHANGED Viewed

@@ -462,6 +462,7 @@ var VoiceAgentClient = class {
   prompt;
   voice;
   language;
+  tools = [];
   // Callbacks
   onTranscription;
   onResponse;
@@ -475,6 +476,7 @@ var VoiceAgentClient = class {
   wantVisemes = false;
   audioManager = null;
   enableAudio = false;
+  currentGeneration = 0;
   // Connection resilience
   isUserDisconnect = false;
   reconnecting = false;
@@ -493,6 +495,7 @@ var VoiceAgentClient = class {
     this.onError = config.onError;
     this.wantVisemes = config.visemes || false;
     this.enableAudio = config.enableAudio ?? false;
+    this.tools = config.tools || [];
   }
   /**
    * Connect to the Lokutor Voice Agent server
@@ -573,7 +576,10 @@ var VoiceAgentClient = class {
     this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
     this.ws.send(JSON.stringify({ type: "language", data: this.language }));
     this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
-    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
+    if (this.tools && this.tools.length > 0) {
+      this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
+    }
+    console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
   }
   /**
    * Send raw PCM audio data to the server
@@ -587,7 +593,11 @@ var VoiceAgentClient = class {
   /**
    * Handle incoming binary data (audio response)
    */
-  handleBinaryMessage(data) {
+  handleBinaryMessage(data, generation) {
+    if (generation !== void 0 && generation < this.currentGeneration) {
+      console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
+      return;
+    }
     if (this.audioManager) {
       this.audioManager.playAudio(data);
     }
@@ -603,7 +613,7 @@ var VoiceAgentClient = class {
         case "audio":
           if (msg.data) {
             const buffer = base64ToUint8Array(msg.data);
-            this.handleBinaryMessage(buffer);
+            this.handleBinaryMessage(buffer, msg.generation);
           }
           break;
         case "transcript":
@@ -622,6 +632,14 @@ var VoiceAgentClient = class {
           }
           break;
         case "status":
+          if (msg.data === "thinking") {
+            const newGen = msg.generation || 0;
+            if (newGen > this.currentGeneration) {
+              console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
+              this.currentGeneration = newGen;
+              if (this.audioManager) this.audioManager.stopPlayback();
+            }
+          }
           if (msg.data === "interrupted" && this.audioManager) {
             this.audioManager.stopPlayback();
           }
@@ -643,6 +661,9 @@ var VoiceAgentClient = class {
           if (this.onError) this.onError(msg.data);
           console.error(`\u274C Server error: ${msg.data}`);
           break;
+        case "tool_call":
+          console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
+          break;
       }
     } catch (e) {
     }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lokutor/sdk",
-  "version": "1.1.11",
+  "version": "1.1.12",
   "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
   "main": "./dist/index.js",
   "module": "./dist/index.mjs",