@lokutor/sdk 1.1.11 → 1.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -97,6 +97,28 @@ interface Viseme {
97
97
  c: string;
98
98
  t: number;
99
99
  }
100
+ /**
101
+ * Tool definition for LLM function calling (OpenAI format)
102
+ */
103
+ interface ToolDefinition {
104
+ type: 'function';
105
+ function: {
106
+ name: string;
107
+ description: string;
108
+ parameters: {
109
+ type: 'object';
110
+ properties: Record<string, any>;
111
+ required?: string[];
112
+ };
113
+ };
114
+ }
115
+ /**
116
+ * Event data for tool execution
117
+ */
118
+ interface ToolCall {
119
+ name: string;
120
+ arguments: string;
121
+ }
100
122
 
101
123
  /**
102
124
  * Main client for Lokutor Voice Agent SDK
@@ -109,6 +131,7 @@ declare class VoiceAgentClient {
109
131
  prompt: string;
110
132
  voice: VoiceStyle;
111
133
  language: Language;
134
+ tools: ToolDefinition[];
112
135
  private onTranscription?;
113
136
  private onResponse?;
114
137
  private onAudioCallback?;
@@ -121,6 +144,7 @@ declare class VoiceAgentClient {
121
144
  private wantVisemes;
122
145
  private audioManager;
123
146
  private enableAudio;
147
+ private currentGeneration;
124
148
  private isUserDisconnect;
125
149
  private reconnecting;
126
150
  private reconnectAttempts;
@@ -132,6 +156,7 @@ declare class VoiceAgentClient {
132
156
  visemes?: boolean;
133
157
  onVisemes?: (visemes: Viseme[]) => void;
134
158
  enableAudio?: boolean;
159
+ tools?: ToolDefinition[];
135
160
  });
136
161
  /**
137
162
  * Connect to the Lokutor Voice Agent server
@@ -418,4 +443,4 @@ declare class BrowserAudioManager {
418
443
  isRecording(): boolean;
419
444
  }
420
445
 
421
- export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
446
+ export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.d.ts CHANGED
@@ -97,6 +97,28 @@ interface Viseme {
97
97
  c: string;
98
98
  t: number;
99
99
  }
100
+ /**
101
+ * Tool definition for LLM function calling (OpenAI format)
102
+ */
103
+ interface ToolDefinition {
104
+ type: 'function';
105
+ function: {
106
+ name: string;
107
+ description: string;
108
+ parameters: {
109
+ type: 'object';
110
+ properties: Record<string, any>;
111
+ required?: string[];
112
+ };
113
+ };
114
+ }
115
+ /**
116
+ * Event data for tool execution
117
+ */
118
+ interface ToolCall {
119
+ name: string;
120
+ arguments: string;
121
+ }
100
122
 
101
123
  /**
102
124
  * Main client for Lokutor Voice Agent SDK
@@ -109,6 +131,7 @@ declare class VoiceAgentClient {
109
131
  prompt: string;
110
132
  voice: VoiceStyle;
111
133
  language: Language;
134
+ tools: ToolDefinition[];
112
135
  private onTranscription?;
113
136
  private onResponse?;
114
137
  private onAudioCallback?;
@@ -121,6 +144,7 @@ declare class VoiceAgentClient {
121
144
  private wantVisemes;
122
145
  private audioManager;
123
146
  private enableAudio;
147
+ private currentGeneration;
124
148
  private isUserDisconnect;
125
149
  private reconnecting;
126
150
  private reconnectAttempts;
@@ -132,6 +156,7 @@ declare class VoiceAgentClient {
132
156
  visemes?: boolean;
133
157
  onVisemes?: (visemes: Viseme[]) => void;
134
158
  enableAudio?: boolean;
159
+ tools?: ToolDefinition[];
135
160
  });
136
161
  /**
137
162
  * Connect to the Lokutor Voice Agent server
@@ -418,4 +443,4 @@ declare class BrowserAudioManager {
418
443
  isRecording(): boolean;
419
444
  }
420
445
 
421
- export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
446
+ export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
package/dist/index.js CHANGED
@@ -506,6 +506,7 @@ var VoiceAgentClient = class {
506
506
  prompt;
507
507
  voice;
508
508
  language;
509
+ tools = [];
509
510
  // Callbacks
510
511
  onTranscription;
511
512
  onResponse;
@@ -519,6 +520,7 @@ var VoiceAgentClient = class {
519
520
  wantVisemes = false;
520
521
  audioManager = null;
521
522
  enableAudio = false;
523
+ currentGeneration = 0;
522
524
  // Connection resilience
523
525
  isUserDisconnect = false;
524
526
  reconnecting = false;
@@ -537,6 +539,7 @@ var VoiceAgentClient = class {
537
539
  this.onError = config.onError;
538
540
  this.wantVisemes = config.visemes || false;
539
541
  this.enableAudio = config.enableAudio ?? false;
542
+ this.tools = config.tools || [];
540
543
  }
541
544
  /**
542
545
  * Connect to the Lokutor Voice Agent server
@@ -617,7 +620,10 @@ var VoiceAgentClient = class {
617
620
  this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
618
621
  this.ws.send(JSON.stringify({ type: "language", data: this.language }));
619
622
  this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
620
- console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
623
+ if (this.tools && this.tools.length > 0) {
624
+ this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
625
+ }
626
+ console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
621
627
  }
622
628
  /**
623
629
  * Send raw PCM audio data to the server
@@ -631,7 +637,11 @@ var VoiceAgentClient = class {
631
637
  /**
632
638
  * Handle incoming binary data (audio response)
633
639
  */
634
- handleBinaryMessage(data) {
640
+ handleBinaryMessage(data, generation) {
641
+ if (generation !== void 0 && generation < this.currentGeneration) {
642
+ console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
643
+ return;
644
+ }
635
645
  if (this.audioManager) {
636
646
  this.audioManager.playAudio(data);
637
647
  }
@@ -647,7 +657,7 @@ var VoiceAgentClient = class {
647
657
  case "audio":
648
658
  if (msg.data) {
649
659
  const buffer = base64ToUint8Array(msg.data);
650
- this.handleBinaryMessage(buffer);
660
+ this.handleBinaryMessage(buffer, msg.generation);
651
661
  }
652
662
  break;
653
663
  case "transcript":
@@ -666,6 +676,14 @@ var VoiceAgentClient = class {
666
676
  }
667
677
  break;
668
678
  case "status":
679
+ if (msg.data === "thinking") {
680
+ const newGen = msg.generation || 0;
681
+ if (newGen > this.currentGeneration) {
682
+ console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
683
+ this.currentGeneration = newGen;
684
+ if (this.audioManager) this.audioManager.stopPlayback();
685
+ }
686
+ }
669
687
  if (msg.data === "interrupted" && this.audioManager) {
670
688
  this.audioManager.stopPlayback();
671
689
  }
@@ -687,6 +705,9 @@ var VoiceAgentClient = class {
687
705
  if (this.onError) this.onError(msg.data);
688
706
  console.error(`\u274C Server error: ${msg.data}`);
689
707
  break;
708
+ case "tool_call":
709
+ console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
710
+ break;
690
711
  }
691
712
  } catch (e) {
692
713
  }
package/dist/index.mjs CHANGED
@@ -462,6 +462,7 @@ var VoiceAgentClient = class {
462
462
  prompt;
463
463
  voice;
464
464
  language;
465
+ tools = [];
465
466
  // Callbacks
466
467
  onTranscription;
467
468
  onResponse;
@@ -475,6 +476,7 @@ var VoiceAgentClient = class {
475
476
  wantVisemes = false;
476
477
  audioManager = null;
477
478
  enableAudio = false;
479
+ currentGeneration = 0;
478
480
  // Connection resilience
479
481
  isUserDisconnect = false;
480
482
  reconnecting = false;
@@ -493,6 +495,7 @@ var VoiceAgentClient = class {
493
495
  this.onError = config.onError;
494
496
  this.wantVisemes = config.visemes || false;
495
497
  this.enableAudio = config.enableAudio ?? false;
498
+ this.tools = config.tools || [];
496
499
  }
497
500
  /**
498
501
  * Connect to the Lokutor Voice Agent server
@@ -573,7 +576,10 @@ var VoiceAgentClient = class {
573
576
  this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
574
577
  this.ws.send(JSON.stringify({ type: "language", data: this.language }));
575
578
  this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
576
- console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}`);
579
+ if (this.tools && this.tools.length > 0) {
580
+ this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
581
+ }
582
+ console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
577
583
  }
578
584
  /**
579
585
  * Send raw PCM audio data to the server
@@ -587,7 +593,11 @@ var VoiceAgentClient = class {
587
593
  /**
588
594
  * Handle incoming binary data (audio response)
589
595
  */
590
- handleBinaryMessage(data) {
596
+ handleBinaryMessage(data, generation) {
597
+ if (generation !== void 0 && generation < this.currentGeneration) {
598
+ console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
599
+ return;
600
+ }
591
601
  if (this.audioManager) {
592
602
  this.audioManager.playAudio(data);
593
603
  }
@@ -603,7 +613,7 @@ var VoiceAgentClient = class {
603
613
  case "audio":
604
614
  if (msg.data) {
605
615
  const buffer = base64ToUint8Array(msg.data);
606
- this.handleBinaryMessage(buffer);
616
+ this.handleBinaryMessage(buffer, msg.generation);
607
617
  }
608
618
  break;
609
619
  case "transcript":
@@ -622,6 +632,14 @@ var VoiceAgentClient = class {
622
632
  }
623
633
  break;
624
634
  case "status":
635
+ if (msg.data === "thinking") {
636
+ const newGen = msg.generation || 0;
637
+ if (newGen > this.currentGeneration) {
638
+ console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
639
+ this.currentGeneration = newGen;
640
+ if (this.audioManager) this.audioManager.stopPlayback();
641
+ }
642
+ }
625
643
  if (msg.data === "interrupted" && this.audioManager) {
626
644
  this.audioManager.stopPlayback();
627
645
  }
@@ -643,6 +661,9 @@ var VoiceAgentClient = class {
643
661
  if (this.onError) this.onError(msg.data);
644
662
  console.error(`\u274C Server error: ${msg.data}`);
645
663
  break;
664
+ case "tool_call":
665
+ console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
666
+ break;
646
667
  }
647
668
  } catch (e) {
648
669
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lokutor/sdk",
3
- "version": "1.1.11",
3
+ "version": "1.1.12",
4
4
  "description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
5
5
  "main": "./dist/index.js",
6
6
  "module": "./dist/index.mjs",