@lokutor/sdk 1.1.11 → 1.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +26 -1
- package/dist/index.d.ts +26 -1
- package/dist/index.js +24 -3
- package/dist/index.mjs +24 -3
- package/package.json +1 -1
package/dist/index.d.mts
CHANGED
|
@@ -97,6 +97,28 @@ interface Viseme {
|
|
|
97
97
|
c: string;
|
|
98
98
|
t: number;
|
|
99
99
|
}
|
|
100
|
+
/**
|
|
101
|
+
* Tool definition for LLM function calling (OpenAI format)
|
|
102
|
+
*/
|
|
103
|
+
interface ToolDefinition {
|
|
104
|
+
type: 'function';
|
|
105
|
+
function: {
|
|
106
|
+
name: string;
|
|
107
|
+
description: string;
|
|
108
|
+
parameters: {
|
|
109
|
+
type: 'object';
|
|
110
|
+
properties: Record<string, any>;
|
|
111
|
+
required?: string[];
|
|
112
|
+
};
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Event data for tool execution
|
|
117
|
+
*/
|
|
118
|
+
interface ToolCall {
|
|
119
|
+
name: string;
|
|
120
|
+
arguments: string;
|
|
121
|
+
}
|
|
100
122
|
|
|
101
123
|
/**
|
|
102
124
|
* Main client for Lokutor Voice Agent SDK
|
|
@@ -109,6 +131,7 @@ declare class VoiceAgentClient {
|
|
|
109
131
|
prompt: string;
|
|
110
132
|
voice: VoiceStyle;
|
|
111
133
|
language: Language;
|
|
134
|
+
tools: ToolDefinition[];
|
|
112
135
|
private onTranscription?;
|
|
113
136
|
private onResponse?;
|
|
114
137
|
private onAudioCallback?;
|
|
@@ -121,6 +144,7 @@ declare class VoiceAgentClient {
|
|
|
121
144
|
private wantVisemes;
|
|
122
145
|
private audioManager;
|
|
123
146
|
private enableAudio;
|
|
147
|
+
private currentGeneration;
|
|
124
148
|
private isUserDisconnect;
|
|
125
149
|
private reconnecting;
|
|
126
150
|
private reconnectAttempts;
|
|
@@ -132,6 +156,7 @@ declare class VoiceAgentClient {
|
|
|
132
156
|
visemes?: boolean;
|
|
133
157
|
onVisemes?: (visemes: Viseme[]) => void;
|
|
134
158
|
enableAudio?: boolean;
|
|
159
|
+
tools?: ToolDefinition[];
|
|
135
160
|
});
|
|
136
161
|
/**
|
|
137
162
|
* Connect to the Lokutor Voice Agent server
|
|
@@ -418,4 +443,4 @@ declare class BrowserAudioManager {
|
|
|
418
443
|
isRecording(): boolean;
|
|
419
444
|
}
|
|
420
445
|
|
|
421
|
-
export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
|
|
446
|
+
export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
|
package/dist/index.d.ts
CHANGED
|
@@ -97,6 +97,28 @@ interface Viseme {
|
|
|
97
97
|
c: string;
|
|
98
98
|
t: number;
|
|
99
99
|
}
|
|
100
|
+
/**
|
|
101
|
+
* Tool definition for LLM function calling (OpenAI format)
|
|
102
|
+
*/
|
|
103
|
+
interface ToolDefinition {
|
|
104
|
+
type: 'function';
|
|
105
|
+
function: {
|
|
106
|
+
name: string;
|
|
107
|
+
description: string;
|
|
108
|
+
parameters: {
|
|
109
|
+
type: 'object';
|
|
110
|
+
properties: Record<string, any>;
|
|
111
|
+
required?: string[];
|
|
112
|
+
};
|
|
113
|
+
};
|
|
114
|
+
}
|
|
115
|
+
/**
|
|
116
|
+
* Event data for tool execution
|
|
117
|
+
*/
|
|
118
|
+
interface ToolCall {
|
|
119
|
+
name: string;
|
|
120
|
+
arguments: string;
|
|
121
|
+
}
|
|
100
122
|
|
|
101
123
|
/**
|
|
102
124
|
* Main client for Lokutor Voice Agent SDK
|
|
@@ -109,6 +131,7 @@ declare class VoiceAgentClient {
|
|
|
109
131
|
prompt: string;
|
|
110
132
|
voice: VoiceStyle;
|
|
111
133
|
language: Language;
|
|
134
|
+
tools: ToolDefinition[];
|
|
112
135
|
private onTranscription?;
|
|
113
136
|
private onResponse?;
|
|
114
137
|
private onAudioCallback?;
|
|
@@ -121,6 +144,7 @@ declare class VoiceAgentClient {
|
|
|
121
144
|
private wantVisemes;
|
|
122
145
|
private audioManager;
|
|
123
146
|
private enableAudio;
|
|
147
|
+
private currentGeneration;
|
|
124
148
|
private isUserDisconnect;
|
|
125
149
|
private reconnecting;
|
|
126
150
|
private reconnectAttempts;
|
|
@@ -132,6 +156,7 @@ declare class VoiceAgentClient {
|
|
|
132
156
|
visemes?: boolean;
|
|
133
157
|
onVisemes?: (visemes: Viseme[]) => void;
|
|
134
158
|
enableAudio?: boolean;
|
|
159
|
+
tools?: ToolDefinition[];
|
|
135
160
|
});
|
|
136
161
|
/**
|
|
137
162
|
* Connect to the Lokutor Voice Agent server
|
|
@@ -418,4 +443,4 @@ declare class BrowserAudioManager {
|
|
|
418
443
|
isRecording(): boolean;
|
|
419
444
|
}
|
|
420
445
|
|
|
421
|
-
export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
|
|
446
|
+
export { AUDIO_CONFIG, type AnalyserConfig, type BrowserAudioConfig, BrowserAudioManager, type BrowserAudioOptions, DEFAULT_URLS, Language, type LokutorConfig, StreamResampler, type SynthesizeOptions, TTSClient, type ToolCall, type ToolDefinition, type Viseme, VoiceAgentClient, type VoiceAgentOptions, VoiceStyle, applyLowPassFilter, bytesToPcm16, calculateRMS, float32ToPcm16, normalizeAudio, pcm16ToBytes, pcm16ToFloat32, resample, resampleWithAntiAliasing, simpleConversation, simpleTTS };
|
package/dist/index.js
CHANGED
|
@@ -506,6 +506,7 @@ var VoiceAgentClient = class {
|
|
|
506
506
|
prompt;
|
|
507
507
|
voice;
|
|
508
508
|
language;
|
|
509
|
+
tools = [];
|
|
509
510
|
// Callbacks
|
|
510
511
|
onTranscription;
|
|
511
512
|
onResponse;
|
|
@@ -519,6 +520,7 @@ var VoiceAgentClient = class {
|
|
|
519
520
|
wantVisemes = false;
|
|
520
521
|
audioManager = null;
|
|
521
522
|
enableAudio = false;
|
|
523
|
+
currentGeneration = 0;
|
|
522
524
|
// Connection resilience
|
|
523
525
|
isUserDisconnect = false;
|
|
524
526
|
reconnecting = false;
|
|
@@ -537,6 +539,7 @@ var VoiceAgentClient = class {
|
|
|
537
539
|
this.onError = config.onError;
|
|
538
540
|
this.wantVisemes = config.visemes || false;
|
|
539
541
|
this.enableAudio = config.enableAudio ?? false;
|
|
542
|
+
this.tools = config.tools || [];
|
|
540
543
|
}
|
|
541
544
|
/**
|
|
542
545
|
* Connect to the Lokutor Voice Agent server
|
|
@@ -617,7 +620,10 @@ var VoiceAgentClient = class {
|
|
|
617
620
|
this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
|
|
618
621
|
this.ws.send(JSON.stringify({ type: "language", data: this.language }));
|
|
619
622
|
this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
|
|
620
|
-
|
|
623
|
+
if (this.tools && this.tools.length > 0) {
|
|
624
|
+
this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
|
|
625
|
+
}
|
|
626
|
+
console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
|
|
621
627
|
}
|
|
622
628
|
/**
|
|
623
629
|
* Send raw PCM audio data to the server
|
|
@@ -631,7 +637,11 @@ var VoiceAgentClient = class {
|
|
|
631
637
|
/**
|
|
632
638
|
* Handle incoming binary data (audio response)
|
|
633
639
|
*/
|
|
634
|
-
handleBinaryMessage(data) {
|
|
640
|
+
handleBinaryMessage(data, generation) {
|
|
641
|
+
if (generation !== void 0 && generation < this.currentGeneration) {
|
|
642
|
+
console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
|
|
643
|
+
return;
|
|
644
|
+
}
|
|
635
645
|
if (this.audioManager) {
|
|
636
646
|
this.audioManager.playAudio(data);
|
|
637
647
|
}
|
|
@@ -647,7 +657,7 @@ var VoiceAgentClient = class {
|
|
|
647
657
|
case "audio":
|
|
648
658
|
if (msg.data) {
|
|
649
659
|
const buffer = base64ToUint8Array(msg.data);
|
|
650
|
-
this.handleBinaryMessage(buffer);
|
|
660
|
+
this.handleBinaryMessage(buffer, msg.generation);
|
|
651
661
|
}
|
|
652
662
|
break;
|
|
653
663
|
case "transcript":
|
|
@@ -666,6 +676,14 @@ var VoiceAgentClient = class {
|
|
|
666
676
|
}
|
|
667
677
|
break;
|
|
668
678
|
case "status":
|
|
679
|
+
if (msg.data === "thinking") {
|
|
680
|
+
const newGen = msg.generation || 0;
|
|
681
|
+
if (newGen > this.currentGeneration) {
|
|
682
|
+
console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
|
|
683
|
+
this.currentGeneration = newGen;
|
|
684
|
+
if (this.audioManager) this.audioManager.stopPlayback();
|
|
685
|
+
}
|
|
686
|
+
}
|
|
669
687
|
if (msg.data === "interrupted" && this.audioManager) {
|
|
670
688
|
this.audioManager.stopPlayback();
|
|
671
689
|
}
|
|
@@ -687,6 +705,9 @@ var VoiceAgentClient = class {
|
|
|
687
705
|
if (this.onError) this.onError(msg.data);
|
|
688
706
|
console.error(`\u274C Server error: ${msg.data}`);
|
|
689
707
|
break;
|
|
708
|
+
case "tool_call":
|
|
709
|
+
console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
|
|
710
|
+
break;
|
|
690
711
|
}
|
|
691
712
|
} catch (e) {
|
|
692
713
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -462,6 +462,7 @@ var VoiceAgentClient = class {
|
|
|
462
462
|
prompt;
|
|
463
463
|
voice;
|
|
464
464
|
language;
|
|
465
|
+
tools = [];
|
|
465
466
|
// Callbacks
|
|
466
467
|
onTranscription;
|
|
467
468
|
onResponse;
|
|
@@ -475,6 +476,7 @@ var VoiceAgentClient = class {
|
|
|
475
476
|
wantVisemes = false;
|
|
476
477
|
audioManager = null;
|
|
477
478
|
enableAudio = false;
|
|
479
|
+
currentGeneration = 0;
|
|
478
480
|
// Connection resilience
|
|
479
481
|
isUserDisconnect = false;
|
|
480
482
|
reconnecting = false;
|
|
@@ -493,6 +495,7 @@ var VoiceAgentClient = class {
|
|
|
493
495
|
this.onError = config.onError;
|
|
494
496
|
this.wantVisemes = config.visemes || false;
|
|
495
497
|
this.enableAudio = config.enableAudio ?? false;
|
|
498
|
+
this.tools = config.tools || [];
|
|
496
499
|
}
|
|
497
500
|
/**
|
|
498
501
|
* Connect to the Lokutor Voice Agent server
|
|
@@ -573,7 +576,10 @@ var VoiceAgentClient = class {
|
|
|
573
576
|
this.ws.send(JSON.stringify({ type: "voice", data: this.voice }));
|
|
574
577
|
this.ws.send(JSON.stringify({ type: "language", data: this.language }));
|
|
575
578
|
this.ws.send(JSON.stringify({ type: "visemes", data: this.wantVisemes }));
|
|
576
|
-
|
|
579
|
+
if (this.tools && this.tools.length > 0) {
|
|
580
|
+
this.ws.send(JSON.stringify({ type: "tools", data: this.tools }));
|
|
581
|
+
}
|
|
582
|
+
console.log(`\u2699\uFE0F Configured: voice=${this.voice}, language=${this.language}, visemes=${this.wantVisemes}, tools=${this.tools.length}`);
|
|
577
583
|
}
|
|
578
584
|
/**
|
|
579
585
|
* Send raw PCM audio data to the server
|
|
@@ -587,7 +593,11 @@ var VoiceAgentClient = class {
|
|
|
587
593
|
/**
|
|
588
594
|
* Handle incoming binary data (audio response)
|
|
589
595
|
*/
|
|
590
|
-
handleBinaryMessage(data) {
|
|
596
|
+
handleBinaryMessage(data, generation) {
|
|
597
|
+
if (generation !== void 0 && generation < this.currentGeneration) {
|
|
598
|
+
console.log(`\u{1F5D1}\uFE0F Discarding ghost audio (Gen ${generation} < ${this.currentGeneration})`);
|
|
599
|
+
return;
|
|
600
|
+
}
|
|
591
601
|
if (this.audioManager) {
|
|
592
602
|
this.audioManager.playAudio(data);
|
|
593
603
|
}
|
|
@@ -603,7 +613,7 @@ var VoiceAgentClient = class {
|
|
|
603
613
|
case "audio":
|
|
604
614
|
if (msg.data) {
|
|
605
615
|
const buffer = base64ToUint8Array(msg.data);
|
|
606
|
-
this.handleBinaryMessage(buffer);
|
|
616
|
+
this.handleBinaryMessage(buffer, msg.generation);
|
|
607
617
|
}
|
|
608
618
|
break;
|
|
609
619
|
case "transcript":
|
|
@@ -622,6 +632,14 @@ var VoiceAgentClient = class {
|
|
|
622
632
|
}
|
|
623
633
|
break;
|
|
624
634
|
case "status":
|
|
635
|
+
if (msg.data === "thinking") {
|
|
636
|
+
const newGen = msg.generation || 0;
|
|
637
|
+
if (newGen > this.currentGeneration) {
|
|
638
|
+
console.log(`\u{1F9E0} New thought (Gen ${newGen}) - Clearing audio queue`);
|
|
639
|
+
this.currentGeneration = newGen;
|
|
640
|
+
if (this.audioManager) this.audioManager.stopPlayback();
|
|
641
|
+
}
|
|
642
|
+
}
|
|
625
643
|
if (msg.data === "interrupted" && this.audioManager) {
|
|
626
644
|
this.audioManager.stopPlayback();
|
|
627
645
|
}
|
|
@@ -643,6 +661,9 @@ var VoiceAgentClient = class {
|
|
|
643
661
|
if (this.onError) this.onError(msg.data);
|
|
644
662
|
console.error(`\u274C Server error: ${msg.data}`);
|
|
645
663
|
break;
|
|
664
|
+
case "tool_call":
|
|
665
|
+
console.log(`\u{1F6E0}\uFE0F Tool Call: ${msg.name}(${msg.arguments})`);
|
|
666
|
+
break;
|
|
646
667
|
}
|
|
647
668
|
} catch (e) {
|
|
648
669
|
}
|