@lokutor/sdk 1.0.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/dist/index.d.mts +23 -12
- package/dist/index.d.ts +23 -12
- package/dist/index.js +79 -41
- package/dist/index.mjs +79 -41
- package/package.json +3 -6
package/README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Lokutor JavaScript SDK
|
|
2
2
|
|
|
3
|
+

|
|
4
|
+
|
|
3
5
|
[npm package](https://www.npmjs.com/package/@lokutor/sdk)
|
|
4
6
|
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
7
|
|
package/dist/index.d.mts
CHANGED
|
@@ -28,6 +28,7 @@ declare enum Language {
|
|
|
28
28
|
*/
|
|
29
29
|
declare const AUDIO_CONFIG: {
|
|
30
30
|
SAMPLE_RATE: number;
|
|
31
|
+
SPEAKER_SAMPLE_RATE: number;
|
|
31
32
|
CHANNELS: number;
|
|
32
33
|
CHUNK_DURATION_MS: number;
|
|
33
34
|
readonly CHUNK_SIZE: number;
|
|
@@ -44,10 +45,9 @@ declare const DEFAULT_URLS: {
|
|
|
44
45
|
*/
|
|
45
46
|
interface LokutorConfig {
|
|
46
47
|
apiKey: string;
|
|
47
|
-
serverUrl?: string;
|
|
48
48
|
onTranscription?: (text: string) => void;
|
|
49
49
|
onResponse?: (text: string) => void;
|
|
50
|
-
onAudio?: (data:
|
|
50
|
+
onAudio?: (data: Uint8Array) => void;
|
|
51
51
|
onStatus?: (status: string) => void;
|
|
52
52
|
onError?: (error: any) => void;
|
|
53
53
|
}
|
|
@@ -71,7 +71,6 @@ interface SynthesizeOptions {
|
|
|
71
71
|
declare class VoiceAgentClient {
|
|
72
72
|
private ws;
|
|
73
73
|
private apiKey;
|
|
74
|
-
private serverUrl;
|
|
75
74
|
prompt: string;
|
|
76
75
|
voice: VoiceStyle;
|
|
77
76
|
language: Language;
|
|
@@ -81,6 +80,7 @@ declare class VoiceAgentClient {
|
|
|
81
80
|
private onStatus?;
|
|
82
81
|
private onError?;
|
|
83
82
|
private isConnected;
|
|
83
|
+
private messages;
|
|
84
84
|
constructor(config: LokutorConfig & {
|
|
85
85
|
prompt: string;
|
|
86
86
|
voice?: VoiceStyle;
|
|
@@ -98,7 +98,7 @@ declare class VoiceAgentClient {
|
|
|
98
98
|
* Send raw PCM audio data to the server
|
|
99
99
|
* @param audioData Int16 PCM audio buffer
|
|
100
100
|
*/
|
|
101
|
-
sendAudio(audioData:
|
|
101
|
+
sendAudio(audioData: Uint8Array): void;
|
|
102
102
|
/**
|
|
103
103
|
* Handle incoming binary data (audio response)
|
|
104
104
|
*/
|
|
@@ -109,24 +109,35 @@ declare class VoiceAgentClient {
|
|
|
109
109
|
private handleTextMessage;
|
|
110
110
|
private audioListeners;
|
|
111
111
|
private emit;
|
|
112
|
-
onAudio(callback: (data:
|
|
112
|
+
onAudio(callback: (data: Uint8Array) => void): void;
|
|
113
113
|
/**
|
|
114
114
|
* Disconnect from the server
|
|
115
115
|
*/
|
|
116
116
|
disconnect(): void;
|
|
117
|
+
/**
|
|
118
|
+
* Update the system prompt mid-conversation
|
|
119
|
+
*/
|
|
120
|
+
updatePrompt(newPrompt: string): void;
|
|
121
|
+
/**
|
|
122
|
+
* Get full conversation transcript
|
|
123
|
+
*/
|
|
124
|
+
getTranscript(): Array<{
|
|
125
|
+
role: 'user' | 'agent';
|
|
126
|
+
text: string;
|
|
127
|
+
timestamp: number;
|
|
128
|
+
}>;
|
|
129
|
+
/**
|
|
130
|
+
* Get conversation as formatted text
|
|
131
|
+
*/
|
|
132
|
+
getTranscriptText(): string;
|
|
117
133
|
}
|
|
118
134
|
/**
|
|
119
135
|
* Client for standalone Text-to-Speech synthesis
|
|
120
136
|
*/
|
|
121
137
|
declare class TTSClient {
|
|
122
138
|
private apiKey;
|
|
123
|
-
private serverUrl;
|
|
124
|
-
private onAudioCallback?;
|
|
125
|
-
private onVisemesCallback?;
|
|
126
|
-
private onErrorCallback?;
|
|
127
139
|
constructor(config: {
|
|
128
140
|
apiKey: string;
|
|
129
|
-
serverUrl?: string;
|
|
130
141
|
});
|
|
131
142
|
/**
|
|
132
143
|
* Synthesize text to speech
|
|
@@ -141,7 +152,7 @@ declare class TTSClient {
|
|
|
141
152
|
speed?: number;
|
|
142
153
|
steps?: number;
|
|
143
154
|
visemes?: boolean;
|
|
144
|
-
onAudio?: (data:
|
|
155
|
+
onAudio?: (data: Uint8Array) => void;
|
|
145
156
|
onVisemes?: (visemes: any[]) => void;
|
|
146
157
|
onError?: (error: any) => void;
|
|
147
158
|
}): Promise<void>;
|
|
@@ -157,7 +168,7 @@ declare function simpleConversation(config: LokutorConfig & {
|
|
|
157
168
|
*/
|
|
158
169
|
declare function simpleTTS(options: SynthesizeOptions & {
|
|
159
170
|
apiKey: string;
|
|
160
|
-
onAudio: (buf:
|
|
171
|
+
onAudio: (buf: Uint8Array) => void;
|
|
161
172
|
}): Promise<void>;
|
|
162
173
|
|
|
163
174
|
export { AUDIO_CONFIG, DEFAULT_URLS, Language, type LokutorConfig, type SynthesizeOptions, TTSClient, VoiceAgentClient, VoiceStyle, simpleConversation, simpleTTS };
|
package/dist/index.d.ts
CHANGED
|
@@ -28,6 +28,7 @@ declare enum Language {
|
|
|
28
28
|
*/
|
|
29
29
|
declare const AUDIO_CONFIG: {
|
|
30
30
|
SAMPLE_RATE: number;
|
|
31
|
+
SPEAKER_SAMPLE_RATE: number;
|
|
31
32
|
CHANNELS: number;
|
|
32
33
|
CHUNK_DURATION_MS: number;
|
|
33
34
|
readonly CHUNK_SIZE: number;
|
|
@@ -44,10 +45,9 @@ declare const DEFAULT_URLS: {
|
|
|
44
45
|
*/
|
|
45
46
|
interface LokutorConfig {
|
|
46
47
|
apiKey: string;
|
|
47
|
-
serverUrl?: string;
|
|
48
48
|
onTranscription?: (text: string) => void;
|
|
49
49
|
onResponse?: (text: string) => void;
|
|
50
|
-
onAudio?: (data:
|
|
50
|
+
onAudio?: (data: Uint8Array) => void;
|
|
51
51
|
onStatus?: (status: string) => void;
|
|
52
52
|
onError?: (error: any) => void;
|
|
53
53
|
}
|
|
@@ -71,7 +71,6 @@ interface SynthesizeOptions {
|
|
|
71
71
|
declare class VoiceAgentClient {
|
|
72
72
|
private ws;
|
|
73
73
|
private apiKey;
|
|
74
|
-
private serverUrl;
|
|
75
74
|
prompt: string;
|
|
76
75
|
voice: VoiceStyle;
|
|
77
76
|
language: Language;
|
|
@@ -81,6 +80,7 @@ declare class VoiceAgentClient {
|
|
|
81
80
|
private onStatus?;
|
|
82
81
|
private onError?;
|
|
83
82
|
private isConnected;
|
|
83
|
+
private messages;
|
|
84
84
|
constructor(config: LokutorConfig & {
|
|
85
85
|
prompt: string;
|
|
86
86
|
voice?: VoiceStyle;
|
|
@@ -98,7 +98,7 @@ declare class VoiceAgentClient {
|
|
|
98
98
|
* Send raw PCM audio data to the server
|
|
99
99
|
* @param audioData Int16 PCM audio buffer
|
|
100
100
|
*/
|
|
101
|
-
sendAudio(audioData:
|
|
101
|
+
sendAudio(audioData: Uint8Array): void;
|
|
102
102
|
/**
|
|
103
103
|
* Handle incoming binary data (audio response)
|
|
104
104
|
*/
|
|
@@ -109,24 +109,35 @@ declare class VoiceAgentClient {
|
|
|
109
109
|
private handleTextMessage;
|
|
110
110
|
private audioListeners;
|
|
111
111
|
private emit;
|
|
112
|
-
onAudio(callback: (data:
|
|
112
|
+
onAudio(callback: (data: Uint8Array) => void): void;
|
|
113
113
|
/**
|
|
114
114
|
* Disconnect from the server
|
|
115
115
|
*/
|
|
116
116
|
disconnect(): void;
|
|
117
|
+
/**
|
|
118
|
+
* Update the system prompt mid-conversation
|
|
119
|
+
*/
|
|
120
|
+
updatePrompt(newPrompt: string): void;
|
|
121
|
+
/**
|
|
122
|
+
* Get full conversation transcript
|
|
123
|
+
*/
|
|
124
|
+
getTranscript(): Array<{
|
|
125
|
+
role: 'user' | 'agent';
|
|
126
|
+
text: string;
|
|
127
|
+
timestamp: number;
|
|
128
|
+
}>;
|
|
129
|
+
/**
|
|
130
|
+
* Get conversation as formatted text
|
|
131
|
+
*/
|
|
132
|
+
getTranscriptText(): string;
|
|
117
133
|
}
|
|
118
134
|
/**
|
|
119
135
|
* Client for standalone Text-to-Speech synthesis
|
|
120
136
|
*/
|
|
121
137
|
declare class TTSClient {
|
|
122
138
|
private apiKey;
|
|
123
|
-
private serverUrl;
|
|
124
|
-
private onAudioCallback?;
|
|
125
|
-
private onVisemesCallback?;
|
|
126
|
-
private onErrorCallback?;
|
|
127
139
|
constructor(config: {
|
|
128
140
|
apiKey: string;
|
|
129
|
-
serverUrl?: string;
|
|
130
141
|
});
|
|
131
142
|
/**
|
|
132
143
|
* Synthesize text to speech
|
|
@@ -141,7 +152,7 @@ declare class TTSClient {
|
|
|
141
152
|
speed?: number;
|
|
142
153
|
steps?: number;
|
|
143
154
|
visemes?: boolean;
|
|
144
|
-
onAudio?: (data:
|
|
155
|
+
onAudio?: (data: Uint8Array) => void;
|
|
145
156
|
onVisemes?: (visemes: any[]) => void;
|
|
146
157
|
onError?: (error: any) => void;
|
|
147
158
|
}): Promise<void>;
|
|
@@ -157,7 +168,7 @@ declare function simpleConversation(config: LokutorConfig & {
|
|
|
157
168
|
*/
|
|
158
169
|
declare function simpleTTS(options: SynthesizeOptions & {
|
|
159
170
|
apiKey: string;
|
|
160
|
-
onAudio: (buf:
|
|
171
|
+
onAudio: (buf: Uint8Array) => void;
|
|
161
172
|
}): Promise<void>;
|
|
162
173
|
|
|
163
174
|
export { AUDIO_CONFIG, DEFAULT_URLS, Language, type LokutorConfig, type SynthesizeOptions, TTSClient, VoiceAgentClient, VoiceStyle, simpleConversation, simpleTTS };
|
package/dist/index.js
CHANGED
|
@@ -55,6 +55,7 @@ var Language = /* @__PURE__ */ ((Language2) => {
|
|
|
55
55
|
})(Language || {});
|
|
56
56
|
var AUDIO_CONFIG = {
|
|
57
57
|
SAMPLE_RATE: 44100,
|
|
58
|
+
SPEAKER_SAMPLE_RATE: 16e3,
|
|
58
59
|
CHANNELS: 1,
|
|
59
60
|
CHUNK_DURATION_MS: 20,
|
|
60
61
|
get CHUNK_SIZE() {
|
|
@@ -67,11 +68,17 @@ var DEFAULT_URLS = {
|
|
|
67
68
|
};
|
|
68
69
|
|
|
69
70
|
// src/client.ts
|
|
70
|
-
|
|
71
|
+
function base64ToUint8Array(base64) {
|
|
72
|
+
const binaryString = atob(base64);
|
|
73
|
+
const bytes = new Uint8Array(binaryString.length);
|
|
74
|
+
for (let i = 0; i < binaryString.length; i++) {
|
|
75
|
+
bytes[i] = binaryString.charCodeAt(i);
|
|
76
|
+
}
|
|
77
|
+
return bytes;
|
|
78
|
+
}
|
|
71
79
|
var VoiceAgentClient = class {
|
|
72
80
|
ws = null;
|
|
73
81
|
apiKey;
|
|
74
|
-
serverUrl;
|
|
75
82
|
prompt;
|
|
76
83
|
voice;
|
|
77
84
|
language;
|
|
@@ -82,9 +89,9 @@ var VoiceAgentClient = class {
|
|
|
82
89
|
onStatus;
|
|
83
90
|
onError;
|
|
84
91
|
isConnected = false;
|
|
92
|
+
messages = [];
|
|
85
93
|
constructor(config) {
|
|
86
94
|
this.apiKey = config.apiKey;
|
|
87
|
-
this.serverUrl = config.serverUrl || DEFAULT_URLS.VOICE_AGENT;
|
|
88
95
|
this.prompt = config.prompt;
|
|
89
96
|
this.voice = config.voice || "F1" /* F1 */;
|
|
90
97
|
this.language = config.language || "en" /* ENGLISH */;
|
|
@@ -100,36 +107,36 @@ var VoiceAgentClient = class {
|
|
|
100
107
|
async connect() {
|
|
101
108
|
return new Promise((resolve, reject) => {
|
|
102
109
|
try {
|
|
103
|
-
|
|
104
|
-
const headers = {};
|
|
110
|
+
let url = DEFAULT_URLS.VOICE_AGENT;
|
|
105
111
|
if (this.apiKey) {
|
|
106
|
-
|
|
112
|
+
const separator = url.includes("?") ? "&" : "?";
|
|
113
|
+
url += `${separator}api_key=${this.apiKey}`;
|
|
107
114
|
}
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
this.ws.
|
|
115
|
+
console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
|
|
116
|
+
this.ws = new WebSocket(url);
|
|
117
|
+
this.ws.binaryType = "arraybuffer";
|
|
118
|
+
this.ws.onopen = () => {
|
|
112
119
|
this.isConnected = true;
|
|
113
120
|
console.log("\u2705 Connected to voice agent!");
|
|
114
121
|
this.sendConfig();
|
|
115
122
|
resolve(true);
|
|
116
|
-
}
|
|
117
|
-
this.ws.
|
|
118
|
-
if (
|
|
119
|
-
this.handleBinaryMessage(data);
|
|
123
|
+
};
|
|
124
|
+
this.ws.onmessage = async (event) => {
|
|
125
|
+
if (event.data instanceof ArrayBuffer) {
|
|
126
|
+
this.handleBinaryMessage(new Uint8Array(event.data));
|
|
120
127
|
} else {
|
|
121
|
-
this.handleTextMessage(data.toString());
|
|
128
|
+
this.handleTextMessage(event.data.toString());
|
|
122
129
|
}
|
|
123
|
-
}
|
|
124
|
-
this.ws.
|
|
130
|
+
};
|
|
131
|
+
this.ws.onerror = (err) => {
|
|
125
132
|
console.error("\u274C WebSocket error:", err);
|
|
126
133
|
if (this.onError) this.onError(err);
|
|
127
134
|
if (!this.isConnected) reject(err);
|
|
128
|
-
}
|
|
129
|
-
this.ws.
|
|
135
|
+
};
|
|
136
|
+
this.ws.onclose = () => {
|
|
130
137
|
this.isConnected = false;
|
|
131
138
|
console.log("Disconnected");
|
|
132
|
-
}
|
|
139
|
+
};
|
|
133
140
|
} catch (err) {
|
|
134
141
|
if (this.onError) this.onError(err);
|
|
135
142
|
reject(err);
|
|
@@ -152,7 +159,7 @@ var VoiceAgentClient = class {
|
|
|
152
159
|
*/
|
|
153
160
|
sendAudio(audioData) {
|
|
154
161
|
if (this.ws && this.isConnected) {
|
|
155
|
-
this.ws.send(audioData
|
|
162
|
+
this.ws.send(audioData);
|
|
156
163
|
}
|
|
157
164
|
}
|
|
158
165
|
/**
|
|
@@ -170,11 +177,17 @@ var VoiceAgentClient = class {
|
|
|
170
177
|
switch (msg.type) {
|
|
171
178
|
case "audio":
|
|
172
179
|
if (msg.data) {
|
|
173
|
-
const buffer =
|
|
180
|
+
const buffer = base64ToUint8Array(msg.data);
|
|
174
181
|
this.handleBinaryMessage(buffer);
|
|
175
182
|
}
|
|
176
183
|
break;
|
|
177
184
|
case "transcript":
|
|
185
|
+
const role = msg.role === "user" ? "user" : "agent";
|
|
186
|
+
this.messages.push({
|
|
187
|
+
role,
|
|
188
|
+
text: msg.data,
|
|
189
|
+
timestamp: Date.now()
|
|
190
|
+
});
|
|
178
191
|
if (msg.role === "user") {
|
|
179
192
|
if (this.onTranscription) this.onTranscription(msg.data);
|
|
180
193
|
console.log(`\u{1F4AC} You: ${msg.data}`);
|
|
@@ -220,16 +233,39 @@ var VoiceAgentClient = class {
|
|
|
220
233
|
this.ws = null;
|
|
221
234
|
}
|
|
222
235
|
}
|
|
236
|
+
/**
|
|
237
|
+
* Update the system prompt mid-conversation
|
|
238
|
+
*/
|
|
239
|
+
updatePrompt(newPrompt) {
|
|
240
|
+
this.prompt = newPrompt;
|
|
241
|
+
if (this.ws && this.isConnected) {
|
|
242
|
+
try {
|
|
243
|
+
this.ws.send(JSON.stringify({ type: "prompt", data: newPrompt }));
|
|
244
|
+
console.log(`\u2699\uFE0F Updated prompt: ${newPrompt.substring(0, 50)}...`);
|
|
245
|
+
} catch (error) {
|
|
246
|
+
console.error("Error updating prompt:", error);
|
|
247
|
+
}
|
|
248
|
+
} else {
|
|
249
|
+
console.warn("Not connected - prompt will be updated on next connection");
|
|
250
|
+
}
|
|
251
|
+
}
|
|
252
|
+
/**
|
|
253
|
+
* Get full conversation transcript
|
|
254
|
+
*/
|
|
255
|
+
getTranscript() {
|
|
256
|
+
return this.messages.slice();
|
|
257
|
+
}
|
|
258
|
+
/**
|
|
259
|
+
* Get conversation as formatted text
|
|
260
|
+
*/
|
|
261
|
+
getTranscriptText() {
|
|
262
|
+
return this.messages.map((msg) => `${msg.role === "user" ? "You" : "Agent"}: ${msg.text}`).join("\n");
|
|
263
|
+
}
|
|
223
264
|
};
|
|
224
265
|
var TTSClient = class {
|
|
225
266
|
apiKey;
|
|
226
|
-
serverUrl;
|
|
227
|
-
onAudioCallback;
|
|
228
|
-
onVisemesCallback;
|
|
229
|
-
onErrorCallback;
|
|
230
267
|
constructor(config) {
|
|
231
268
|
this.apiKey = config.apiKey;
|
|
232
|
-
this.serverUrl = config.serverUrl || DEFAULT_URLS.TTS;
|
|
233
269
|
}
|
|
234
270
|
/**
|
|
235
271
|
* Synthesize text to speech
|
|
@@ -240,12 +276,14 @@ var TTSClient = class {
|
|
|
240
276
|
synthesize(options) {
|
|
241
277
|
return new Promise((resolve, reject) => {
|
|
242
278
|
try {
|
|
243
|
-
|
|
279
|
+
let url = DEFAULT_URLS.TTS;
|
|
244
280
|
if (this.apiKey) {
|
|
245
|
-
|
|
281
|
+
const separator = url.includes("?") ? "&" : "?";
|
|
282
|
+
url += `${separator}api_key=${this.apiKey}`;
|
|
246
283
|
}
|
|
247
|
-
const ws = new
|
|
248
|
-
ws.
|
|
284
|
+
const ws = new WebSocket(url);
|
|
285
|
+
ws.binaryType = "arraybuffer";
|
|
286
|
+
ws.onopen = () => {
|
|
249
287
|
const req = {
|
|
250
288
|
text: options.text,
|
|
251
289
|
voice: options.voice || "F1" /* F1 */,
|
|
@@ -255,27 +293,27 @@ var TTSClient = class {
|
|
|
255
293
|
visemes: options.visemes || false
|
|
256
294
|
};
|
|
257
295
|
ws.send(JSON.stringify(req));
|
|
258
|
-
}
|
|
259
|
-
ws.
|
|
260
|
-
if (
|
|
261
|
-
if (options.onAudio) options.onAudio(data);
|
|
296
|
+
};
|
|
297
|
+
ws.onmessage = async (event) => {
|
|
298
|
+
if (event.data instanceof ArrayBuffer) {
|
|
299
|
+
if (options.onAudio) options.onAudio(new Uint8Array(event.data));
|
|
262
300
|
} else {
|
|
263
301
|
try {
|
|
264
|
-
const msg = JSON.parse(data.toString());
|
|
302
|
+
const msg = JSON.parse(event.data.toString());
|
|
265
303
|
if (Array.isArray(msg) && options.onVisemes) {
|
|
266
304
|
options.onVisemes(msg);
|
|
267
305
|
}
|
|
268
306
|
} catch (e) {
|
|
269
307
|
}
|
|
270
308
|
}
|
|
271
|
-
}
|
|
272
|
-
ws.
|
|
309
|
+
};
|
|
310
|
+
ws.onerror = (err) => {
|
|
273
311
|
if (options.onError) options.onError(err);
|
|
274
312
|
reject(err);
|
|
275
|
-
}
|
|
276
|
-
ws.
|
|
313
|
+
};
|
|
314
|
+
ws.onclose = () => {
|
|
277
315
|
resolve();
|
|
278
|
-
}
|
|
316
|
+
};
|
|
279
317
|
} catch (err) {
|
|
280
318
|
if (options.onError) options.onError(err);
|
|
281
319
|
reject(err);
|
package/dist/index.mjs
CHANGED
|
@@ -22,6 +22,7 @@ var Language = /* @__PURE__ */ ((Language2) => {
|
|
|
22
22
|
})(Language || {});
|
|
23
23
|
var AUDIO_CONFIG = {
|
|
24
24
|
SAMPLE_RATE: 44100,
|
|
25
|
+
SPEAKER_SAMPLE_RATE: 16e3,
|
|
25
26
|
CHANNELS: 1,
|
|
26
27
|
CHUNK_DURATION_MS: 20,
|
|
27
28
|
get CHUNK_SIZE() {
|
|
@@ -34,11 +35,17 @@ var DEFAULT_URLS = {
|
|
|
34
35
|
};
|
|
35
36
|
|
|
36
37
|
// src/client.ts
|
|
37
|
-
|
|
38
|
+
function base64ToUint8Array(base64) {
|
|
39
|
+
const binaryString = atob(base64);
|
|
40
|
+
const bytes = new Uint8Array(binaryString.length);
|
|
41
|
+
for (let i = 0; i < binaryString.length; i++) {
|
|
42
|
+
bytes[i] = binaryString.charCodeAt(i);
|
|
43
|
+
}
|
|
44
|
+
return bytes;
|
|
45
|
+
}
|
|
38
46
|
var VoiceAgentClient = class {
|
|
39
47
|
ws = null;
|
|
40
48
|
apiKey;
|
|
41
|
-
serverUrl;
|
|
42
49
|
prompt;
|
|
43
50
|
voice;
|
|
44
51
|
language;
|
|
@@ -49,9 +56,9 @@ var VoiceAgentClient = class {
|
|
|
49
56
|
onStatus;
|
|
50
57
|
onError;
|
|
51
58
|
isConnected = false;
|
|
59
|
+
messages = [];
|
|
52
60
|
constructor(config) {
|
|
53
61
|
this.apiKey = config.apiKey;
|
|
54
|
-
this.serverUrl = config.serverUrl || DEFAULT_URLS.VOICE_AGENT;
|
|
55
62
|
this.prompt = config.prompt;
|
|
56
63
|
this.voice = config.voice || "F1" /* F1 */;
|
|
57
64
|
this.language = config.language || "en" /* ENGLISH */;
|
|
@@ -67,36 +74,36 @@ var VoiceAgentClient = class {
|
|
|
67
74
|
async connect() {
|
|
68
75
|
return new Promise((resolve, reject) => {
|
|
69
76
|
try {
|
|
70
|
-
|
|
71
|
-
const headers = {};
|
|
77
|
+
let url = DEFAULT_URLS.VOICE_AGENT;
|
|
72
78
|
if (this.apiKey) {
|
|
73
|
-
|
|
79
|
+
const separator = url.includes("?") ? "&" : "?";
|
|
80
|
+
url += `${separator}api_key=${this.apiKey}`;
|
|
74
81
|
}
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
this.ws.
|
|
82
|
+
console.log(`\u{1F517} Connecting to ${DEFAULT_URLS.VOICE_AGENT}...`);
|
|
83
|
+
this.ws = new WebSocket(url);
|
|
84
|
+
this.ws.binaryType = "arraybuffer";
|
|
85
|
+
this.ws.onopen = () => {
|
|
79
86
|
this.isConnected = true;
|
|
80
87
|
console.log("\u2705 Connected to voice agent!");
|
|
81
88
|
this.sendConfig();
|
|
82
89
|
resolve(true);
|
|
83
|
-
}
|
|
84
|
-
this.ws.
|
|
85
|
-
if (
|
|
86
|
-
this.handleBinaryMessage(data);
|
|
90
|
+
};
|
|
91
|
+
this.ws.onmessage = async (event) => {
|
|
92
|
+
if (event.data instanceof ArrayBuffer) {
|
|
93
|
+
this.handleBinaryMessage(new Uint8Array(event.data));
|
|
87
94
|
} else {
|
|
88
|
-
this.handleTextMessage(data.toString());
|
|
95
|
+
this.handleTextMessage(event.data.toString());
|
|
89
96
|
}
|
|
90
|
-
}
|
|
91
|
-
this.ws.
|
|
97
|
+
};
|
|
98
|
+
this.ws.onerror = (err) => {
|
|
92
99
|
console.error("\u274C WebSocket error:", err);
|
|
93
100
|
if (this.onError) this.onError(err);
|
|
94
101
|
if (!this.isConnected) reject(err);
|
|
95
|
-
}
|
|
96
|
-
this.ws.
|
|
102
|
+
};
|
|
103
|
+
this.ws.onclose = () => {
|
|
97
104
|
this.isConnected = false;
|
|
98
105
|
console.log("Disconnected");
|
|
99
|
-
}
|
|
106
|
+
};
|
|
100
107
|
} catch (err) {
|
|
101
108
|
if (this.onError) this.onError(err);
|
|
102
109
|
reject(err);
|
|
@@ -119,7 +126,7 @@ var VoiceAgentClient = class {
|
|
|
119
126
|
*/
|
|
120
127
|
sendAudio(audioData) {
|
|
121
128
|
if (this.ws && this.isConnected) {
|
|
122
|
-
this.ws.send(audioData
|
|
129
|
+
this.ws.send(audioData);
|
|
123
130
|
}
|
|
124
131
|
}
|
|
125
132
|
/**
|
|
@@ -137,11 +144,17 @@ var VoiceAgentClient = class {
|
|
|
137
144
|
switch (msg.type) {
|
|
138
145
|
case "audio":
|
|
139
146
|
if (msg.data) {
|
|
140
|
-
const buffer =
|
|
147
|
+
const buffer = base64ToUint8Array(msg.data);
|
|
141
148
|
this.handleBinaryMessage(buffer);
|
|
142
149
|
}
|
|
143
150
|
break;
|
|
144
151
|
case "transcript":
|
|
152
|
+
const role = msg.role === "user" ? "user" : "agent";
|
|
153
|
+
this.messages.push({
|
|
154
|
+
role,
|
|
155
|
+
text: msg.data,
|
|
156
|
+
timestamp: Date.now()
|
|
157
|
+
});
|
|
145
158
|
if (msg.role === "user") {
|
|
146
159
|
if (this.onTranscription) this.onTranscription(msg.data);
|
|
147
160
|
console.log(`\u{1F4AC} You: ${msg.data}`);
|
|
@@ -187,16 +200,39 @@ var VoiceAgentClient = class {
|
|
|
187
200
|
this.ws = null;
|
|
188
201
|
}
|
|
189
202
|
}
|
|
203
|
+
/**
|
|
204
|
+
* Update the system prompt mid-conversation
|
|
205
|
+
*/
|
|
206
|
+
updatePrompt(newPrompt) {
|
|
207
|
+
this.prompt = newPrompt;
|
|
208
|
+
if (this.ws && this.isConnected) {
|
|
209
|
+
try {
|
|
210
|
+
this.ws.send(JSON.stringify({ type: "prompt", data: newPrompt }));
|
|
211
|
+
console.log(`\u2699\uFE0F Updated prompt: ${newPrompt.substring(0, 50)}...`);
|
|
212
|
+
} catch (error) {
|
|
213
|
+
console.error("Error updating prompt:", error);
|
|
214
|
+
}
|
|
215
|
+
} else {
|
|
216
|
+
console.warn("Not connected - prompt will be updated on next connection");
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
/**
|
|
220
|
+
* Get full conversation transcript
|
|
221
|
+
*/
|
|
222
|
+
getTranscript() {
|
|
223
|
+
return this.messages.slice();
|
|
224
|
+
}
|
|
225
|
+
/**
|
|
226
|
+
* Get conversation as formatted text
|
|
227
|
+
*/
|
|
228
|
+
getTranscriptText() {
|
|
229
|
+
return this.messages.map((msg) => `${msg.role === "user" ? "You" : "Agent"}: ${msg.text}`).join("\n");
|
|
230
|
+
}
|
|
190
231
|
};
|
|
191
232
|
var TTSClient = class {
|
|
192
233
|
apiKey;
|
|
193
|
-
serverUrl;
|
|
194
|
-
onAudioCallback;
|
|
195
|
-
onVisemesCallback;
|
|
196
|
-
onErrorCallback;
|
|
197
234
|
constructor(config) {
|
|
198
235
|
this.apiKey = config.apiKey;
|
|
199
|
-
this.serverUrl = config.serverUrl || DEFAULT_URLS.TTS;
|
|
200
236
|
}
|
|
201
237
|
/**
|
|
202
238
|
* Synthesize text to speech
|
|
@@ -207,12 +243,14 @@ var TTSClient = class {
|
|
|
207
243
|
synthesize(options) {
|
|
208
244
|
return new Promise((resolve, reject) => {
|
|
209
245
|
try {
|
|
210
|
-
|
|
246
|
+
let url = DEFAULT_URLS.TTS;
|
|
211
247
|
if (this.apiKey) {
|
|
212
|
-
|
|
248
|
+
const separator = url.includes("?") ? "&" : "?";
|
|
249
|
+
url += `${separator}api_key=${this.apiKey}`;
|
|
213
250
|
}
|
|
214
|
-
const ws = new WebSocket(
|
|
215
|
-
ws.
|
|
251
|
+
const ws = new WebSocket(url);
|
|
252
|
+
ws.binaryType = "arraybuffer";
|
|
253
|
+
ws.onopen = () => {
|
|
216
254
|
const req = {
|
|
217
255
|
text: options.text,
|
|
218
256
|
voice: options.voice || "F1" /* F1 */,
|
|
@@ -222,27 +260,27 @@ var TTSClient = class {
|
|
|
222
260
|
visemes: options.visemes || false
|
|
223
261
|
};
|
|
224
262
|
ws.send(JSON.stringify(req));
|
|
225
|
-
}
|
|
226
|
-
ws.
|
|
227
|
-
if (
|
|
228
|
-
if (options.onAudio) options.onAudio(data);
|
|
263
|
+
};
|
|
264
|
+
ws.onmessage = async (event) => {
|
|
265
|
+
if (event.data instanceof ArrayBuffer) {
|
|
266
|
+
if (options.onAudio) options.onAudio(new Uint8Array(event.data));
|
|
229
267
|
} else {
|
|
230
268
|
try {
|
|
231
|
-
const msg = JSON.parse(data.toString());
|
|
269
|
+
const msg = JSON.parse(event.data.toString());
|
|
232
270
|
if (Array.isArray(msg) && options.onVisemes) {
|
|
233
271
|
options.onVisemes(msg);
|
|
234
272
|
}
|
|
235
273
|
} catch (e) {
|
|
236
274
|
}
|
|
237
275
|
}
|
|
238
|
-
}
|
|
239
|
-
ws.
|
|
276
|
+
};
|
|
277
|
+
ws.onerror = (err) => {
|
|
240
278
|
if (options.onError) options.onError(err);
|
|
241
279
|
reject(err);
|
|
242
|
-
}
|
|
243
|
-
ws.
|
|
280
|
+
};
|
|
281
|
+
ws.onclose = () => {
|
|
244
282
|
resolve();
|
|
245
|
-
}
|
|
283
|
+
};
|
|
246
284
|
} catch (err) {
|
|
247
285
|
if (options.onError) options.onError(err);
|
|
248
286
|
reject(err);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lokutor/sdk",
|
|
3
|
-
"version": "1.0.0",
|
|
3
|
+
"version": "1.1.1",
|
|
4
4
|
"description": "JavaScript/TypeScript SDK for Lokutor Real-time Voice AI",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"module": "./dist/index.mjs",
|
|
@@ -27,14 +27,11 @@
|
|
|
27
27
|
],
|
|
28
28
|
"author": "Lokutor AI",
|
|
29
29
|
"license": "MIT",
|
|
30
|
-
"dependencies": {
|
|
31
|
-
"ws": "^8.16.0"
|
|
32
|
-
},
|
|
30
|
+
"dependencies": {},
|
|
33
31
|
"devDependencies": {
|
|
34
32
|
"@types/node": "^20.10.0",
|
|
35
|
-
"@types/ws": "^8.5.10",
|
|
36
33
|
"tsup": "^8.0.1",
|
|
37
34
|
"typescript": "^5.3.2",
|
|
38
35
|
"vitest": "^1.0.1"
|
|
39
36
|
}
|
|
40
|
-
}
|
|
37
|
+
}
|