kugelaudio 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -13
- package/dist/index.d.mts +518 -26
- package/dist/index.d.ts +518 -26
- package/dist/index.js +864 -112
- package/dist/index.mjs +858 -112
- package/package.json +9 -8
- package/src/client.test.ts +548 -0
- package/src/client.ts +885 -103
- package/src/errors.ts +266 -18
- package/src/index.ts +17 -2
- package/src/types.ts +215 -8
- package/src/websocket.ts +38 -18
package/dist/index.js
CHANGED
|
@@ -22,12 +22,18 @@ var index_exports = {};
|
|
|
22
22
|
__export(index_exports, {
|
|
23
23
|
AuthenticationError: () => AuthenticationError,
|
|
24
24
|
ConnectionError: () => ConnectionError,
|
|
25
|
+
ErrorCodes: () => ErrorCodes,
|
|
25
26
|
InsufficientCreditsError: () => InsufficientCreditsError,
|
|
26
27
|
KugelAudio: () => KugelAudio,
|
|
27
28
|
KugelAudioError: () => KugelAudioError,
|
|
28
29
|
RateLimitError: () => RateLimitError,
|
|
29
30
|
ValidationError: () => ValidationError,
|
|
31
|
+
WsCloseCodes: () => WsCloseCodes,
|
|
30
32
|
base64ToArrayBuffer: () => base64ToArrayBuffer,
|
|
33
|
+
classifyHttpError: () => classifyHttpError,
|
|
34
|
+
classifyWsClose: () => classifyWsClose,
|
|
35
|
+
classifyWsFrame: () => classifyWsFrame,
|
|
36
|
+
classifyWsHandshakeError: () => classifyWsHandshakeError,
|
|
31
37
|
createWavBlob: () => createWavBlob,
|
|
32
38
|
createWavFile: () => createWavFile,
|
|
33
39
|
decodePCM16: () => decodePCM16
|
|
@@ -35,49 +41,192 @@ __export(index_exports, {
|
|
|
35
41
|
module.exports = __toCommonJS(index_exports);
|
|
36
42
|
|
|
37
43
|
// src/errors.ts
|
|
44
|
+
/**
 * Machine-readable error codes that the classifiers in this module compare
 * against the `error_code` field of HTTP error bodies and WebSocket error
 * frames.
 *
 * Frozen so that consumers of this exported table cannot accidentally
 * rewrite a code and silently break error classification.
 */
var ErrorCodes = Object.freeze({
  UNAUTHORIZED: "UNAUTHORIZED",
  RATE_LIMITED: "RATE_LIMITED",
  INSUFFICIENT_CREDITS: "INSUFFICIENT_CREDITS",
  MODEL_UNAVAILABLE: "MODEL_UNAVAILABLE",
  EMPTY_AUDIO: "EMPTY_AUDIO",
  VALIDATION: "VALIDATION_ERROR",
  INTERNAL: "INTERNAL_ERROR",
  NOT_FOUND: "NOT_FOUND"
});
|
|
54
|
+
/**
 * Application-level WebSocket close codes that the client maps onto typed
 * errors when a socket is closed by the server.
 *
 * Frozen so the exported table cannot be mutated by consumers.
 */
var WsCloseCodes = Object.freeze({
  UNAUTHORIZED: 4001,
  INSUFFICIENT_CREDITS: 4003,
  RATE_LIMITED: 4029,
  MODEL_UNAVAILABLE: 4500
});
|
|
60
|
+
var API_KEYS_URL = "https://app.kugelaudio.com/settings/api-keys";
|
|
61
|
+
var BILLING_URL = "https://app.kugelaudio.com/billing";
|
|
38
62
|
var KugelAudioError = class _KugelAudioError extends Error {
|
|
39
|
-
constructor(message,
|
|
40
|
-
super(message);
|
|
63
|
+
constructor(message, options = {}) {
|
|
64
|
+
super(options.requestId ? `${message} (request_id: ${options.requestId})` : message);
|
|
41
65
|
this.name = "KugelAudioError";
|
|
42
|
-
this.statusCode = statusCode;
|
|
66
|
+
this.statusCode = options.statusCode;
|
|
67
|
+
this.errorCode = options.errorCode;
|
|
68
|
+
this.requestId = options.requestId;
|
|
69
|
+
this.retryAfter = options.retryAfter;
|
|
43
70
|
Object.setPrototypeOf(this, _KugelAudioError.prototype);
|
|
44
71
|
}
|
|
45
72
|
};
|
|
46
73
|
var AuthenticationError = class _AuthenticationError extends KugelAudioError {
|
|
47
|
-
constructor(message =
|
|
48
|
-
super(
|
|
74
|
+
constructor(message, options = {}) {
|
|
75
|
+
super(
|
|
76
|
+
message ?? `KugelAudio rejected the API key. Check it is current at ${API_KEYS_URL}.`,
|
|
77
|
+
{ statusCode: 401, errorCode: ErrorCodes.UNAUTHORIZED, ...options }
|
|
78
|
+
);
|
|
49
79
|
this.name = "AuthenticationError";
|
|
50
80
|
Object.setPrototypeOf(this, _AuthenticationError.prototype);
|
|
51
81
|
}
|
|
52
82
|
};
|
|
53
83
|
var RateLimitError = class _RateLimitError extends KugelAudioError {
|
|
54
|
-
constructor(message =
|
|
55
|
-
|
|
84
|
+
constructor(message, options = {}) {
|
|
85
|
+
const msg = message ?? (options.retryAfter ? `KugelAudio rate limit hit; retry after ${options.retryAfter}s.` : "KugelAudio rate limit hit; retry shortly.");
|
|
86
|
+
super(msg, { statusCode: 429, errorCode: ErrorCodes.RATE_LIMITED, ...options });
|
|
56
87
|
this.name = "RateLimitError";
|
|
57
88
|
Object.setPrototypeOf(this, _RateLimitError.prototype);
|
|
58
89
|
}
|
|
59
90
|
};
|
|
60
91
|
var InsufficientCreditsError = class _InsufficientCreditsError extends KugelAudioError {
|
|
61
|
-
constructor(message =
|
|
62
|
-
super(
|
|
92
|
+
constructor(message, options = {}) {
|
|
93
|
+
super(
|
|
94
|
+
message ?? `Your KugelAudio account is out of credits. Top up at ${BILLING_URL}.`,
|
|
95
|
+
{ statusCode: 402, errorCode: ErrorCodes.INSUFFICIENT_CREDITS, ...options }
|
|
96
|
+
);
|
|
63
97
|
this.name = "InsufficientCreditsError";
|
|
64
98
|
Object.setPrototypeOf(this, _InsufficientCreditsError.prototype);
|
|
65
99
|
}
|
|
66
100
|
};
|
|
67
101
|
var ValidationError = class _ValidationError extends KugelAudioError {
|
|
68
|
-
constructor(message) {
|
|
69
|
-
super(message, 400);
|
|
102
|
+
constructor(message, options = {}) {
|
|
103
|
+
super(message, { statusCode: 400, errorCode: ErrorCodes.VALIDATION, ...options });
|
|
70
104
|
this.name = "ValidationError";
|
|
71
105
|
Object.setPrototypeOf(this, _ValidationError.prototype);
|
|
72
106
|
}
|
|
73
107
|
};
|
|
74
108
|
var ConnectionError = class _ConnectionError extends KugelAudioError {
|
|
75
|
-
constructor(message =
|
|
76
|
-
super(message, 503);
|
|
109
|
+
constructor(message, options = {}) {
|
|
110
|
+
super(message, { statusCode: 503, ...options });
|
|
77
111
|
this.name = "ConnectionError";
|
|
78
112
|
Object.setPrototypeOf(this, _ConnectionError.prototype);
|
|
79
113
|
}
|
|
80
114
|
};
|
|
115
|
+
/**
 * Construct the most specific error class for a status / error-code pair.
 *
 * Branches are checked in this order, each matching on either the error code
 * or the HTTP status: unauthorized (401), insufficient credits (402), rate
 * limited (429), validation (400), model unavailable (503). Anything else
 * becomes a plain KugelAudioError.
 *
 * @param {number|undefined} status - HTTP status, or undefined when there is none
 *   (for example an error frame received over a WebSocket).
 * @param {string|undefined} errorCode - Machine-readable code, if the server sent one.
 * @param {string} message - Server-provided message; when empty the error
 *   class default (or a generic fallback) is used instead.
 * @param {object} [opts] - Extra error options (e.g. requestId, retryAfter).
 * @returns {KugelAudioError} An instance of the matching error class.
 */
function build(status, errorCode, message, opts = {}) {
  const common = { ...opts };
  // Only override the class defaults when a concrete value is known.
  if (status !== void 0) common.statusCode = status;
  if (errorCode !== void 0) common.errorCode = errorCode;
  if (errorCode === ErrorCodes.UNAUTHORIZED || status === 401) {
    return new AuthenticationError(message || void 0, common);
  }
  if (errorCode === ErrorCodes.INSUFFICIENT_CREDITS || status === 402) {
    return new InsufficientCreditsError(message || void 0, common);
  }
  if (errorCode === ErrorCodes.RATE_LIMITED || status === 429) {
    return new RateLimitError(message || void 0, common);
  }
  if (errorCode === ErrorCodes.VALIDATION || status === 400) {
    return new ValidationError(message || "Request validation failed.", common);
  }
  if (errorCode === ErrorCodes.MODEL_UNAVAILABLE || status === 503) {
    const detail = message || "service temporarily unavailable";
    return new ConnectionError(
      `KugelAudio is temporarily unavailable: ${detail}. Retry shortly.`,
      common
    );
  }
  if (message) {
    return new KugelAudioError(message, common);
  }
  // No server message: describe what is known instead of rendering
  // "HTTP undefined" when the error did not come from an HTTP response.
  let fallback = "Unknown KugelAudio error.";
  if (status !== void 0) {
    fallback = `HTTP ${status}`;
  } else if (errorCode !== void 0) {
    fallback = `KugelAudio error (${errorCode}).`;
  }
  return new KugelAudioError(fallback, common);
}
|
|
140
|
+
/**
 * Read a single header value from either a Headers-like object (anything
 * exposing a `get` function) or a plain key/value record.
 *
 * @param {object|null|undefined} headers - Header container; may be absent.
 * @param {string} name - Header name to look up.
 * @returns {*} The header value, or undefined when it is missing or when no
 *   header container was supplied.
 */
function readHeader(headers, name) {
  // A missing header container simply means "no such header" rather than a crash.
  if (headers == null) return void 0;
  if (typeof headers.get === "function") {
    return headers.get(name) ?? void 0;
  }
  const lowered = name.toLowerCase();
  const direct = headers[name] ?? headers[lowered];
  if (direct != null) return direct;
  // Plain records may use arbitrary casing (e.g. "X-Request-ID"); header
  // names are case-insensitive, so fall back to a case-insensitive scan.
  for (const key of Object.keys(headers)) {
    if (key.toLowerCase() === lowered && headers[key] != null) {
      return headers[key];
    }
  }
  return void 0;
}
|
|
147
|
+
/**
 * Turn a failed HTTP response into a typed error.
 *
 * The body is parsed as JSON when possible to extract `error_code`, a
 * message (`error` or `detail`) and `retry_after`. When the body carries no
 * retry hint the `Retry-After` header is consulted. The request id is read
 * from the `x-request-id` header. If no message could be extracted, the
 * trimmed raw body text is used.
 *
 * @param {number} status - HTTP status code of the response.
 * @param {string} [bodyText] - Raw response body.
 * @param {object} [headers] - Headers-like object or plain record; optional.
 * @returns {KugelAudioError} The error produced by `build()`.
 */
function classifyHttpError(status, bodyText, headers) {
  let errorCode;
  let message = "";
  let retryAfter;
  // Tolerate callers that have no headers to pass.
  const headerSource = headers ?? {};
  // Render one entry of an array-valued message. Object entries would
  // otherwise stringify to the useless "[object Object]".
  const describe = (entry) => {
    if (entry && typeof entry === "object") {
      if (typeof entry.msg === "string") return entry.msg;
      if (typeof entry.message === "string") return entry.message;
      try {
        return JSON.stringify(entry);
      } catch {
        // Not serializable (e.g. cyclic); fall through to String().
      }
    }
    return String(entry);
  };
  if (bodyText) {
    try {
      const body = JSON.parse(bodyText);
      if (body && typeof body === "object") {
        errorCode = typeof body.error_code === "string" ? body.error_code : void 0;
        const msg = body.error ?? body.detail;
        if (Array.isArray(msg)) {
          message = msg.map(describe).join("; ");
        } else if (typeof msg === "string") {
          message = msg;
        }
        if (typeof body.retry_after === "number") {
          retryAfter = body.retry_after;
        }
      }
    } catch {
      // Body is not JSON; the raw text is used as the message below.
    }
  }
  if (retryAfter === void 0) {
    const header = readHeader(headerSource, "Retry-After") ?? readHeader(headerSource, "retry-after");
    if (header) {
      const raw = String(header).trim();
      const seconds = raw === "" ? NaN : Number(raw);
      if (Number.isFinite(seconds)) {
        // Delay in seconds; negative values are meaningless and ignored.
        if (seconds >= 0) retryAfter = seconds;
      } else if (raw !== "") {
        // Retry-After may also be an HTTP-date; convert it to a delay.
        const when = Date.parse(raw);
        if (Number.isFinite(when)) {
          retryAfter = Math.max(0, Math.ceil((when - Date.now()) / 1e3));
        }
      }
    }
  }
  const requestId = readHeader(headerSource, "x-request-id") ?? readHeader(headerSource, "X-Request-Id");
  if (!message) {
    message = (bodyText || "").trim();
  }
  return build(status, errorCode, message, { requestId, retryAfter });
}
|
|
182
|
+
/**
 * Turn a WebSocket error frame into a typed error.
 *
 * Reads `error_code`, `error` and `retry_after` from the frame. Fields of an
 * unexpected type are ignored so they cannot leak into the error (for
 * example an object-valued `error` rendering as "[object Object]"), which
 * mirrors the type checks applied to HTTP error bodies.
 *
 * @param {object} data - Parsed JSON frame received from the server.
 * @returns {KugelAudioError} The error produced by `build()` (no HTTP status).
 */
function classifyWsFrame(data) {
  const errorCode = typeof data.error_code === "string" ? data.error_code : void 0;
  const hasText = typeof data.error === "string" && data.error.trim() !== "";
  const message = hasText ? data.error : "Server reported an error.";
  const retryAfter = typeof data.retry_after === "number" ? data.retry_after : void 0;
  return build(void 0, errorCode, message, { retryAfter });
}
|
|
188
|
+
/**
 * Turn a WebSocket close event into a typed error.
 *
 * The application close codes in `WsCloseCodes` map to AuthenticationError,
 * InsufficientCreditsError, RateLimitError and ConnectionError respectively;
 * any other code becomes a generic ConnectionError that mentions the reason
 * and code.
 *
 * @param {number|undefined} code - Close code from the close event.
 * @param {string|null|undefined} reason - Close reason text, if any.
 * @returns {KugelAudioError} The matching typed error.
 */
function classifyWsClose(code, reason) {
  const reasonTxt = (reason ?? "").trim();
  if (code === WsCloseCodes.UNAUTHORIZED) {
    let msg = `KugelAudio rejected the API key. Check it is current at ${API_KEYS_URL}.`;
    if (reasonTxt) msg = `${msg} (${reasonTxt})`;
    return new AuthenticationError(msg);
  }
  if (code === WsCloseCodes.INSUFFICIENT_CREDITS) {
    return new InsufficientCreditsError();
  }
  if (code === WsCloseCodes.RATE_LIMITED) {
    return new RateLimitError();
  }
  if (code === WsCloseCodes.MODEL_UNAVAILABLE) {
    const suffix = reasonTxt ? ` (${reasonTxt})` : "";
    // Tag the error with MODEL_UNAVAILABLE so callers checking `errorCode`
    // see the same value that build() attaches on the HTTP / frame path.
    return new ConnectionError(
      `KugelAudio model is temporarily unavailable. Retry shortly.${suffix}`,
      { errorCode: ErrorCodes.MODEL_UNAVAILABLE }
    );
  }
  const detail = reasonTxt || "no reason given";
  const codeStr = code !== void 0 ? ` (code ${code})` : "";
  return new ConnectionError(
    `KugelAudio WebSocket closed by server: ${detail}${codeStr}.`
  );
}
|
|
213
|
+
/**
 * Try to derive a typed error from a failed WebSocket handshake.
 *
 * The HTTP status is taken from a numeric `statusCode` property when
 * present, otherwise parsed out of a message of the form
 * "Unexpected server response: NNN".
 *
 * @param {*} err - The value surfaced by the socket's error event.
 * @returns {KugelAudioError|null} A typed error, or null when `err` is not an
 *   object or carries no recognizable status.
 */
function classifyWsHandshakeError(err) {
  if (err === null || typeof err !== "object") {
    return null;
  }
  const messageIsText = typeof err.message === "string";
  let status = typeof err.statusCode === "number" ? err.statusCode : void 0;
  if (status === void 0 && messageIsText) {
    const found = err.message.match(/Unexpected server response:\s*(\d{3})/i);
    if (found) {
      status = Number(found[1]);
    }
  }
  if (status === void 0) {
    return null;
  }
  // A 403 during the handshake is reported as an authentication failure.
  return status === 403
    ? new AuthenticationError()
    : build(status, void 0, messageIsText ? err.message : "");
}
|
|
81
230
|
|
|
82
231
|
// src/utils.ts
|
|
83
232
|
function base64ToArrayBuffer(base64) {
|
|
@@ -137,33 +286,61 @@ function createWavBlob(audio, sampleRate) {
|
|
|
137
286
|
|
|
138
287
|
// src/websocket.ts
|
|
139
288
|
var _cachedWs = null;
|
|
289
|
+
/**
 * Report whether the code is running under Node.js.
 *
 * True only when a global `process` exists, it has a `versions` object, and
 * `process.versions.node` is a string.
 *
 * @returns {boolean}
 */
function isNodeJs() {
|
|
290
|
+
return typeof process !== "undefined" && !!process.versions && typeof process.versions.node === "string";
|
|
291
|
+
}
|
|
140
292
|
function getWebSocket() {
|
|
141
293
|
if (_cachedWs) return _cachedWs;
|
|
294
|
+
if (isNodeJs()) {
|
|
295
|
+
try {
|
|
296
|
+
const _require = typeof require !== "undefined" ? require : Function('return typeof require !== "undefined" ? require : undefined')();
|
|
297
|
+
if (_require) {
|
|
298
|
+
const ws = _require("ws");
|
|
299
|
+
_cachedWs = ws.default || ws;
|
|
300
|
+
return _cachedWs;
|
|
301
|
+
}
|
|
302
|
+
} catch {
|
|
303
|
+
}
|
|
304
|
+
}
|
|
142
305
|
if (typeof globalThis !== "undefined" && typeof globalThis.WebSocket !== "undefined") {
|
|
143
306
|
_cachedWs = globalThis.WebSocket;
|
|
144
307
|
return _cachedWs;
|
|
145
308
|
}
|
|
146
|
-
try {
|
|
147
|
-
const _require = typeof require !== "undefined" ? require : Function('return typeof require !== "undefined" ? require : undefined')();
|
|
148
|
-
if (_require) {
|
|
149
|
-
const ws = _require("ws");
|
|
150
|
-
_cachedWs = ws.default || ws;
|
|
151
|
-
return _cachedWs;
|
|
152
|
-
}
|
|
153
|
-
} catch {
|
|
154
|
-
}
|
|
155
309
|
throw new Error(
|
|
156
310
|
'WebSocket not available. In Node.js, install the "ws" package: npm install ws'
|
|
157
311
|
);
|
|
158
312
|
}
|
|
159
313
|
|
|
160
314
|
// src/client.ts
|
|
161
|
-
var
|
|
315
|
+
/**
 * Base API URL for each supported region key.
 *
 * Frozen so the shared lookup table cannot be mutated at runtime.
 */
var REGION_URLS = Object.freeze({
  eu: "https://api.kugelaudio.com",
  us: "https://us-api.kugelaudio.com",
  global: "https://global-api.kugelaudio.com"
});
|
|
320
|
+
var REGION_PREFIXES = ["eu-", "us-", "global-"];
|
|
321
|
+
/**
 * Split an optional region prefix off an API key.
 *
 * The first entry of REGION_PREFIXES that the key starts with is removed;
 * the region is that prefix without its trailing dash.
 *
 * @param {string} apiKey - Key as supplied by the caller.
 * @returns {{cleanKey: string, detectedRegion?: string}} The key without its
 *   prefix, plus the detected region when a prefix matched.
 */
function parseApiKey(apiKey) {
  const matched = REGION_PREFIXES.find((candidate) => apiKey.startsWith(candidate));
  if (matched === void 0) {
    return { cleanKey: apiKey };
  }
  return {
    cleanKey: apiKey.slice(matched.length),
    detectedRegion: matched.slice(0, -1)
  };
}
|
|
162
329
|
function createWs(url) {
|
|
163
330
|
const WS = getWebSocket();
|
|
164
331
|
return new WS(url);
|
|
165
332
|
}
|
|
166
333
|
var WS_OPEN = 1;
|
|
334
|
+
// Set once the latency warning has been printed, so it is emitted at most once.
var _languageWarningLogged = false;
/**
 * Print a one-time console warning when a request has no `language` while
 * normalization is enabled (normalization counts as enabled when `normalize`
 * is undefined or truthy).
 *
 * @param {string|undefined} language - Language option of the request.
 * @param {boolean|undefined} normalize - Normalize option of the request.
 */
function warnIfNoLanguage(language, normalize) {
  if (language || _languageWarningLogged) {
    return;
  }
  const normalizationOn = normalize === void 0 ? true : normalize;
  if (!normalizationOn) {
    return;
  }
  _languageWarningLogged = true;
  console.warn(
    "[KugelAudio] No 'language' set with normalization enabled \u2014 the server will auto-detect the language, adding ~60-150ms to TTFA. Set language (e.g., language: 'en') for optimal latency."
  );
}
|
|
167
344
|
var ModelsResource = class {
|
|
168
345
|
constructor(client) {
|
|
169
346
|
this.client = client;
|
|
@@ -197,42 +374,177 @@ var VoicesResource = class {
|
|
|
197
374
|
params.set("include_public", String(options.includePublic));
|
|
198
375
|
}
|
|
199
376
|
if (options?.limit) params.set("limit", String(options.limit));
|
|
377
|
+
if (options?.offset) params.set("offset", String(options.offset));
|
|
200
378
|
const query = params.toString();
|
|
201
379
|
const path = query ? `/v1/voices?${query}` : "/v1/voices";
|
|
202
380
|
const response = await this.client.request("GET", path);
|
|
203
|
-
return
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
381
|
+
return {
|
|
382
|
+
voices: response.voices.map((v) => ({
|
|
383
|
+
id: v.id,
|
|
384
|
+
name: v.name,
|
|
385
|
+
description: v.description,
|
|
386
|
+
category: v.category,
|
|
387
|
+
sex: v.sex,
|
|
388
|
+
age: v.age,
|
|
389
|
+
supportedLanguages: v.supported_languages || [],
|
|
390
|
+
sampleText: v.sample_text,
|
|
391
|
+
avatarUrl: v.avatar_url,
|
|
392
|
+
sampleUrl: v.sample_url,
|
|
393
|
+
isPublic: v.is_public || false,
|
|
394
|
+
verified: v.verified || false
|
|
395
|
+
})),
|
|
396
|
+
total: response.total,
|
|
397
|
+
limit: response.limit,
|
|
398
|
+
offset: response.offset
|
|
399
|
+
};
|
|
217
400
|
}
|
|
218
401
|
/**
|
|
219
402
|
* Get a specific voice by ID.
|
|
220
403
|
*/
|
|
221
404
|
async get(voiceId) {
|
|
222
405
|
const v = await this.client.request("GET", `/v1/voices/${voiceId}`);
|
|
406
|
+
return this.mapVoiceDetail(v);
|
|
407
|
+
}
|
|
408
|
+
/**
|
|
409
|
+
* Create a new voice.
|
|
410
|
+
*/
|
|
411
|
+
async create(options) {
|
|
412
|
+
const metadata = {
|
|
413
|
+
name: options.name,
|
|
414
|
+
sex: options.sex,
|
|
415
|
+
description: options.description ?? "",
|
|
416
|
+
category: options.category ?? "conversational",
|
|
417
|
+
age: options.age ?? "middle_age",
|
|
418
|
+
quality: options.quality ?? "mid",
|
|
419
|
+
supported_languages: options.supportedLanguages ?? ["en"],
|
|
420
|
+
is_public: options.isPublic ?? false,
|
|
421
|
+
sample_text: options.sampleText ?? ""
|
|
422
|
+
};
|
|
423
|
+
const formData = new FormData();
|
|
424
|
+
formData.append(
|
|
425
|
+
"metadata",
|
|
426
|
+
new Blob([JSON.stringify(metadata)], { type: "application/json" })
|
|
427
|
+
);
|
|
428
|
+
if (options.referenceFiles) {
|
|
429
|
+
for (const file of options.referenceFiles) {
|
|
430
|
+
formData.append("files", file);
|
|
431
|
+
}
|
|
432
|
+
}
|
|
433
|
+
const v = await this.client.requestMultipart("POST", "/v1/voices", formData);
|
|
434
|
+
return this.mapVoiceDetail(v);
|
|
435
|
+
}
|
|
436
|
+
/**
|
|
437
|
+
* Update an existing voice. Only provided fields are updated.
|
|
438
|
+
*/
|
|
439
|
+
async update(voiceId, options) {
|
|
440
|
+
const payload = {};
|
|
441
|
+
if (options.name !== void 0) payload.name = options.name;
|
|
442
|
+
if (options.description !== void 0) payload.description = options.description;
|
|
443
|
+
if (options.category !== void 0) payload.category = options.category;
|
|
444
|
+
if (options.age !== void 0) payload.age = options.age;
|
|
445
|
+
if (options.sex !== void 0) payload.sex = options.sex;
|
|
446
|
+
if (options.quality !== void 0) payload.quality = options.quality;
|
|
447
|
+
if (options.supportedLanguages !== void 0) payload.supported_languages = options.supportedLanguages;
|
|
448
|
+
if (options.isPublic !== void 0) payload.is_public = options.isPublic;
|
|
449
|
+
if (options.sampleText !== void 0) payload.sample_text = options.sampleText;
|
|
450
|
+
const v = await this.client.request("PATCH", `/v1/voices/${voiceId}`, payload);
|
|
451
|
+
return this.mapVoiceDetail(v);
|
|
452
|
+
}
|
|
453
|
+
/**
|
|
454
|
+
* Delete a voice.
|
|
455
|
+
*/
|
|
456
|
+
async delete(voiceId) {
|
|
457
|
+
await this.client.request("DELETE", `/v1/voices/${voiceId}`);
|
|
458
|
+
}
|
|
459
|
+
// -- Reference management --
|
|
460
|
+
/**
|
|
461
|
+
* List reference audio files for a voice.
|
|
462
|
+
*/
|
|
463
|
+
async listReferences(voiceId) {
|
|
464
|
+
const response = await this.client.request(
|
|
465
|
+
"GET",
|
|
466
|
+
`/v1/voices/${voiceId}/references`
|
|
467
|
+
);
|
|
468
|
+
return response.references.map((r) => this.mapVoiceReference(r));
|
|
469
|
+
}
|
|
470
|
+
/**
|
|
471
|
+
* Upload a reference audio file to a voice.
|
|
472
|
+
*
|
|
473
|
+
* @param voiceId - Voice ID
|
|
474
|
+
* @param file - Audio file (File in browser, Blob in Node.js)
|
|
475
|
+
* @param referenceText - Optional transcript of the reference audio
|
|
476
|
+
*/
|
|
477
|
+
async addReference(voiceId, file, referenceText) {
|
|
478
|
+
const formData = new FormData();
|
|
479
|
+
formData.append("file", file);
|
|
480
|
+
if (referenceText) {
|
|
481
|
+
formData.append("reference_text", referenceText);
|
|
482
|
+
}
|
|
483
|
+
const r = await this.client.requestMultipart(
|
|
484
|
+
"POST",
|
|
485
|
+
`/v1/voices/${voiceId}/references`,
|
|
486
|
+
formData
|
|
487
|
+
);
|
|
488
|
+
return this.mapVoiceReference(r);
|
|
489
|
+
}
|
|
490
|
+
/**
|
|
491
|
+
* Delete a reference audio file from a voice.
|
|
492
|
+
*/
|
|
493
|
+
async deleteReference(voiceId, referenceId) {
|
|
494
|
+
await this.client.request(
|
|
495
|
+
"DELETE",
|
|
496
|
+
`/v1/voices/${voiceId}/references/${referenceId}`
|
|
497
|
+
);
|
|
498
|
+
}
|
|
499
|
+
// -- Publishing --
|
|
500
|
+
/**
|
|
501
|
+
* Request publication of a voice. Sets it as public and marks it
|
|
502
|
+
* as pending verification by an admin.
|
|
503
|
+
*/
|
|
504
|
+
async publish(voiceId) {
|
|
505
|
+
const v = await this.client.request("POST", `/v1/voices/${voiceId}/publish`);
|
|
506
|
+
return this.mapVoiceDetail(v);
|
|
507
|
+
}
|
|
508
|
+
// -- Sample generation --
|
|
509
|
+
/**
|
|
510
|
+
* Trigger sample audio generation for a voice.
|
|
511
|
+
*/
|
|
512
|
+
async generateSample(voiceId) {
|
|
513
|
+
const v = await this.client.request(
|
|
514
|
+
"POST",
|
|
515
|
+
`/v1/voices/${voiceId}/generate-sample`
|
|
516
|
+
);
|
|
517
|
+
return this.mapVoiceDetail(v);
|
|
518
|
+
}
|
|
519
|
+
// -- Helpers --
|
|
520
|
+
mapVoiceDetail(v) {
|
|
223
521
|
return {
|
|
224
522
|
id: v.id,
|
|
225
523
|
name: v.name,
|
|
226
|
-
description: v.description,
|
|
227
|
-
|
|
228
|
-
|
|
524
|
+
description: v.description ?? "",
|
|
525
|
+
generativeVoiceDescription: v.generative_voice_description ?? "",
|
|
526
|
+
supportedLanguages: v.supported_languages ?? [],
|
|
527
|
+
category: v.category ?? "cloned",
|
|
229
528
|
age: v.age,
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
529
|
+
sex: v.sex,
|
|
530
|
+
quality: v.quality ?? "mid",
|
|
531
|
+
isPublic: v.is_public ?? false,
|
|
532
|
+
verified: v.verified ?? false,
|
|
533
|
+
pendingVerification: v.pending_verification ?? false,
|
|
233
534
|
sampleUrl: v.sample_url,
|
|
234
|
-
|
|
235
|
-
|
|
535
|
+
avatarUrl: v.avatar_url,
|
|
536
|
+
sampleText: v.sample_text ?? ""
|
|
537
|
+
};
|
|
538
|
+
}
|
|
539
|
+
mapVoiceReference(r) {
|
|
540
|
+
return {
|
|
541
|
+
id: r.id,
|
|
542
|
+
voiceId: r.voice_id,
|
|
543
|
+
name: r.name ?? "",
|
|
544
|
+
referenceText: r.reference_text ?? "",
|
|
545
|
+
s3Path: r.s3_path ?? "",
|
|
546
|
+
audioUrl: r.audio_url,
|
|
547
|
+
isGenerated: r.is_generated ?? false
|
|
236
548
|
};
|
|
237
549
|
}
|
|
238
550
|
};
|
|
@@ -244,6 +556,7 @@ var TTSResource = class {
|
|
|
244
556
|
this.wsUrl = null;
|
|
245
557
|
this.pendingRequests = /* @__PURE__ */ new Map();
|
|
246
558
|
this.requestCounter = 0;
|
|
559
|
+
this.keepaliveTimer = null;
|
|
247
560
|
}
|
|
248
561
|
/**
|
|
249
562
|
* Pre-establish WebSocket connection for faster first request.
|
|
@@ -307,6 +620,63 @@ var TTSResource = class {
|
|
|
307
620
|
wordTimestamps: allTimestamps
|
|
308
621
|
};
|
|
309
622
|
}
|
|
623
|
+
/**
|
|
624
|
+
* Stream audio and return a Node.js Readable stream of raw PCM16 binary data.
|
|
625
|
+
*
|
|
626
|
+
* **Node.js only** — this method requires the `stream` built-in module and is
|
|
627
|
+
* intended for server-side integrations such as Vapi custom TTS endpoints,
|
|
628
|
+
* Express/Fastify handlers, or any pipeline that expects a Node.js `Readable`.
|
|
629
|
+
*
|
|
630
|
+
* Compared to manually wiring `onChunk` to a `Readable`, this method avoids
|
|
631
|
+
* a common race-condition: the stream object is created and returned **before**
|
|
632
|
+
* any chunks arrive, so the caller can safely pipe or attach listeners before
|
|
633
|
+
* the first audio byte is pushed.
|
|
634
|
+
*
|
|
635
|
+
* @example Vapi custom TTS endpoint
|
|
636
|
+
* ```typescript
|
|
637
|
+
* app.post('/synthesize', (req, res) => {
|
|
638
|
+
* res.setHeader('Content-Type', 'audio/pcm');
|
|
639
|
+
* res.setHeader('Transfer-Encoding', 'chunked');
|
|
640
|
+
*
|
|
641
|
+
* const readable = client.tts.toReadable({
|
|
642
|
+
* text: req.body.message.text,
|
|
643
|
+
* modelId: 'kugel-1-turbo',
|
|
644
|
+
* sampleRate: req.body.message.sampleRate,
|
|
645
|
+
* language: 'en',
|
|
646
|
+
* });
|
|
647
|
+
*
|
|
648
|
+
* readable.pipe(res);
|
|
649
|
+
* });
|
|
650
|
+
* ```
|
|
651
|
+
*
|
|
652
|
+
* @param options - TTS generation options (same as `stream()`)
|
|
653
|
+
* @param reuseConnection - Reuse the pooled WebSocket connection (default: true)
|
|
654
|
+
* @returns Node.js Readable stream emitting raw PCM16 binary Buffer chunks
|
|
655
|
+
*/
|
|
656
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
657
|
+
toReadable(options, reuseConnection = true) {
|
|
658
|
+
const { Readable } = require("stream");
|
|
659
|
+
const readable = new Readable({ read() {
|
|
660
|
+
} });
|
|
661
|
+
this.stream(
|
|
662
|
+
options,
|
|
663
|
+
{
|
|
664
|
+
onChunk: (chunk) => {
|
|
665
|
+
readable.push(Buffer.from(chunk.audio, "base64"));
|
|
666
|
+
},
|
|
667
|
+
onFinal: () => {
|
|
668
|
+
readable.push(null);
|
|
669
|
+
},
|
|
670
|
+
onError: (error) => {
|
|
671
|
+
readable.destroy(error);
|
|
672
|
+
}
|
|
673
|
+
},
|
|
674
|
+
reuseConnection
|
|
675
|
+
).catch((error) => {
|
|
676
|
+
readable.destroy(error);
|
|
677
|
+
});
|
|
678
|
+
return readable;
|
|
679
|
+
}
|
|
310
680
|
/**
|
|
311
681
|
* Build the WebSocket URL with appropriate auth param.
|
|
312
682
|
*/
|
|
@@ -348,10 +718,17 @@ var TTSResource = class {
|
|
|
348
718
|
this.wsConnection = ws;
|
|
349
719
|
this.wsUrl = url;
|
|
350
720
|
this.setupMessageHandler(ws);
|
|
721
|
+
this.startKeepalive(ws);
|
|
351
722
|
resolve(ws);
|
|
352
723
|
};
|
|
353
|
-
ws.onerror = () => {
|
|
354
|
-
|
|
724
|
+
ws.onerror = (event) => {
|
|
725
|
+
const underlying = event?.error ?? event;
|
|
726
|
+
const typed = classifyWsHandshakeError(underlying);
|
|
727
|
+
reject(
|
|
728
|
+
typed ?? new ConnectionError(
|
|
729
|
+
`Could not establish KugelAudio WebSocket connection to ${url}. Check network connectivity.`
|
|
730
|
+
)
|
|
731
|
+
);
|
|
355
732
|
};
|
|
356
733
|
});
|
|
357
734
|
}
|
|
@@ -366,7 +743,7 @@ var TTSResource = class {
|
|
|
366
743
|
const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
|
|
367
744
|
if (!pending) return;
|
|
368
745
|
if (data.error) {
|
|
369
|
-
const error = this.parseError(data
|
|
746
|
+
const error = this.parseError(data);
|
|
370
747
|
pending.callbacks.onError?.(error);
|
|
371
748
|
this.pendingRequests.delete(requestId);
|
|
372
749
|
pending.reject(error);
|
|
@@ -379,7 +756,6 @@ var TTSResource = class {
|
|
|
379
756
|
totalSamples: data.total_samples,
|
|
380
757
|
durationMs: data.dur_ms,
|
|
381
758
|
generationMs: data.gen_ms,
|
|
382
|
-
ttfaMs: data.ttfa_ms,
|
|
383
759
|
rtf: data.rtf,
|
|
384
760
|
error: data.error
|
|
385
761
|
};
|
|
@@ -416,20 +792,23 @@ var TTSResource = class {
|
|
|
416
792
|
}
|
|
417
793
|
};
|
|
418
794
|
ws.onclose = (event) => {
|
|
795
|
+
this.stopKeepalive();
|
|
419
796
|
this.wsConnection = null;
|
|
420
797
|
this.wsUrl = null;
|
|
421
798
|
for (const [id, pending] of this.pendingRequests) {
|
|
422
799
|
pending.callbacks.onClose?.();
|
|
423
|
-
if (event.code === 4001) {
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
pending.reject(
|
|
800
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
801
|
+
const error = classifyWsClose(event.code, event.reason);
|
|
802
|
+
pending.callbacks.onError?.(error);
|
|
803
|
+
pending.reject(error);
|
|
427
804
|
}
|
|
428
805
|
this.pendingRequests.delete(id);
|
|
429
806
|
}
|
|
430
807
|
};
|
|
431
808
|
ws.onerror = () => {
|
|
432
|
-
const error = new
|
|
809
|
+
const error = new ConnectionError(
|
|
810
|
+
"KugelAudio WebSocket connection error. Check network connectivity."
|
|
811
|
+
);
|
|
433
812
|
for (const [id, pending] of this.pendingRequests) {
|
|
434
813
|
pending.callbacks.onError?.(error);
|
|
435
814
|
pending.reject(error);
|
|
@@ -455,6 +834,7 @@ var TTSResource = class {
|
|
|
455
834
|
* Stream with connection pooling (fast path).
|
|
456
835
|
*/
|
|
457
836
|
async streamWithPooling(options, callbacks) {
|
|
837
|
+
warnIfNoLanguage(options.language, options.normalize);
|
|
458
838
|
const ws = await this.getConnection();
|
|
459
839
|
const requestId = ++this.requestCounter;
|
|
460
840
|
return new Promise((resolve, reject) => {
|
|
@@ -465,11 +845,14 @@ var TTSResource = class {
|
|
|
465
845
|
model_id: options.modelId || "kugel-1-turbo",
|
|
466
846
|
voice_id: options.voiceId,
|
|
467
847
|
cfg_scale: options.cfgScale ?? 2,
|
|
848
|
+
...options.temperature !== void 0 && { temperature: options.temperature },
|
|
468
849
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
469
850
|
sample_rate: options.sampleRate ?? 24e3,
|
|
470
851
|
normalize: options.normalize ?? true,
|
|
471
852
|
...options.language && { language: options.language },
|
|
472
|
-
...options.wordTimestamps && { word_timestamps: true }
|
|
853
|
+
...options.wordTimestamps && { word_timestamps: true },
|
|
854
|
+
...options.speed !== void 0 && { speed: options.speed },
|
|
855
|
+
...options.projectId !== void 0 && { project_id: options.projectId }
|
|
473
856
|
}));
|
|
474
857
|
});
|
|
475
858
|
}
|
|
@@ -477,6 +860,7 @@ var TTSResource = class {
|
|
|
477
860
|
* Stream without connection pooling (original behavior).
|
|
478
861
|
*/
|
|
479
862
|
streamWithoutPooling(options, callbacks) {
|
|
863
|
+
warnIfNoLanguage(options.language, options.normalize);
|
|
480
864
|
return new Promise((resolve, reject) => {
|
|
481
865
|
const url = this.buildWsUrl();
|
|
482
866
|
const ws = createWs(url);
|
|
@@ -491,7 +875,9 @@ var TTSResource = class {
|
|
|
491
875
|
sample_rate: options.sampleRate ?? 24e3,
|
|
492
876
|
normalize: options.normalize ?? true,
|
|
493
877
|
...options.language && { language: options.language },
|
|
494
|
-
...options.wordTimestamps && { word_timestamps: true }
|
|
878
|
+
...options.wordTimestamps && { word_timestamps: true },
|
|
879
|
+
...options.speed !== void 0 && { speed: options.speed },
|
|
880
|
+
...options.projectId !== void 0 && { project_id: options.projectId }
|
|
495
881
|
}));
|
|
496
882
|
};
|
|
497
883
|
ws.onmessage = (event) => {
|
|
@@ -499,7 +885,7 @@ var TTSResource = class {
|
|
|
499
885
|
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
500
886
|
const data = JSON.parse(messageData);
|
|
501
887
|
if (data.error) {
|
|
502
|
-
const error = this.parseError(data
|
|
888
|
+
const error = this.parseError(data);
|
|
503
889
|
callbacks.onError?.(error);
|
|
504
890
|
ws.close();
|
|
505
891
|
reject(error);
|
|
@@ -512,7 +898,6 @@ var TTSResource = class {
|
|
|
512
898
|
totalSamples: data.total_samples,
|
|
513
899
|
durationMs: data.dur_ms,
|
|
514
900
|
generationMs: data.gen_ms,
|
|
515
|
-
ttfaMs: data.ttfa_ms,
|
|
516
901
|
rtf: data.rtf,
|
|
517
902
|
error: data.error
|
|
518
903
|
};
|
|
@@ -548,25 +933,54 @@ var TTSResource = class {
|
|
|
548
933
|
console.error("Failed to parse WebSocket message:", e);
|
|
549
934
|
}
|
|
550
935
|
};
|
|
551
|
-
ws.onerror = () => {
|
|
552
|
-
const
|
|
936
|
+
ws.onerror = (event) => {
|
|
937
|
+
const underlying = event?.error ?? event;
|
|
938
|
+
const error = classifyWsHandshakeError(underlying) ?? new ConnectionError(
|
|
939
|
+
"KugelAudio WebSocket connection error. Check network connectivity."
|
|
940
|
+
);
|
|
553
941
|
callbacks.onError?.(error);
|
|
554
942
|
reject(error);
|
|
555
943
|
};
|
|
556
944
|
ws.onclose = (event) => {
|
|
557
945
|
callbacks.onClose?.();
|
|
558
|
-
if (event.code === 4001) {
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
reject(
|
|
946
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
947
|
+
const error = classifyWsClose(event.code, event.reason);
|
|
948
|
+
callbacks.onError?.(error);
|
|
949
|
+
reject(error);
|
|
562
950
|
}
|
|
563
951
|
};
|
|
564
952
|
});
|
|
565
953
|
}
|
|
954
|
+
/**
|
|
955
|
+
* Start periodic keepalive pings on the pooled connection.
|
|
956
|
+
* Uses the ws package's ping() in Node.js; silently skips in browsers
|
|
957
|
+
* where WebSocket doesn't expose a ping method.
|
|
958
|
+
*/
|
|
959
|
+
startKeepalive(ws) {
|
|
960
|
+
this.stopKeepalive();
|
|
961
|
+
const intervalMs = this.client.keepalivePingInterval;
|
|
962
|
+
if (intervalMs == null || intervalMs <= 0) return;
|
|
963
|
+
this.keepaliveTimer = setInterval(() => {
|
|
964
|
+
if (this.wsConnection !== ws || ws.readyState !== WS_OPEN) {
|
|
965
|
+
this.stopKeepalive();
|
|
966
|
+
return;
|
|
967
|
+
}
|
|
968
|
+
if (typeof ws.ping === "function") {
|
|
969
|
+
ws.ping();
|
|
970
|
+
}
|
|
971
|
+
}, intervalMs);
|
|
972
|
+
}
|
|
973
|
+
stopKeepalive() {
|
|
974
|
+
if (this.keepaliveTimer !== null) {
|
|
975
|
+
clearInterval(this.keepaliveTimer);
|
|
976
|
+
this.keepaliveTimer = null;
|
|
977
|
+
}
|
|
978
|
+
}
|
|
566
979
|
/**
|
|
567
980
|
* Close the pooled WebSocket connection.
|
|
568
981
|
*/
|
|
569
982
|
close() {
|
|
983
|
+
this.stopKeepalive();
|
|
570
984
|
if (this.wsConnection) {
|
|
571
985
|
try {
|
|
572
986
|
this.wsConnection.close();
|
|
@@ -576,15 +990,39 @@ var TTSResource = class {
|
|
|
576
990
|
this.wsUrl = null;
|
|
577
991
|
}
|
|
578
992
|
}
|
|
579
|
-
parseError(
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
993
|
+
parseError(data) {
|
|
994
|
+
return classifyWsFrame(data);
|
|
995
|
+
}
|
|
996
|
+
/**
|
|
997
|
+
* Create a streaming session for LLM integration.
|
|
998
|
+
*
|
|
999
|
+
* The session connects to `/ws/tts/stream` and keeps a persistent
|
|
1000
|
+
* connection across multiple {@link StreamingSession.send} calls.
|
|
1001
|
+
* The server auto-chunks text at sentence boundaries — no client-side
|
|
1002
|
+
* flushing required.
|
|
1003
|
+
*
|
|
1004
|
+
* @param config - Session configuration (voice, model, chunking strategy).
|
|
1005
|
+
* @param callbacks - Callbacks for audio chunks and session lifecycle events.
|
|
1006
|
+
* @returns A {@link StreamingSession} instance. Call `.connect()` before sending.
|
|
1007
|
+
*
|
|
1008
|
+
* @example
|
|
1009
|
+
* ```typescript
|
|
1010
|
+
* const session = client.tts.streamingSession(
|
|
1011
|
+
* { voiceId: 123, autoMode: true, chunkLengthSchedule: [50, 100, 150, 250] },
|
|
1012
|
+
* { onChunk: (chunk) => playAudio(chunk.audio) },
|
|
1013
|
+
* );
|
|
1014
|
+
*
|
|
1015
|
+
* await session.connect();
|
|
1016
|
+
*
|
|
1017
|
+
* for await (const token of llmStream) {
|
|
1018
|
+
* session.send(token);
|
|
1019
|
+
* }
|
|
1020
|
+
*
|
|
1021
|
+
* await session.close();
|
|
1022
|
+
* ```
|
|
1023
|
+
*/
|
|
1024
|
+
streamingSession(config, callbacks) {
|
|
1025
|
+
return new StreamingSession(this.client, config, callbacks);
|
|
588
1026
|
}
|
|
589
1027
|
/**
|
|
590
1028
|
* Create a multi-context session for concurrent TTS streams.
|
|
@@ -604,7 +1042,7 @@ var TTSResource = class {
|
|
|
604
1042
|
* console.log(`Audio from ${chunk.contextId}`);
|
|
605
1043
|
* playAudio(chunk.audio);
|
|
606
1044
|
* },
|
|
607
|
-
*
|
|
1045
|
+
* onContextClosed: (contextId) => {
|
|
608
1046
|
* console.log(`${contextId} finished`);
|
|
609
1047
|
* },
|
|
610
1048
|
* });
|
|
@@ -643,6 +1081,11 @@ var MultiContextSession = class {
|
|
|
643
1081
|
}
|
|
644
1082
|
/**
|
|
645
1083
|
* Connect to the multi-context WebSocket endpoint.
|
|
1084
|
+
*
|
|
1085
|
+
* The returned promise resolves once the WebSocket is OPEN so callers can
|
|
1086
|
+
* ``await session.connect(callbacks)`` before invoking
|
|
1087
|
+
* {@link createContext} / {@link send}. Pre-open errors reject with the
|
|
1088
|
+
* typed error.
|
|
646
1089
|
*/
|
|
647
1090
|
connect(callbacks) {
|
|
648
1091
|
this.callbacks = callbacks;
|
|
@@ -657,9 +1100,8 @@ var MultiContextSession = class {
|
|
|
657
1100
|
}
|
|
658
1101
|
const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
|
|
659
1102
|
this.ws = createWs(url);
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
this.ws.onmessage = (event) => {
|
|
1103
|
+
const ws = this.ws;
|
|
1104
|
+
ws.onmessage = (event) => {
|
|
663
1105
|
try {
|
|
664
1106
|
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
665
1107
|
const data = JSON.parse(messageData);
|
|
@@ -690,9 +1132,6 @@ var MultiContextSession = class {
|
|
|
690
1132
|
};
|
|
691
1133
|
this.callbacks.onChunk?.(chunk);
|
|
692
1134
|
}
|
|
693
|
-
if (data.is_final) {
|
|
694
|
-
this.callbacks.onContextFinal?.(data.context_id);
|
|
695
|
-
}
|
|
696
1135
|
if (data.context_closed) {
|
|
697
1136
|
this.contexts.delete(data.context_id);
|
|
698
1137
|
this.callbacks.onContextClosed?.(data.context_id);
|
|
@@ -708,19 +1147,38 @@ var MultiContextSession = class {
|
|
|
708
1147
|
console.error("Failed to parse WebSocket message:", e);
|
|
709
1148
|
}
|
|
710
1149
|
};
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
1150
|
+
return new Promise((resolve, reject) => {
|
|
1151
|
+
let opened = false;
|
|
1152
|
+
ws.onopen = () => {
|
|
1153
|
+
opened = true;
|
|
1154
|
+
resolve();
|
|
1155
|
+
};
|
|
1156
|
+
ws.onerror = (event) => {
|
|
1157
|
+
const underlying = event?.error ?? event;
|
|
1158
|
+
const err = classifyWsHandshakeError(underlying) ?? new ConnectionError(
|
|
1159
|
+
"KugelAudio multi-context WebSocket connection error. Check network connectivity."
|
|
1160
|
+
);
|
|
1161
|
+
if (!opened) reject(err);
|
|
1162
|
+
this.callbacks.onError?.(err);
|
|
1163
|
+
};
|
|
1164
|
+
ws.onclose = (event) => {
|
|
1165
|
+
let typedErr = null;
|
|
1166
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
1167
|
+
typedErr = classifyWsClose(event.code, event.reason);
|
|
1168
|
+
this.callbacks.onError?.(typedErr);
|
|
1169
|
+
}
|
|
1170
|
+
if (!opened) {
|
|
1171
|
+
reject(
|
|
1172
|
+
typedErr ?? new ConnectionError(
|
|
1173
|
+
`KugelAudio multi-context WebSocket closed before ready (code ${event.code}).`
|
|
1174
|
+
)
|
|
1175
|
+
);
|
|
1176
|
+
}
|
|
1177
|
+
this.ws = null;
|
|
1178
|
+
this.isStarted = false;
|
|
1179
|
+
this.contexts.clear();
|
|
1180
|
+
};
|
|
1181
|
+
});
|
|
724
1182
|
}
|
|
725
1183
|
/**
|
|
726
1184
|
* Create a new context with optional voice settings.
|
|
@@ -734,10 +1192,13 @@ var MultiContextSession = class {
|
|
|
734
1192
|
context_id: contextId
|
|
735
1193
|
};
|
|
736
1194
|
if (!this.isStarted) {
|
|
1195
|
+
warnIfNoLanguage(this.config.language, this.config.normalize);
|
|
737
1196
|
if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
|
|
738
1197
|
if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
|
|
1198
|
+
if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
|
|
739
1199
|
if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
|
|
740
1200
|
if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
|
|
1201
|
+
if (this.config.language) msg.language = this.config.language;
|
|
741
1202
|
if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
|
|
742
1203
|
}
|
|
743
1204
|
const voiceId = options?.voiceId || this.config.defaultVoiceId;
|
|
@@ -824,18 +1285,271 @@ var MultiContextSession = class {
|
|
|
824
1285
|
return this.ws !== null && this.ws.readyState === WS_OPEN;
|
|
825
1286
|
}
|
|
826
1287
|
};
|
|
1288
|
+
var StreamingSession = class {
|
|
1289
|
+
constructor(client, config, callbacks) {
|
|
1290
|
+
this.ws = null;
|
|
1291
|
+
this.configSent = false;
|
|
1292
|
+
this.client = client;
|
|
1293
|
+
this.config = config;
|
|
1294
|
+
this.callbacks = callbacks;
|
|
1295
|
+
}
|
|
1296
|
+
/**
|
|
1297
|
+
* Open the WebSocket connection and authenticate.
|
|
1298
|
+
*
|
|
1299
|
+
* The returned promise resolves once the WebSocket is OPEN, so callers can
|
|
1300
|
+
* ``await session.connect()`` and then ``send()`` without racing the
|
|
1301
|
+
* handshake. Pre-open errors (network failure, 4001 unauthorized, …) reject
|
|
1302
|
+
* the promise with the typed error.
|
|
1303
|
+
*/
|
|
1304
|
+
connect() {
|
|
1305
|
+
const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
|
|
1306
|
+
let authParam;
|
|
1307
|
+
if (this.client.isToken) {
|
|
1308
|
+
authParam = "token";
|
|
1309
|
+
} else if (this.client.isMasterKey) {
|
|
1310
|
+
authParam = "master_key";
|
|
1311
|
+
} else {
|
|
1312
|
+
authParam = "api_key";
|
|
1313
|
+
}
|
|
1314
|
+
const url = `${wsUrl}/ws/tts/stream?${authParam}=${this.client.apiKey}`;
|
|
1315
|
+
this.ws = createWs(url);
|
|
1316
|
+
const ws = this.ws;
|
|
1317
|
+
ws.onmessage = (event) => {
|
|
1318
|
+
try {
|
|
1319
|
+
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
1320
|
+
const data = JSON.parse(messageData);
|
|
1321
|
+
if (data.error) {
|
|
1322
|
+
this.callbacks.onError?.(new KugelAudioError(data.error));
|
|
1323
|
+
return;
|
|
1324
|
+
}
|
|
1325
|
+
if (data.audio) {
|
|
1326
|
+
const chunk = {
|
|
1327
|
+
audio: data.audio,
|
|
1328
|
+
encoding: data.enc || "pcm_s16le",
|
|
1329
|
+
index: data.idx,
|
|
1330
|
+
sampleRate: data.sr,
|
|
1331
|
+
samples: data.samples
|
|
1332
|
+
};
|
|
1333
|
+
this.callbacks.onChunk?.(chunk);
|
|
1334
|
+
}
|
|
1335
|
+
if (data.word_timestamps) {
|
|
1336
|
+
const timestamps = data.word_timestamps.map((w) => ({
|
|
1337
|
+
word: w.word,
|
|
1338
|
+
startMs: w.start_ms,
|
|
1339
|
+
endMs: w.end_ms,
|
|
1340
|
+
charStart: w.char_start,
|
|
1341
|
+
charEnd: w.char_end,
|
|
1342
|
+
score: w.score ?? 1
|
|
1343
|
+
}));
|
|
1344
|
+
this.callbacks.onWordTimestamps?.(timestamps);
|
|
1345
|
+
}
|
|
1346
|
+
if (data.chunk_complete) {
|
|
1347
|
+
this.callbacks.onChunkComplete?.(
|
|
1348
|
+
data.chunk_id ?? 0,
|
|
1349
|
+
data.audio_seconds ?? 0,
|
|
1350
|
+
data.gen_ms ?? 0
|
|
1351
|
+
);
|
|
1352
|
+
}
|
|
1353
|
+
if (data.generation_started) {
|
|
1354
|
+
this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? "");
|
|
1355
|
+
}
|
|
1356
|
+
if (data.session_closed) {
|
|
1357
|
+
this.callbacks.onSessionClosed?.(
|
|
1358
|
+
data.total_audio_seconds ?? 0,
|
|
1359
|
+
data.total_text_chunks ?? 0,
|
|
1360
|
+
data.total_audio_chunks ?? 0
|
|
1361
|
+
);
|
|
1362
|
+
}
|
|
1363
|
+
} catch (e) {
|
|
1364
|
+
console.error("[KugelAudio] Failed to parse streaming session message:", e);
|
|
1365
|
+
}
|
|
1366
|
+
};
|
|
1367
|
+
return new Promise((resolve, reject) => {
|
|
1368
|
+
let opened = false;
|
|
1369
|
+
ws.onopen = () => {
|
|
1370
|
+
opened = true;
|
|
1371
|
+
resolve();
|
|
1372
|
+
};
|
|
1373
|
+
ws.onerror = (event) => {
|
|
1374
|
+
const underlying = event?.error ?? event;
|
|
1375
|
+
const err = classifyWsHandshakeError(underlying) ?? new ConnectionError(
|
|
1376
|
+
"KugelAudio streaming WebSocket connection error. Check network connectivity."
|
|
1377
|
+
);
|
|
1378
|
+
if (!opened) reject(err);
|
|
1379
|
+
this.callbacks.onError?.(err);
|
|
1380
|
+
};
|
|
1381
|
+
ws.onclose = (event) => {
|
|
1382
|
+
let typedErr = null;
|
|
1383
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
1384
|
+
typedErr = classifyWsClose(event.code, event.reason);
|
|
1385
|
+
this.callbacks.onError?.(typedErr);
|
|
1386
|
+
}
|
|
1387
|
+
if (!opened) {
|
|
1388
|
+
reject(
|
|
1389
|
+
typedErr ?? new ConnectionError(
|
|
1390
|
+
`KugelAudio streaming WebSocket closed before ready (code ${event.code}).`
|
|
1391
|
+
)
|
|
1392
|
+
);
|
|
1393
|
+
}
|
|
1394
|
+
this.ws = null;
|
|
1395
|
+
this.configSent = false;
|
|
1396
|
+
};
|
|
1397
|
+
});
|
|
1398
|
+
}
|
|
1399
|
+
/**
|
|
1400
|
+
* Send a text chunk to the server (e.g. one LLM output token).
|
|
1401
|
+
*
|
|
1402
|
+
* The server buffers text across multiple calls and starts generating at
|
|
1403
|
+
* natural sentence boundaries automatically — no need to pass `flush`.
|
|
1404
|
+
*
|
|
1405
|
+
* @param text - Raw text or LLM token to append to the server buffer.
|
|
1406
|
+
* @param flush - Force immediate generation of whatever is buffered.
|
|
1407
|
+
* **Avoid setting this per-sentence from the client.** Doing so bypasses
|
|
1408
|
+
* the server's semantic chunking, incurs a fresh model prefill cost on
|
|
1409
|
+
* every flush, and makes latency *worse*, not better. Let the server
|
|
1410
|
+
* handle chunking via `chunkLengthSchedule` / `autoMode` instead.
|
|
1411
|
+
*/
|
|
1412
|
+
send(text, flush = false) {
|
|
1413
|
+
if (!this.ws || this.ws.readyState !== WS_OPEN) {
|
|
1414
|
+
throw new KugelAudioError("StreamingSession not connected. Call connect() first.");
|
|
1415
|
+
}
|
|
1416
|
+
const msg = { text, flush };
|
|
1417
|
+
if (!this.configSent) {
|
|
1418
|
+
if (this.config.voiceId !== void 0) msg.voice_id = this.config.voiceId;
|
|
1419
|
+
if (this.config.modelId !== void 0) msg.model_id = this.config.modelId;
|
|
1420
|
+
if (this.config.cfgScale !== void 0) msg.cfg_scale = this.config.cfgScale;
|
|
1421
|
+
if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
|
|
1422
|
+
if (this.config.maxNewTokens !== void 0) msg.max_new_tokens = this.config.maxNewTokens;
|
|
1423
|
+
if (this.config.sampleRate !== void 0) msg.sample_rate = this.config.sampleRate;
|
|
1424
|
+
if (this.config.flushTimeoutMs !== void 0) msg.flush_timeout_ms = this.config.flushTimeoutMs;
|
|
1425
|
+
if (this.config.maxBufferLength !== void 0) msg.max_buffer_length = this.config.maxBufferLength;
|
|
1426
|
+
if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
|
|
1427
|
+
if (this.config.language !== void 0) msg.language = this.config.language;
|
|
1428
|
+
if (this.config.wordTimestamps) msg.word_timestamps = true;
|
|
1429
|
+
if (this.config.autoMode !== void 0) msg.auto_mode = this.config.autoMode;
|
|
1430
|
+
if (this.config.chunkLengthSchedule?.length) msg.chunk_length_schedule = this.config.chunkLengthSchedule;
|
|
1431
|
+
if (this.config.speed !== void 0) msg.speed = this.config.speed;
|
|
1432
|
+
this.configSent = true;
|
|
1433
|
+
}
|
|
1434
|
+
this.ws.send(JSON.stringify(msg));
|
|
1435
|
+
}
|
|
1436
|
+
/**
|
|
1437
|
+
* End the current session but keep the WebSocket connection open.
|
|
1438
|
+
*
|
|
1439
|
+
* This allows starting a new session on the same connection, avoiding
|
|
1440
|
+
* the overhead of a new WebSocket handshake (~200-300ms). After calling
|
|
1441
|
+
* this, optionally call {@link updateConfig} to change voice/model settings,
|
|
1442
|
+
* then call {@link send} to start the next session.
|
|
1443
|
+
*
|
|
1444
|
+
* The returned promise resolves once the server confirms with a
|
|
1445
|
+
* `session_closed` message, or after a 15 s **quiet** timeout — i.e. 15 s
|
|
1446
|
+
* elapse without *any* server message arriving. The timer resets on every
|
|
1447
|
+
* incoming frame so a long final flush that streams audio for tens of
|
|
1448
|
+
* seconds is not truncated; only a genuinely silent server trips the fuse.
|
|
1449
|
+
*/
|
|
1450
|
+
endSession() {
|
|
1451
|
+
if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
|
|
1452
|
+
const ws = this.ws;
|
|
1453
|
+
const QUIET_TIMEOUT_MS = 15e3;
|
|
1454
|
+
return new Promise((resolve) => {
|
|
1455
|
+
let settled = false;
|
|
1456
|
+
let timer;
|
|
1457
|
+
const prevMessage = ws.onmessage;
|
|
1458
|
+
const prevClose = ws.onclose;
|
|
1459
|
+
const done = () => {
|
|
1460
|
+
if (settled) return;
|
|
1461
|
+
settled = true;
|
|
1462
|
+
clearTimeout(timer);
|
|
1463
|
+
ws.onmessage = prevMessage;
|
|
1464
|
+
ws.onclose = prevClose;
|
|
1465
|
+
this.configSent = false;
|
|
1466
|
+
resolve();
|
|
1467
|
+
};
|
|
1468
|
+
const armQuietTimer = () => {
|
|
1469
|
+
clearTimeout(timer);
|
|
1470
|
+
timer = setTimeout(done, QUIET_TIMEOUT_MS);
|
|
1471
|
+
};
|
|
1472
|
+
armQuietTimer();
|
|
1473
|
+
ws.onmessage = (event) => {
|
|
1474
|
+
armQuietTimer();
|
|
1475
|
+
if (prevMessage) prevMessage.call(ws, event);
|
|
1476
|
+
try {
|
|
1477
|
+
const raw = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
1478
|
+
if (JSON.parse(raw).session_closed) done();
|
|
1479
|
+
} catch {
|
|
1480
|
+
}
|
|
1481
|
+
};
|
|
1482
|
+
ws.onclose = (event) => {
|
|
1483
|
+
this.ws = null;
|
|
1484
|
+
if (prevClose) prevClose.call(ws, event);
|
|
1485
|
+
done();
|
|
1486
|
+
};
|
|
1487
|
+
ws.send(JSON.stringify({ close: true }));
|
|
1488
|
+
});
|
|
1489
|
+
}
|
|
1490
|
+
/**
|
|
1491
|
+
* Update session configuration for the next session.
|
|
1492
|
+
*
|
|
1493
|
+
* Call this after {@link endSession} and before the next {@link send}
|
|
1494
|
+
* to change voice, model, language, or other settings.
|
|
1495
|
+
*/
|
|
1496
|
+
updateConfig(config) {
|
|
1497
|
+
Object.assign(this.config, config);
|
|
1498
|
+
this.configSent = false;
|
|
1499
|
+
}
|
|
1500
|
+
/**
|
|
1501
|
+
* Close the session and the WebSocket connection.
|
|
1502
|
+
*
|
|
1503
|
+
* For session reuse without closing the connection, use
|
|
1504
|
+
* {@link endSession} instead.
|
|
1505
|
+
*
|
|
1506
|
+
* The returned promise resolves once the server confirms the close with a
|
|
1507
|
+
* `session_closed` message, or after a 15 s **quiet** timeout (no traffic
|
|
1508
|
+
* from the server in that window). Audio frames from the server-side
|
|
1509
|
+
* final-flush of the still-buffered text are delivered to your callbacks
|
|
1510
|
+
* before this promise resolves, and each frame resets the quiet timer.
|
|
1511
|
+
*/
|
|
1512
|
+
async close() {
|
|
1513
|
+
await this.endSession();
|
|
1514
|
+
if (this.ws) {
|
|
1515
|
+
try {
|
|
1516
|
+
this.ws.close();
|
|
1517
|
+
} catch {
|
|
1518
|
+
}
|
|
1519
|
+
this.ws = null;
|
|
1520
|
+
}
|
|
1521
|
+
}
|
|
1522
|
+
/** Whether the underlying WebSocket is open. */
|
|
1523
|
+
get isConnected() {
|
|
1524
|
+
return this.ws !== null && this.ws.readyState === WS_OPEN;
|
|
1525
|
+
}
|
|
1526
|
+
};
|
|
827
1527
|
var KugelAudio = class _KugelAudio {
|
|
828
1528
|
constructor(options) {
|
|
829
1529
|
if (!options.apiKey) {
|
|
830
|
-
throw new
|
|
1530
|
+
throw new ValidationError(
|
|
1531
|
+
"KugelAudio API key is missing. Set the KUGELAUDIO_API_KEY environment variable or pass { apiKey: ... } to the client. Get a key at https://app.kugelaudio.com/settings/api-keys."
|
|
1532
|
+
);
|
|
831
1533
|
}
|
|
832
|
-
|
|
1534
|
+
const { cleanKey, detectedRegion } = parseApiKey(options.apiKey);
|
|
1535
|
+
this._apiKey = cleanKey;
|
|
833
1536
|
this._isMasterKey = options.isMasterKey || false;
|
|
834
1537
|
this._isToken = options.isToken || false;
|
|
835
1538
|
this._orgId = options.orgId;
|
|
836
|
-
|
|
1539
|
+
if (options.apiUrl) {
|
|
1540
|
+
this._apiUrl = options.apiUrl.replace(/\/$/, "");
|
|
1541
|
+
} else {
|
|
1542
|
+
const effectiveRegion = options.region || detectedRegion || "eu";
|
|
1543
|
+
if (!(effectiveRegion in REGION_URLS)) {
|
|
1544
|
+
throw new ValidationError(
|
|
1545
|
+
`Invalid region '${effectiveRegion}'. Must be one of: ${Object.keys(REGION_URLS).join(", ")}.`
|
|
1546
|
+
);
|
|
1547
|
+
}
|
|
1548
|
+
this._apiUrl = REGION_URLS[effectiveRegion];
|
|
1549
|
+
}
|
|
837
1550
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
|
|
838
1551
|
this._timeout = options.timeout || 6e4;
|
|
1552
|
+
this._keepalivePingInterval = options.keepalivePingInterval !== void 0 ? options.keepalivePingInterval : 2e4;
|
|
839
1553
|
this.models = new ModelsResource(this);
|
|
840
1554
|
this.voices = new VoicesResource(this);
|
|
841
1555
|
this.tts = new TTSResource(this);
|
|
@@ -881,6 +1595,10 @@ var KugelAudio = class _KugelAudio {
|
|
|
881
1595
|
get ttsUrl() {
|
|
882
1596
|
return this._ttsUrl;
|
|
883
1597
|
}
|
|
1598
|
+
/** Get keepalive ping interval in milliseconds, or null if disabled. */
|
|
1599
|
+
get keepalivePingInterval() {
|
|
1600
|
+
return this._keepalivePingInterval;
|
|
1601
|
+
}
|
|
884
1602
|
/**
|
|
885
1603
|
* Close the client and release resources.
|
|
886
1604
|
* This closes any pooled WebSocket connections.
|
|
@@ -935,25 +1653,49 @@ var KugelAudio = class _KugelAudio {
|
|
|
935
1653
|
signal: controller.signal
|
|
936
1654
|
});
|
|
937
1655
|
clearTimeout(timeoutId);
|
|
938
|
-
if (response.
|
|
939
|
-
|
|
1656
|
+
if (!response.ok) {
|
|
1657
|
+
const text = await response.text();
|
|
1658
|
+
throw classifyHttpError(response.status, text, response.headers);
|
|
940
1659
|
}
|
|
941
|
-
|
|
942
|
-
|
|
1660
|
+
return await response.json();
|
|
1661
|
+
} catch (error) {
|
|
1662
|
+
clearTimeout(timeoutId);
|
|
1663
|
+
if (error instanceof KugelAudioError) {
|
|
1664
|
+
throw error;
|
|
943
1665
|
}
|
|
944
|
-
if (
|
|
945
|
-
throw new
|
|
1666
|
+
if (error.name === "AbortError") {
|
|
1667
|
+
throw new ConnectionError(
|
|
1668
|
+
`Request to ${method} ${path} timed out after ${this._timeout}ms.`
|
|
1669
|
+
);
|
|
946
1670
|
}
|
|
1671
|
+
throw new ConnectionError(
|
|
1672
|
+
`Could not reach KugelAudio at ${url}: ${error.message}. Check network connectivity.`
|
|
1673
|
+
);
|
|
1674
|
+
}
|
|
1675
|
+
}
|
|
1676
|
+
/**
|
|
1677
|
+
* Make a multipart/form-data request (for file uploads).
|
|
1678
|
+
* @internal Used by VoicesResource for reference file uploads.
|
|
1679
|
+
*/
|
|
1680
|
+
async requestMultipart(method, path, formData) {
|
|
1681
|
+
const url = `${this._apiUrl}${path}`;
|
|
1682
|
+
const headers = {
|
|
1683
|
+
"X-API-Key": this._apiKey,
|
|
1684
|
+
"Authorization": `Bearer ${this._apiKey}`
|
|
1685
|
+
};
|
|
1686
|
+
const controller = new AbortController();
|
|
1687
|
+
const timeoutId = setTimeout(() => controller.abort(), this._timeout);
|
|
1688
|
+
try {
|
|
1689
|
+
const response = await fetch(url, {
|
|
1690
|
+
method,
|
|
1691
|
+
headers,
|
|
1692
|
+
body: formData,
|
|
1693
|
+
signal: controller.signal
|
|
1694
|
+
});
|
|
1695
|
+
clearTimeout(timeoutId);
|
|
947
1696
|
if (!response.ok) {
|
|
948
1697
|
const text = await response.text();
|
|
949
|
-
|
|
950
|
-
try {
|
|
951
|
-
const json = JSON.parse(text);
|
|
952
|
-
message = json.detail || json.error || message;
|
|
953
|
-
} catch {
|
|
954
|
-
message = text || message;
|
|
955
|
-
}
|
|
956
|
-
throw new KugelAudioError(message, response.status);
|
|
1698
|
+
throw classifyHttpError(response.status, text, response.headers);
|
|
957
1699
|
}
|
|
958
1700
|
return await response.json();
|
|
959
1701
|
} catch (error) {
|
|
@@ -962,9 +1704,13 @@ var KugelAudio = class _KugelAudio {
|
|
|
962
1704
|
throw error;
|
|
963
1705
|
}
|
|
964
1706
|
if (error.name === "AbortError") {
|
|
965
|
-
throw new
|
|
1707
|
+
throw new ConnectionError(
|
|
1708
|
+
`Request to ${method} ${path} timed out after ${this._timeout}ms.`
|
|
1709
|
+
);
|
|
966
1710
|
}
|
|
967
|
-
throw new
|
|
1711
|
+
throw new ConnectionError(
|
|
1712
|
+
`Could not reach KugelAudio at ${url}: ${error.message}. Check network connectivity.`
|
|
1713
|
+
);
|
|
968
1714
|
}
|
|
969
1715
|
}
|
|
970
1716
|
};
|
|
@@ -972,12 +1718,18 @@ var KugelAudio = class _KugelAudio {
|
|
|
972
1718
|
0 && (module.exports = {
|
|
973
1719
|
AuthenticationError,
|
|
974
1720
|
ConnectionError,
|
|
1721
|
+
ErrorCodes,
|
|
975
1722
|
InsufficientCreditsError,
|
|
976
1723
|
KugelAudio,
|
|
977
1724
|
KugelAudioError,
|
|
978
1725
|
RateLimitError,
|
|
979
1726
|
ValidationError,
|
|
1727
|
+
WsCloseCodes,
|
|
980
1728
|
base64ToArrayBuffer,
|
|
1729
|
+
classifyHttpError,
|
|
1730
|
+
classifyWsClose,
|
|
1731
|
+
classifyWsFrame,
|
|
1732
|
+
classifyWsHandshakeError,
|
|
981
1733
|
createWavBlob,
|
|
982
1734
|
createWavFile,
|
|
983
1735
|
decodePCM16
|