kugelaudio 0.2.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/README.md +35 -14
- package/dist/index.d.mts +518 -26
- package/dist/index.d.ts +518 -26
- package/dist/index.js +864 -111
- package/dist/index.mjs +858 -111
- package/package.json +8 -7
- package/src/client.test.ts +548 -0
- package/src/client.ts +885 -103
- package/src/errors.ts +266 -18
- package/src/index.ts +17 -2
- package/src/types.ts +215 -9
- package/src/websocket.ts +38 -18
package/dist/index.js
CHANGED
|
@@ -22,12 +22,18 @@ var index_exports = {};
|
|
|
22
22
|
__export(index_exports, {
|
|
23
23
|
AuthenticationError: () => AuthenticationError,
|
|
24
24
|
ConnectionError: () => ConnectionError,
|
|
25
|
+
ErrorCodes: () => ErrorCodes,
|
|
25
26
|
InsufficientCreditsError: () => InsufficientCreditsError,
|
|
26
27
|
KugelAudio: () => KugelAudio,
|
|
27
28
|
KugelAudioError: () => KugelAudioError,
|
|
28
29
|
RateLimitError: () => RateLimitError,
|
|
29
30
|
ValidationError: () => ValidationError,
|
|
31
|
+
WsCloseCodes: () => WsCloseCodes,
|
|
30
32
|
base64ToArrayBuffer: () => base64ToArrayBuffer,
|
|
33
|
+
classifyHttpError: () => classifyHttpError,
|
|
34
|
+
classifyWsClose: () => classifyWsClose,
|
|
35
|
+
classifyWsFrame: () => classifyWsFrame,
|
|
36
|
+
classifyWsHandshakeError: () => classifyWsHandshakeError,
|
|
31
37
|
createWavBlob: () => createWavBlob,
|
|
32
38
|
createWavFile: () => createWavFile,
|
|
33
39
|
decodePCM16: () => decodePCM16
|
|
@@ -35,49 +41,192 @@ __export(index_exports, {
|
|
|
35
41
|
module.exports = __toCommonJS(index_exports);
|
|
36
42
|
|
|
37
43
|
// src/errors.ts
|
|
44
|
+
/**
 * Machine-readable error codes. `build()` compares a response's error code
 * against these values to choose the typed error class.
 *
 * Frozen so that a consumer of this exported table cannot accidentally
 * change the values the classifiers compare against.
 */
var ErrorCodes = Object.freeze({
  UNAUTHORIZED: "UNAUTHORIZED",
  RATE_LIMITED: "RATE_LIMITED",
  INSUFFICIENT_CREDITS: "INSUFFICIENT_CREDITS",
  MODEL_UNAVAILABLE: "MODEL_UNAVAILABLE",
  EMPTY_AUDIO: "EMPTY_AUDIO",
  VALIDATION: "VALIDATION_ERROR",
  INTERNAL: "INTERNAL_ERROR",
  NOT_FOUND: "NOT_FOUND"
});
/**
 * WebSocket close codes that `classifyWsClose()` maps to typed errors.
 * Frozen for the same reason as `ErrorCodes`.
 */
var WsCloseCodes = Object.freeze({
  UNAUTHORIZED: 4001,
  INSUFFICIENT_CREDITS: 4003,
  RATE_LIMITED: 4029,
  MODEL_UNAVAILABLE: 4500
});
// Links interpolated into the default authentication / credits error messages.
var API_KEYS_URL = "https://app.kugelaudio.com/settings/api-keys";
var BILLING_URL = "https://app.kugelaudio.com/billing";
|
|
38
62
|
/**
 * Base class for every error raised by the SDK.
 *
 * @param message - Human-readable description. When `options.requestId` is
 *   set, ` (request_id: …)` is appended to it.
 * @param options - Either an options object with optional `statusCode`,
 *   `errorCode`, `requestId` and `retryAfter`, or (legacy 0.2.x call form) a
 *   bare numeric status code.
 */
var KugelAudioError = class _KugelAudioError extends Error {
  constructor(message, options = {}) {
    // The previous release took `(message, statusCode)`. Keep that call form
    // working instead of silently dropping the status, and tolerate `null`.
    const opts = typeof options === "number" ? { statusCode: options } : options ?? {};
    super(opts.requestId ? `${message} (request_id: ${opts.requestId})` : message);
    this.name = "KugelAudioError";
    this.statusCode = opts.statusCode;
    this.errorCode = opts.errorCode;
    this.requestId = opts.requestId;
    this.retryAfter = opts.retryAfter;
    // Restore the prototype chain so `instanceof` works on down-levelled builds.
    Object.setPrototypeOf(this, _KugelAudioError.prototype);
  }
};
|
|
46
73
|
/**
 * Raised when the API key is rejected.
 *
 * Defaults to status 401 and `ErrorCodes.UNAUTHORIZED`; keys supplied in
 * `options` override those defaults. A nullish `message` falls back to a
 * hint pointing at the API-keys page.
 */
var AuthenticationError = class _AuthenticationError extends KugelAudioError {
  constructor(message, options = {}) {
    const text = message ?? `KugelAudio rejected the API key. Check it is current at ${API_KEYS_URL}.`;
    const merged = { statusCode: 401, errorCode: ErrorCodes.UNAUTHORIZED, ...options };
    super(text, merged);
    this.name = "AuthenticationError";
    Object.setPrototypeOf(this, _AuthenticationError.prototype);
  }
};
|
|
53
83
|
/**
 * Raised when the request is rate limited.
 *
 * Defaults to status 429 and `ErrorCodes.RATE_LIMITED`; keys supplied in
 * `options` override those defaults. When no message is given, the default
 * text mentions `options.retryAfter` if it is truthy.
 */
var RateLimitError = class _RateLimitError extends KugelAudioError {
  constructor(message, options = {}) {
    let text = message;
    if (text == null) {
      text = options.retryAfter ? `KugelAudio rate limit hit; retry after ${options.retryAfter}s.` : "KugelAudio rate limit hit; retry shortly.";
    }
    const merged = { statusCode: 429, errorCode: ErrorCodes.RATE_LIMITED, ...options };
    super(text, merged);
    this.name = "RateLimitError";
    Object.setPrototypeOf(this, _RateLimitError.prototype);
  }
};
|
|
60
91
|
/**
 * Raised when the account has no credits left.
 *
 * Defaults to status 402 and `ErrorCodes.INSUFFICIENT_CREDITS`; keys supplied
 * in `options` override those defaults. A nullish `message` falls back to a
 * hint pointing at the billing page.
 */
var InsufficientCreditsError = class _InsufficientCreditsError extends KugelAudioError {
  constructor(message, options = {}) {
    const text = message ?? `Your KugelAudio account is out of credits. Top up at ${BILLING_URL}.`;
    const merged = { statusCode: 402, errorCode: ErrorCodes.INSUFFICIENT_CREDITS, ...options };
    super(text, merged);
    this.name = "InsufficientCreditsError";
    Object.setPrototypeOf(this, _InsufficientCreditsError.prototype);
  }
};
|
|
67
101
|
/**
 * Raised when a request fails validation.
 *
 * Defaults to status 400 and `ErrorCodes.VALIDATION`; keys supplied in
 * `options` override those defaults.
 */
var ValidationError = class _ValidationError extends KugelAudioError {
  constructor(message, options = {}) {
    const merged = { statusCode: 400, errorCode: ErrorCodes.VALIDATION, ...options };
    super(message, merged);
    this.name = "ValidationError";
    Object.setPrototypeOf(this, _ValidationError.prototype);
  }
};
|
|
74
108
|
/**
 * Raised when the service cannot be reached or reports itself unavailable.
 *
 * Defaults to status 503; keys supplied in `options` override that default.
 *
 * @param message - Description of the failure. The previous release had a
 *   default here; a generic one is restored so `new ConnectionError()` does
 *   not produce an error with an empty message.
 * @param options - Optional `statusCode`, `errorCode`, `requestId`, `retryAfter`.
 */
var ConnectionError = class _ConnectionError extends KugelAudioError {
  constructor(message, options = {}) {
    super(
      message ?? "Could not connect to KugelAudio. Check network connectivity.",
      { statusCode: 503, ...options }
    );
    this.name = "ConnectionError";
    Object.setPrototypeOf(this, _ConnectionError.prototype);
  }
};
|
|
115
|
+
/**
 * Construct the most specific SDK error for a status / error-code pair.
 *
 * The error code is checked before the HTTP status in every branch, and the
 * branches are tried in order: unauthorized, insufficient credits, rate
 * limited, validation, unavailable. Anything else becomes a plain
 * `KugelAudioError`.
 *
 * @param status - HTTP status, or `undefined` when there is none (WebSocket frames).
 * @param errorCode - Error code string, or `undefined`.
 * @param message - Server-provided message; an empty value selects each class's default.
 * @param opts - Extra error options (`requestId`, `retryAfter`, …). Not mutated.
 * @returns The typed error instance.
 */
function build(status, errorCode, message, opts = {}) {
  const common = { ...opts };
  // Only set these when known, so each subclass's own default still applies.
  if (status !== void 0) common.statusCode = status;
  if (errorCode !== void 0) common.errorCode = errorCode;
  if (errorCode === ErrorCodes.UNAUTHORIZED || status === 401) {
    return new AuthenticationError(message || void 0, common);
  }
  if (errorCode === ErrorCodes.INSUFFICIENT_CREDITS || status === 402) {
    return new InsufficientCreditsError(message || void 0, common);
  }
  if (errorCode === ErrorCodes.RATE_LIMITED || status === 429) {
    return new RateLimitError(message || void 0, common);
  }
  if (errorCode === ErrorCodes.VALIDATION || status === 400) {
    return new ValidationError(message || "Request validation failed.", common);
  }
  if (errorCode === ErrorCodes.MODEL_UNAVAILABLE || status === 503) {
    const detail = message || "service temporarily unavailable";
    return new ConnectionError(
      `KugelAudio is temporarily unavailable: ${detail}. Retry shortly.`,
      common
    );
  }
  // Without a status (WebSocket frame path) the old fallback rendered the
  // literal text "HTTP undefined"; use a neutral message in that case.
  const fallback = status !== void 0 ? `HTTP ${status}` : "Unknown KugelAudio error";
  return new KugelAudioError(message || fallback, common);
}
|
|
140
|
+
/**
 * Read one header value from either a `Headers`-like object (anything with a
 * `get` method) or a plain record of header names to values.
 *
 * @param headers - Header container; may be `null`/`undefined`.
 * @param name - Header name to look up.
 * @returns The header value, or `undefined` when absent.
 */
function readHeader(headers, name) {
  // Previously a nullish container fell through to the record branch and threw.
  if (headers == null) return void 0;
  if (typeof headers.get === "function") {
    return headers.get(name) ?? void 0;
  }
  const wanted = name.toLowerCase();
  const direct = headers[name] ?? headers[wanted];
  if (direct != null) return direct;
  // Header names are case-insensitive; plain records keep whatever casing the
  // producer used, so fall back to a case-insensitive scan of the keys.
  for (const key of Object.keys(headers)) {
    if (key.toLowerCase() === wanted && headers[key] != null) {
      return headers[key];
    }
  }
  return void 0;
}
|
|
147
|
+
/**
 * Turn a failed HTTP response into a typed SDK error.
 *
 * Reads `error_code`, `error` / `detail` and `retry_after` from a JSON body
 * when the body parses; otherwise the trimmed raw body is the message.
 * `Retry-After` and `x-request-id` are taken from the headers.
 *
 * @param status - HTTP status code.
 * @param bodyText - Raw response body (may be empty or non-JSON).
 * @param headers - `Headers`-like object or plain record; may be nullish.
 * @returns The error produced by `build()`.
 */
function classifyHttpError(status, bodyText, headers) {
  // Guard here so a missing header container can never throw during
  // error classification.
  const header = (name) => headers == null ? void 0 : readHeader(headers, name);
  // Array-valued `error`/`detail` fields may hold objects; `String(obj)`
  // would render them as "[object Object]". Prefer a string `msg` field
  // when the entry has one, otherwise show the entry as JSON.
  const describe = (entry) => {
    if (entry && typeof entry === "object") {
      return typeof entry.msg === "string" ? entry.msg : JSON.stringify(entry);
    }
    return String(entry);
  };
  let errorCode;
  let message = "";
  let retryAfter;
  if (bodyText) {
    try {
      const body = JSON.parse(bodyText);
      if (body && typeof body === "object") {
        errorCode = typeof body.error_code === "string" ? body.error_code : void 0;
        const msg = body.error ?? body.detail;
        if (Array.isArray(msg)) {
          message = msg.map(describe).join("; ");
        } else if (typeof msg === "string") {
          message = msg;
        }
        if (typeof body.retry_after === "number") {
          retryAfter = body.retry_after;
        }
      }
    } catch {
      // Body is not JSON; the raw text is used as the message below.
    }
  }
  if (retryAfter === void 0) {
    const raw = header("Retry-After") ?? header("retry-after");
    if (raw) {
      const seconds = Number(raw);
      if (Number.isFinite(seconds)) {
        retryAfter = seconds;
      } else {
        // Retry-After may also be an HTTP-date; convert it to whole seconds
        // from now, never negative.
        const when = Date.parse(raw);
        if (!Number.isNaN(when)) {
          retryAfter = Math.max(0, Math.ceil((when - Date.now()) / 1e3));
        }
      }
    }
  }
  const requestId = header("x-request-id") ?? header("X-Request-Id");
  if (!message) {
    message = (bodyText || "").trim();
  }
  return build(status, errorCode, message, { requestId, retryAfter });
}
|
|
182
|
+
/**
 * Turn a WebSocket error frame into a typed SDK error.
 *
 * @param data - Parsed frame; `error_code`, `error` and `retry_after` are read.
 * @returns The error produced by `build()` with no HTTP status.
 */
function classifyWsFrame(data) {
  const { error_code: errorCode, error: serverMessage, retry_after: rawRetry } = data;
  // Only a numeric retry hint is forwarded.
  const retryAfter = typeof rawRetry === "number" ? rawRetry : void 0;
  return build(void 0, errorCode, serverMessage ?? "Server reported an error.", { retryAfter });
}
|
|
188
|
+
/**
 * Turn a WebSocket close event into a typed SDK error.
 *
 * @param code - Close code; the values in `WsCloseCodes` get dedicated errors.
 * @param reason - Optional close reason; trimmed, and appended where shown below.
 * @returns A typed error; any other code yields a generic `ConnectionError`.
 */
function classifyWsClose(code, reason) {
  const reasonTxt = (reason ?? "").trim();
  switch (code) {
    case WsCloseCodes.UNAUTHORIZED: {
      let msg = `KugelAudio rejected the API key. Check it is current at ${API_KEYS_URL}.`;
      if (reasonTxt) msg = `${msg} (${reasonTxt})`;
      return new AuthenticationError(msg);
    }
    case WsCloseCodes.INSUFFICIENT_CREDITS:
      return new InsufficientCreditsError();
    case WsCloseCodes.RATE_LIMITED:
      return new RateLimitError();
    case WsCloseCodes.MODEL_UNAVAILABLE: {
      const suffix = reasonTxt ? ` (${reasonTxt})` : "";
      // Tag the error with the same code `build()` attaches for this
      // condition, so callers can branch on `errorCode` on either path.
      return new ConnectionError(
        `KugelAudio model is temporarily unavailable. Retry shortly.${suffix}`,
        { errorCode: ErrorCodes.MODEL_UNAVAILABLE }
      );
    }
    default: {
      const detail = reasonTxt || "no reason given";
      const codeStr = code !== void 0 ? ` (code ${code})` : "";
      return new ConnectionError(
        `KugelAudio WebSocket closed by server: ${detail}${codeStr}.`
      );
    }
  }
}
|
|
213
|
+
/**
 * Try to derive a typed error from a failed WebSocket handshake.
 *
 * The status comes from a numeric `statusCode` property, or failing that
 * from an "Unexpected server response: NNN" message.
 *
 * @param err - The underlying error object, if any.
 * @returns A typed error, or `null` when no HTTP status can be determined.
 */
function classifyWsHandshakeError(err) {
  if (!err || typeof err !== "object") return null;
  const hasText = typeof err.message === "string";
  let status = typeof err.statusCode === "number" ? err.statusCode : void 0;
  if (status === void 0 && hasText) {
    const found = err.message.match(/Unexpected server response:\s*(\d{3})/i);
    if (found) status = Number(found[1]);
  }
  if (status === void 0) return null;
  // A 403 during the handshake is reported as an authentication failure.
  return status === 403 ? new AuthenticationError() : build(status, void 0, hasText ? err.message : "");
}
|
|
81
230
|
|
|
82
231
|
// src/utils.ts
|
|
83
232
|
function base64ToArrayBuffer(base64) {
|
|
@@ -137,21 +286,26 @@ function createWavBlob(audio, sampleRate) {
|
|
|
137
286
|
|
|
138
287
|
// src/websocket.ts
|
|
139
288
|
var _cachedWs = null;
|
|
289
|
+
/**
 * Report whether the code is running under Node.js, judged by the presence
 * of a string `process.versions.node`.
 *
 * @returns `true` when `process.versions.node` is a string, else `false`.
 */
function isNodeJs() {
  if (typeof process === "undefined") return false;
  const versions = process.versions;
  return !!versions && typeof versions.node === "string";
}
|
|
140
292
|
function getWebSocket() {
|
|
141
293
|
if (_cachedWs) return _cachedWs;
|
|
294
|
+
if (isNodeJs()) {
|
|
295
|
+
try {
|
|
296
|
+
const _require = typeof require !== "undefined" ? require : Function('return typeof require !== "undefined" ? require : undefined')();
|
|
297
|
+
if (_require) {
|
|
298
|
+
const ws = _require("ws");
|
|
299
|
+
_cachedWs = ws.default || ws;
|
|
300
|
+
return _cachedWs;
|
|
301
|
+
}
|
|
302
|
+
} catch {
|
|
303
|
+
}
|
|
304
|
+
}
|
|
142
305
|
if (typeof globalThis !== "undefined" && typeof globalThis.WebSocket !== "undefined") {
|
|
143
306
|
_cachedWs = globalThis.WebSocket;
|
|
144
307
|
return _cachedWs;
|
|
145
308
|
}
|
|
146
|
-
try {
|
|
147
|
-
const _require = typeof require !== "undefined" ? require : Function('return typeof require !== "undefined" ? require : undefined')();
|
|
148
|
-
if (_require) {
|
|
149
|
-
const ws = _require("ws");
|
|
150
|
-
_cachedWs = ws.default || ws;
|
|
151
|
-
return _cachedWs;
|
|
152
|
-
}
|
|
153
|
-
} catch {
|
|
154
|
-
}
|
|
155
309
|
throw new Error(
|
|
156
310
|
'WebSocket not available. In Node.js, install the "ws" package: npm install ws'
|
|
157
311
|
);
|
|
@@ -159,11 +313,32 @@ function getWebSocket() {
|
|
|
159
313
|
|
|
160
314
|
// src/client.ts
|
|
161
315
|
var DEFAULT_API_URL = "https://api.kugelaudio.com";
|
|
316
|
+
var EU_API_URL = "https://api.eu.kugelaudio.com";
|
|
317
|
+
var SUPPORTED_REGIONS = ["eu", "us", "global"];
|
|
318
|
+
var REGION_PREFIXES = ["eu-", "us-", "global-"];
|
|
319
|
+
/**
 * Split an optional region prefix off an API key.
 *
 * @param apiKey - Key, optionally starting with one of `REGION_PREFIXES`.
 * @returns `{ cleanKey, detectedRegion }` when a prefix matched (the region
 *   is the prefix without its trailing dash), otherwise `{ cleanKey }` only.
 */
function parseApiKey(apiKey) {
  const matched = REGION_PREFIXES.find((candidate) => apiKey.startsWith(candidate));
  if (matched === void 0) {
    return { cleanKey: apiKey };
  }
  return {
    cleanKey: apiKey.slice(matched.length),
    detectedRegion: matched.slice(0, -1)
  };
}
|
|
162
327
|
/**
 * Open a WebSocket to `url` using the constructor returned by `getWebSocket()`.
 *
 * @param url - WebSocket URL.
 * @returns The new socket instance.
 */
function createWs(url) {
  const SocketCtor = getWebSocket();
  return new SocketCtor(url);
}
|
|
166
331
|
var WS_OPEN = 1;
|
|
332
|
+
// Latch so the latency hint is printed at most once per module instance.
var _languageWarningLogged = false;
/**
 * Warn once when no language is set while normalization is enabled.
 *
 * @param language - Requested language; any truthy value suppresses the warning.
 * @param normalize - Normalization flag; `undefined` counts as enabled.
 */
function warnIfNoLanguage(language, normalize) {
  if (_languageWarningLogged || language) return;
  // An explicit falsy flag means normalization is off: nothing to warn about.
  if (normalize !== void 0 && !normalize) return;
  _languageWarningLogged = true;
  console.warn(
    "[KugelAudio] No 'language' set with normalization enabled \u2014 the server will auto-detect the language, adding ~60-150ms to TTFA. Set language (e.g., language: 'en') for optimal latency."
  );
}
|
|
167
342
|
var ModelsResource = class {
|
|
168
343
|
constructor(client) {
|
|
169
344
|
this.client = client;
|
|
@@ -197,42 +372,177 @@ var VoicesResource = class {
|
|
|
197
372
|
params.set("include_public", String(options.includePublic));
|
|
198
373
|
}
|
|
199
374
|
if (options?.limit) params.set("limit", String(options.limit));
|
|
375
|
+
if (options?.offset) params.set("offset", String(options.offset));
|
|
200
376
|
const query = params.toString();
|
|
201
377
|
const path = query ? `/v1/voices?${query}` : "/v1/voices";
|
|
202
378
|
const response = await this.client.request("GET", path);
|
|
203
|
-
return
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
379
|
+
return {
|
|
380
|
+
voices: response.voices.map((v) => ({
|
|
381
|
+
id: v.id,
|
|
382
|
+
name: v.name,
|
|
383
|
+
description: v.description,
|
|
384
|
+
category: v.category,
|
|
385
|
+
sex: v.sex,
|
|
386
|
+
age: v.age,
|
|
387
|
+
supportedLanguages: v.supported_languages || [],
|
|
388
|
+
sampleText: v.sample_text,
|
|
389
|
+
avatarUrl: v.avatar_url,
|
|
390
|
+
sampleUrl: v.sample_url,
|
|
391
|
+
isPublic: v.is_public || false,
|
|
392
|
+
verified: v.verified || false
|
|
393
|
+
})),
|
|
394
|
+
total: response.total,
|
|
395
|
+
limit: response.limit,
|
|
396
|
+
offset: response.offset
|
|
397
|
+
};
|
|
217
398
|
}
|
|
218
399
|
/**
|
|
219
400
|
* Get a specific voice by ID.
|
|
220
401
|
*/
|
|
221
402
|
async get(voiceId) {
|
|
222
403
|
const v = await this.client.request("GET", `/v1/voices/${voiceId}`);
|
|
404
|
+
return this.mapVoiceDetail(v);
|
|
405
|
+
}
|
|
406
|
+
/**
|
|
407
|
+
* Create a new voice.
|
|
408
|
+
*/
|
|
409
|
+
async create(options) {
|
|
410
|
+
const metadata = {
|
|
411
|
+
name: options.name,
|
|
412
|
+
sex: options.sex,
|
|
413
|
+
description: options.description ?? "",
|
|
414
|
+
category: options.category ?? "conversational",
|
|
415
|
+
age: options.age ?? "middle_age",
|
|
416
|
+
quality: options.quality ?? "mid",
|
|
417
|
+
supported_languages: options.supportedLanguages ?? ["en"],
|
|
418
|
+
is_public: options.isPublic ?? false,
|
|
419
|
+
sample_text: options.sampleText ?? ""
|
|
420
|
+
};
|
|
421
|
+
const formData = new FormData();
|
|
422
|
+
formData.append(
|
|
423
|
+
"metadata",
|
|
424
|
+
new Blob([JSON.stringify(metadata)], { type: "application/json" })
|
|
425
|
+
);
|
|
426
|
+
if (options.referenceFiles) {
|
|
427
|
+
for (const file of options.referenceFiles) {
|
|
428
|
+
formData.append("files", file);
|
|
429
|
+
}
|
|
430
|
+
}
|
|
431
|
+
const v = await this.client.requestMultipart("POST", "/v1/voices", formData);
|
|
432
|
+
return this.mapVoiceDetail(v);
|
|
433
|
+
}
|
|
434
|
+
/**
|
|
435
|
+
* Update an existing voice. Only provided fields are updated.
|
|
436
|
+
*/
|
|
437
|
+
async update(voiceId, options) {
|
|
438
|
+
const payload = {};
|
|
439
|
+
if (options.name !== void 0) payload.name = options.name;
|
|
440
|
+
if (options.description !== void 0) payload.description = options.description;
|
|
441
|
+
if (options.category !== void 0) payload.category = options.category;
|
|
442
|
+
if (options.age !== void 0) payload.age = options.age;
|
|
443
|
+
if (options.sex !== void 0) payload.sex = options.sex;
|
|
444
|
+
if (options.quality !== void 0) payload.quality = options.quality;
|
|
445
|
+
if (options.supportedLanguages !== void 0) payload.supported_languages = options.supportedLanguages;
|
|
446
|
+
if (options.isPublic !== void 0) payload.is_public = options.isPublic;
|
|
447
|
+
if (options.sampleText !== void 0) payload.sample_text = options.sampleText;
|
|
448
|
+
const v = await this.client.request("PATCH", `/v1/voices/${voiceId}`, payload);
|
|
449
|
+
return this.mapVoiceDetail(v);
|
|
450
|
+
}
|
|
451
|
+
/**
|
|
452
|
+
* Delete a voice.
|
|
453
|
+
*/
|
|
454
|
+
async delete(voiceId) {
|
|
455
|
+
await this.client.request("DELETE", `/v1/voices/${voiceId}`);
|
|
456
|
+
}
|
|
457
|
+
// -- Reference management --
|
|
458
|
+
/**
|
|
459
|
+
* List reference audio files for a voice.
|
|
460
|
+
*/
|
|
461
|
+
async listReferences(voiceId) {
|
|
462
|
+
const response = await this.client.request(
|
|
463
|
+
"GET",
|
|
464
|
+
`/v1/voices/${voiceId}/references`
|
|
465
|
+
);
|
|
466
|
+
return response.references.map((r) => this.mapVoiceReference(r));
|
|
467
|
+
}
|
|
468
|
+
/**
|
|
469
|
+
* Upload a reference audio file to a voice.
|
|
470
|
+
*
|
|
471
|
+
* @param voiceId - Voice ID
|
|
472
|
+
* @param file - Audio file (File in browser, Blob in Node.js)
|
|
473
|
+
* @param referenceText - Optional transcript of the reference audio
|
|
474
|
+
*/
|
|
475
|
+
async addReference(voiceId, file, referenceText) {
|
|
476
|
+
const formData = new FormData();
|
|
477
|
+
formData.append("file", file);
|
|
478
|
+
if (referenceText) {
|
|
479
|
+
formData.append("reference_text", referenceText);
|
|
480
|
+
}
|
|
481
|
+
const r = await this.client.requestMultipart(
|
|
482
|
+
"POST",
|
|
483
|
+
`/v1/voices/${voiceId}/references`,
|
|
484
|
+
formData
|
|
485
|
+
);
|
|
486
|
+
return this.mapVoiceReference(r);
|
|
487
|
+
}
|
|
488
|
+
/**
|
|
489
|
+
* Delete a reference audio file from a voice.
|
|
490
|
+
*/
|
|
491
|
+
async deleteReference(voiceId, referenceId) {
|
|
492
|
+
await this.client.request(
|
|
493
|
+
"DELETE",
|
|
494
|
+
`/v1/voices/${voiceId}/references/${referenceId}`
|
|
495
|
+
);
|
|
496
|
+
}
|
|
497
|
+
// -- Publishing --
|
|
498
|
+
/**
|
|
499
|
+
* Request publication of a voice. Sets it as public and marks it
|
|
500
|
+
* as pending verification by an admin.
|
|
501
|
+
*/
|
|
502
|
+
async publish(voiceId) {
|
|
503
|
+
const v = await this.client.request("POST", `/v1/voices/${voiceId}/publish`);
|
|
504
|
+
return this.mapVoiceDetail(v);
|
|
505
|
+
}
|
|
506
|
+
// -- Sample generation --
|
|
507
|
+
/**
|
|
508
|
+
* Trigger sample audio generation for a voice.
|
|
509
|
+
*/
|
|
510
|
+
async generateSample(voiceId) {
|
|
511
|
+
const v = await this.client.request(
|
|
512
|
+
"POST",
|
|
513
|
+
`/v1/voices/${voiceId}/generate-sample`
|
|
514
|
+
);
|
|
515
|
+
return this.mapVoiceDetail(v);
|
|
516
|
+
}
|
|
517
|
+
// -- Helpers --
|
|
518
|
+
mapVoiceDetail(v) {
|
|
223
519
|
return {
|
|
224
520
|
id: v.id,
|
|
225
521
|
name: v.name,
|
|
226
|
-
description: v.description,
|
|
227
|
-
|
|
228
|
-
|
|
522
|
+
description: v.description ?? "",
|
|
523
|
+
generativeVoiceDescription: v.generative_voice_description ?? "",
|
|
524
|
+
supportedLanguages: v.supported_languages ?? [],
|
|
525
|
+
category: v.category ?? "cloned",
|
|
229
526
|
age: v.age,
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
527
|
+
sex: v.sex,
|
|
528
|
+
quality: v.quality ?? "mid",
|
|
529
|
+
isPublic: v.is_public ?? false,
|
|
530
|
+
verified: v.verified ?? false,
|
|
531
|
+
pendingVerification: v.pending_verification ?? false,
|
|
233
532
|
sampleUrl: v.sample_url,
|
|
234
|
-
|
|
235
|
-
|
|
533
|
+
avatarUrl: v.avatar_url,
|
|
534
|
+
sampleText: v.sample_text ?? ""
|
|
535
|
+
};
|
|
536
|
+
}
|
|
537
|
+
mapVoiceReference(r) {
|
|
538
|
+
return {
|
|
539
|
+
id: r.id,
|
|
540
|
+
voiceId: r.voice_id,
|
|
541
|
+
name: r.name ?? "",
|
|
542
|
+
referenceText: r.reference_text ?? "",
|
|
543
|
+
s3Path: r.s3_path ?? "",
|
|
544
|
+
audioUrl: r.audio_url,
|
|
545
|
+
isGenerated: r.is_generated ?? false
|
|
236
546
|
};
|
|
237
547
|
}
|
|
238
548
|
};
|
|
@@ -244,6 +554,7 @@ var TTSResource = class {
|
|
|
244
554
|
this.wsUrl = null;
|
|
245
555
|
this.pendingRequests = /* @__PURE__ */ new Map();
|
|
246
556
|
this.requestCounter = 0;
|
|
557
|
+
this.keepaliveTimer = null;
|
|
247
558
|
}
|
|
248
559
|
/**
|
|
249
560
|
* Pre-establish WebSocket connection for faster first request.
|
|
@@ -307,6 +618,63 @@ var TTSResource = class {
|
|
|
307
618
|
wordTimestamps: allTimestamps
|
|
308
619
|
};
|
|
309
620
|
}
|
|
621
|
+
/**
|
|
622
|
+
* Stream audio and return a Node.js Readable stream of raw PCM16 binary data.
|
|
623
|
+
*
|
|
624
|
+
* **Node.js only** — this method requires the `stream` built-in module and is
|
|
625
|
+
* intended for server-side integrations such as Vapi custom TTS endpoints,
|
|
626
|
+
* Express/Fastify handlers, or any pipeline that expects a Node.js `Readable`.
|
|
627
|
+
*
|
|
628
|
+
* Compared to manually wiring `onChunk` to a `Readable`, this method avoids
|
|
629
|
+
* a common race-condition: the stream object is created and returned **before**
|
|
630
|
+
* any chunks arrive, so the caller can safely pipe or attach listeners before
|
|
631
|
+
* the first audio byte is pushed.
|
|
632
|
+
*
|
|
633
|
+
* @example Vapi custom TTS endpoint
|
|
634
|
+
* ```typescript
|
|
635
|
+
* app.post('/synthesize', (req, res) => {
|
|
636
|
+
* res.setHeader('Content-Type', 'audio/pcm');
|
|
637
|
+
* res.setHeader('Transfer-Encoding', 'chunked');
|
|
638
|
+
*
|
|
639
|
+
* const readable = client.tts.toReadable({
|
|
640
|
+
* text: req.body.message.text,
|
|
641
|
+
* modelId: 'kugel-1-turbo',
|
|
642
|
+
* sampleRate: req.body.message.sampleRate,
|
|
643
|
+
* language: 'en',
|
|
644
|
+
* });
|
|
645
|
+
*
|
|
646
|
+
* readable.pipe(res);
|
|
647
|
+
* });
|
|
648
|
+
* ```
|
|
649
|
+
*
|
|
650
|
+
* @param options - TTS generation options (same as `stream()`)
|
|
651
|
+
* @param reuseConnection - Reuse the pooled WebSocket connection (default: true)
|
|
652
|
+
* @returns Node.js Readable stream emitting raw PCM16 binary Buffer chunks
|
|
653
|
+
*/
|
|
654
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
655
|
+
toReadable(options, reuseConnection = true) {
|
|
656
|
+
const { Readable } = require("stream");
|
|
657
|
+
const readable = new Readable({ read() {
|
|
658
|
+
} });
|
|
659
|
+
this.stream(
|
|
660
|
+
options,
|
|
661
|
+
{
|
|
662
|
+
onChunk: (chunk) => {
|
|
663
|
+
readable.push(Buffer.from(chunk.audio, "base64"));
|
|
664
|
+
},
|
|
665
|
+
onFinal: () => {
|
|
666
|
+
readable.push(null);
|
|
667
|
+
},
|
|
668
|
+
onError: (error) => {
|
|
669
|
+
readable.destroy(error);
|
|
670
|
+
}
|
|
671
|
+
},
|
|
672
|
+
reuseConnection
|
|
673
|
+
).catch((error) => {
|
|
674
|
+
readable.destroy(error);
|
|
675
|
+
});
|
|
676
|
+
return readable;
|
|
677
|
+
}
|
|
310
678
|
/**
|
|
311
679
|
* Build the WebSocket URL with appropriate auth param.
|
|
312
680
|
*/
|
|
@@ -348,10 +716,17 @@ var TTSResource = class {
|
|
|
348
716
|
this.wsConnection = ws;
|
|
349
717
|
this.wsUrl = url;
|
|
350
718
|
this.setupMessageHandler(ws);
|
|
719
|
+
this.startKeepalive(ws);
|
|
351
720
|
resolve(ws);
|
|
352
721
|
};
|
|
353
|
-
ws.onerror = () => {
|
|
354
|
-
|
|
722
|
+
ws.onerror = (event) => {
|
|
723
|
+
const underlying = event?.error ?? event;
|
|
724
|
+
const typed = classifyWsHandshakeError(underlying);
|
|
725
|
+
reject(
|
|
726
|
+
typed ?? new ConnectionError(
|
|
727
|
+
`Could not establish KugelAudio WebSocket connection to ${url}. Check network connectivity.`
|
|
728
|
+
)
|
|
729
|
+
);
|
|
355
730
|
};
|
|
356
731
|
});
|
|
357
732
|
}
|
|
@@ -366,7 +741,7 @@ var TTSResource = class {
|
|
|
366
741
|
const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
|
|
367
742
|
if (!pending) return;
|
|
368
743
|
if (data.error) {
|
|
369
|
-
const error = this.parseError(data
|
|
744
|
+
const error = this.parseError(data);
|
|
370
745
|
pending.callbacks.onError?.(error);
|
|
371
746
|
this.pendingRequests.delete(requestId);
|
|
372
747
|
pending.reject(error);
|
|
@@ -379,7 +754,6 @@ var TTSResource = class {
|
|
|
379
754
|
totalSamples: data.total_samples,
|
|
380
755
|
durationMs: data.dur_ms,
|
|
381
756
|
generationMs: data.gen_ms,
|
|
382
|
-
ttfaMs: data.ttfa_ms,
|
|
383
757
|
rtf: data.rtf,
|
|
384
758
|
error: data.error
|
|
385
759
|
};
|
|
@@ -416,20 +790,23 @@ var TTSResource = class {
|
|
|
416
790
|
}
|
|
417
791
|
};
|
|
418
792
|
ws.onclose = (event) => {
|
|
793
|
+
this.stopKeepalive();
|
|
419
794
|
this.wsConnection = null;
|
|
420
795
|
this.wsUrl = null;
|
|
421
796
|
for (const [id, pending] of this.pendingRequests) {
|
|
422
797
|
pending.callbacks.onClose?.();
|
|
423
|
-
if (event.code === 4001) {
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
pending.reject(
|
|
798
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
799
|
+
const error = classifyWsClose(event.code, event.reason);
|
|
800
|
+
pending.callbacks.onError?.(error);
|
|
801
|
+
pending.reject(error);
|
|
427
802
|
}
|
|
428
803
|
this.pendingRequests.delete(id);
|
|
429
804
|
}
|
|
430
805
|
};
|
|
431
806
|
ws.onerror = () => {
|
|
432
|
-
const error = new
|
|
807
|
+
const error = new ConnectionError(
|
|
808
|
+
"KugelAudio WebSocket connection error. Check network connectivity."
|
|
809
|
+
);
|
|
433
810
|
for (const [id, pending] of this.pendingRequests) {
|
|
434
811
|
pending.callbacks.onError?.(error);
|
|
435
812
|
pending.reject(error);
|
|
@@ -455,6 +832,7 @@ var TTSResource = class {
|
|
|
455
832
|
* Stream with connection pooling (fast path).
|
|
456
833
|
*/
|
|
457
834
|
async streamWithPooling(options, callbacks) {
|
|
835
|
+
warnIfNoLanguage(options.language, options.normalize);
|
|
458
836
|
const ws = await this.getConnection();
|
|
459
837
|
const requestId = ++this.requestCounter;
|
|
460
838
|
return new Promise((resolve, reject) => {
|
|
@@ -465,11 +843,14 @@ var TTSResource = class {
|
|
|
465
843
|
model_id: options.modelId || "kugel-1-turbo",
|
|
466
844
|
voice_id: options.voiceId,
|
|
467
845
|
cfg_scale: options.cfgScale ?? 2,
|
|
846
|
+
...options.temperature !== void 0 && { temperature: options.temperature },
|
|
468
847
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
469
848
|
sample_rate: options.sampleRate ?? 24e3,
|
|
470
849
|
normalize: options.normalize ?? true,
|
|
471
850
|
...options.language && { language: options.language },
|
|
472
|
-
...options.wordTimestamps && { word_timestamps: true }
|
|
851
|
+
...options.wordTimestamps && { word_timestamps: true },
|
|
852
|
+
...options.speed !== void 0 && { speed: options.speed },
|
|
853
|
+
...options.projectId !== void 0 && { project_id: options.projectId }
|
|
473
854
|
}));
|
|
474
855
|
});
|
|
475
856
|
}
|
|
@@ -477,6 +858,7 @@ var TTSResource = class {
|
|
|
477
858
|
* Stream without connection pooling (original behavior).
|
|
478
859
|
*/
|
|
479
860
|
streamWithoutPooling(options, callbacks) {
|
|
861
|
+
warnIfNoLanguage(options.language, options.normalize);
|
|
480
862
|
return new Promise((resolve, reject) => {
|
|
481
863
|
const url = this.buildWsUrl();
|
|
482
864
|
const ws = createWs(url);
|
|
@@ -491,7 +873,9 @@ var TTSResource = class {
|
|
|
491
873
|
sample_rate: options.sampleRate ?? 24e3,
|
|
492
874
|
normalize: options.normalize ?? true,
|
|
493
875
|
...options.language && { language: options.language },
|
|
494
|
-
...options.wordTimestamps && { word_timestamps: true }
|
|
876
|
+
...options.wordTimestamps && { word_timestamps: true },
|
|
877
|
+
...options.speed !== void 0 && { speed: options.speed },
|
|
878
|
+
...options.projectId !== void 0 && { project_id: options.projectId }
|
|
495
879
|
}));
|
|
496
880
|
};
|
|
497
881
|
ws.onmessage = (event) => {
|
|
@@ -499,7 +883,7 @@ var TTSResource = class {
|
|
|
499
883
|
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
500
884
|
const data = JSON.parse(messageData);
|
|
501
885
|
if (data.error) {
|
|
502
|
-
const error = this.parseError(data
|
|
886
|
+
const error = this.parseError(data);
|
|
503
887
|
callbacks.onError?.(error);
|
|
504
888
|
ws.close();
|
|
505
889
|
reject(error);
|
|
@@ -512,7 +896,6 @@ var TTSResource = class {
|
|
|
512
896
|
totalSamples: data.total_samples,
|
|
513
897
|
durationMs: data.dur_ms,
|
|
514
898
|
generationMs: data.gen_ms,
|
|
515
|
-
ttfaMs: data.ttfa_ms,
|
|
516
899
|
rtf: data.rtf,
|
|
517
900
|
error: data.error
|
|
518
901
|
};
|
|
@@ -548,25 +931,54 @@ var TTSResource = class {
|
|
|
548
931
|
console.error("Failed to parse WebSocket message:", e);
|
|
549
932
|
}
|
|
550
933
|
};
|
|
551
|
-
ws.onerror = () => {
|
|
552
|
-
const
|
|
934
|
+
ws.onerror = (event) => {
|
|
935
|
+
const underlying = event?.error ?? event;
|
|
936
|
+
const error = classifyWsHandshakeError(underlying) ?? new ConnectionError(
|
|
937
|
+
"KugelAudio WebSocket connection error. Check network connectivity."
|
|
938
|
+
);
|
|
553
939
|
callbacks.onError?.(error);
|
|
554
940
|
reject(error);
|
|
555
941
|
};
|
|
556
942
|
ws.onclose = (event) => {
|
|
557
943
|
callbacks.onClose?.();
|
|
558
|
-
if (event.code === 4001) {
|
|
559
|
-
|
|
560
|
-
|
|
561
|
-
reject(
|
|
944
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
945
|
+
const error = classifyWsClose(event.code, event.reason);
|
|
946
|
+
callbacks.onError?.(error);
|
|
947
|
+
reject(error);
|
|
562
948
|
}
|
|
563
949
|
};
|
|
564
950
|
});
|
|
565
951
|
}
|
|
952
|
+
/**
|
|
953
|
+
* Start periodic keepalive pings on the pooled connection.
|
|
954
|
+
* Uses the ws package's ping() in Node.js; silently skips in browsers
|
|
955
|
+
* where WebSocket doesn't expose a ping method.
|
|
956
|
+
*/
|
|
957
|
+
startKeepalive(ws) {
|
|
958
|
+
this.stopKeepalive();
|
|
959
|
+
const intervalMs = this.client.keepalivePingInterval;
|
|
960
|
+
if (intervalMs == null || intervalMs <= 0) return;
|
|
961
|
+
this.keepaliveTimer = setInterval(() => {
|
|
962
|
+
if (this.wsConnection !== ws || ws.readyState !== WS_OPEN) {
|
|
963
|
+
this.stopKeepalive();
|
|
964
|
+
return;
|
|
965
|
+
}
|
|
966
|
+
if (typeof ws.ping === "function") {
|
|
967
|
+
ws.ping();
|
|
968
|
+
}
|
|
969
|
+
}, intervalMs);
|
|
970
|
+
}
|
|
971
|
+
stopKeepalive() {
|
|
972
|
+
if (this.keepaliveTimer !== null) {
|
|
973
|
+
clearInterval(this.keepaliveTimer);
|
|
974
|
+
this.keepaliveTimer = null;
|
|
975
|
+
}
|
|
976
|
+
}
|
|
566
977
|
/**
|
|
567
978
|
* Close the pooled WebSocket connection.
|
|
568
979
|
*/
|
|
569
980
|
close() {
|
|
981
|
+
this.stopKeepalive();
|
|
570
982
|
if (this.wsConnection) {
|
|
571
983
|
try {
|
|
572
984
|
this.wsConnection.close();
|
|
@@ -576,15 +988,39 @@ var TTSResource = class {
|
|
|
576
988
|
this.wsUrl = null;
|
|
577
989
|
}
|
|
578
990
|
}
|
|
579
|
-
parseError(
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
991
|
+
parseError(data) {
|
|
992
|
+
return classifyWsFrame(data);
|
|
993
|
+
}
|
|
994
|
+
/**
|
|
995
|
+
* Create a streaming session for LLM integration.
|
|
996
|
+
*
|
|
997
|
+
* The session connects to `/ws/tts/stream` and keeps a persistent
|
|
998
|
+
* connection across multiple {@link StreamingSession.send} calls.
|
|
999
|
+
* The server auto-chunks text at sentence boundaries — no client-side
|
|
1000
|
+
* flushing required.
|
|
1001
|
+
*
|
|
1002
|
+
* @param config - Session configuration (voice, model, chunking strategy).
|
|
1003
|
+
* @param callbacks - Callbacks for audio chunks and session lifecycle events.
|
|
1004
|
+
* @returns A {@link StreamingSession} instance. Call `.connect()` before sending.
|
|
1005
|
+
*
|
|
1006
|
+
* @example
|
|
1007
|
+
* ```typescript
|
|
1008
|
+
* const session = client.tts.streamingSession(
|
|
1009
|
+
* { voiceId: 123, autoMode: true, chunkLengthSchedule: [50, 100, 150, 250] },
|
|
1010
|
+
* { onChunk: (chunk) => playAudio(chunk.audio) },
|
|
1011
|
+
* );
|
|
1012
|
+
*
|
|
1013
|
+
* session.connect();
|
|
1014
|
+
*
|
|
1015
|
+
* for await (const token of llmStream) {
|
|
1016
|
+
* session.send(token);
|
|
1017
|
+
* }
|
|
1018
|
+
*
|
|
1019
|
+
* await session.close();
|
|
1020
|
+
* ```
|
|
1021
|
+
*/
|
|
1022
|
+
streamingSession(config, callbacks) {
|
|
1023
|
+
return new StreamingSession(this.client, config, callbacks);
|
|
588
1024
|
}
|
|
589
1025
|
/**
|
|
590
1026
|
* Create a multi-context session for concurrent TTS streams.
|
|
@@ -604,7 +1040,7 @@ var TTSResource = class {
|
|
|
604
1040
|
* console.log(`Audio from ${chunk.contextId}`);
|
|
605
1041
|
* playAudio(chunk.audio);
|
|
606
1042
|
* },
|
|
607
|
-
*
|
|
1043
|
+
* onContextClosed: (contextId) => {
|
|
608
1044
|
* console.log(`${contextId} finished`);
|
|
609
1045
|
* },
|
|
610
1046
|
* });
|
|
@@ -643,6 +1079,11 @@ var MultiContextSession = class {
|
|
|
643
1079
|
}
|
|
644
1080
|
/**
|
|
645
1081
|
* Connect to the multi-context WebSocket endpoint.
|
|
1082
|
+
*
|
|
1083
|
+
* The returned promise resolves once the WebSocket is OPEN so callers can
|
|
1084
|
+
* ``await session.connect(callbacks)`` before invoking
|
|
1085
|
+
* {@link createContext} / {@link send}. Pre-open errors reject with the
|
|
1086
|
+
* typed error.
|
|
646
1087
|
*/
|
|
647
1088
|
connect(callbacks) {
|
|
648
1089
|
this.callbacks = callbacks;
|
|
@@ -657,9 +1098,8 @@ var MultiContextSession = class {
|
|
|
657
1098
|
}
|
|
658
1099
|
const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
|
|
659
1100
|
this.ws = createWs(url);
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
this.ws.onmessage = (event) => {
|
|
1101
|
+
const ws = this.ws;
|
|
1102
|
+
ws.onmessage = (event) => {
|
|
663
1103
|
try {
|
|
664
1104
|
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
665
1105
|
const data = JSON.parse(messageData);
|
|
@@ -690,9 +1130,6 @@ var MultiContextSession = class {
|
|
|
690
1130
|
};
|
|
691
1131
|
this.callbacks.onChunk?.(chunk);
|
|
692
1132
|
}
|
|
693
|
-
if (data.is_final) {
|
|
694
|
-
this.callbacks.onContextFinal?.(data.context_id);
|
|
695
|
-
}
|
|
696
1133
|
if (data.context_closed) {
|
|
697
1134
|
this.contexts.delete(data.context_id);
|
|
698
1135
|
this.callbacks.onContextClosed?.(data.context_id);
|
|
@@ -708,19 +1145,38 @@ var MultiContextSession = class {
|
|
|
708
1145
|
console.error("Failed to parse WebSocket message:", e);
|
|
709
1146
|
}
|
|
710
1147
|
};
|
|
711
|
-
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
1148
|
+
return new Promise((resolve, reject) => {
|
|
1149
|
+
let opened = false;
|
|
1150
|
+
ws.onopen = () => {
|
|
1151
|
+
opened = true;
|
|
1152
|
+
resolve();
|
|
1153
|
+
};
|
|
1154
|
+
ws.onerror = (event) => {
|
|
1155
|
+
const underlying = event?.error ?? event;
|
|
1156
|
+
const err = classifyWsHandshakeError(underlying) ?? new ConnectionError(
|
|
1157
|
+
"KugelAudio multi-context WebSocket connection error. Check network connectivity."
|
|
1158
|
+
);
|
|
1159
|
+
if (!opened) reject(err);
|
|
1160
|
+
this.callbacks.onError?.(err);
|
|
1161
|
+
};
|
|
1162
|
+
ws.onclose = (event) => {
|
|
1163
|
+
let typedErr = null;
|
|
1164
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
1165
|
+
typedErr = classifyWsClose(event.code, event.reason);
|
|
1166
|
+
this.callbacks.onError?.(typedErr);
|
|
1167
|
+
}
|
|
1168
|
+
if (!opened) {
|
|
1169
|
+
reject(
|
|
1170
|
+
typedErr ?? new ConnectionError(
|
|
1171
|
+
`KugelAudio multi-context WebSocket closed before ready (code ${event.code}).`
|
|
1172
|
+
)
|
|
1173
|
+
);
|
|
1174
|
+
}
|
|
1175
|
+
this.ws = null;
|
|
1176
|
+
this.isStarted = false;
|
|
1177
|
+
this.contexts.clear();
|
|
1178
|
+
};
|
|
1179
|
+
});
|
|
724
1180
|
}
|
|
725
1181
|
/**
|
|
726
1182
|
* Create a new context with optional voice settings.
|
|
@@ -734,10 +1190,13 @@ var MultiContextSession = class {
|
|
|
734
1190
|
context_id: contextId
|
|
735
1191
|
};
|
|
736
1192
|
if (!this.isStarted) {
|
|
1193
|
+
warnIfNoLanguage(this.config.language, this.config.normalize);
|
|
737
1194
|
if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
|
|
738
1195
|
if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
|
|
1196
|
+
if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
|
|
739
1197
|
if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
|
|
740
1198
|
if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
|
|
1199
|
+
if (this.config.language) msg.language = this.config.language;
|
|
741
1200
|
if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
|
|
742
1201
|
}
|
|
743
1202
|
const voiceId = options?.voiceId || this.config.defaultVoiceId;
|
|
@@ -824,18 +1283,274 @@ var MultiContextSession = class {
|
|
|
824
1283
|
return this.ws !== null && this.ws.readyState === WS_OPEN;
|
|
825
1284
|
}
|
|
826
1285
|
};
|
|
1286
|
+
/**
 * Streaming text-to-speech session over one WebSocket (`/ws/tts/stream`).
 *
 * Text is pushed with {@link send}; server frames are decoded in the
 * message handler installed by {@link connect} and forwarded to the
 * callbacks supplied at construction time. Session settings are attached
 * to the first message sent after connecting, after {@link endSession},
 * or after {@link updateConfig}.
 */
var StreamingSession = class {
  /**
   * @param client - Owning client; `ttsUrl`, `apiKey`, `isToken` and
   *   `isMasterKey` are read from it when connecting.
   * @param config - Session settings. Shallow-copied so that
   *   {@link updateConfig} never mutates the object the caller passed in.
   * @param callbacks - Optional event callbacks. Defaults to an empty object
   *   so that the socket handlers can always dereference it safely.
   */
  constructor(client, config, callbacks) {
    this.ws = null;
    this.configSent = false;
    this.client = client;
    this.config = { ...config };
    this.callbacks = callbacks ?? {};
  }
  /**
   * Open the WebSocket connection and authenticate.
   *
   * The returned promise resolves once the WebSocket is OPEN, so callers can
   * ``await session.connect()`` and then ``send()`` without racing the
   * handshake. Pre-open errors (network failure, 4001 unauthorized, …) reject
   * the promise with the typed error.
   */
  connect() {
    const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
    // The credential is passed as a query parameter whose name depends on
    // the kind of credential the client was configured with.
    let authParam;
    if (this.client.isToken) {
      authParam = "token";
    } else if (this.client.isMasterKey) {
      authParam = "master_key";
    } else {
      authParam = "api_key";
    }
    const url = `${wsUrl}/ws/tts/stream?${authParam}=${this.client.apiKey}`;
    this.ws = createWs(url);
    const ws = this.ws;
    ws.onmessage = (event) => {
      try {
        const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
        const data = JSON.parse(messageData);
        if (data.error) {
          // An error frame carries nothing else of interest; report and stop.
          this.callbacks.onError?.(new KugelAudioError(data.error));
          return;
        }
        // One frame may carry several of the fields below, so each is
        // checked independently rather than through an else-chain.
        if (data.audio) {
          const chunk = {
            audio: data.audio,
            encoding: data.enc || "pcm_s16le",
            index: data.idx,
            sampleRate: data.sr,
            samples: data.samples
          };
          this.callbacks.onChunk?.(chunk);
        }
        if (data.word_timestamps) {
          const timestamps = data.word_timestamps.map((w) => ({
            word: w.word,
            startMs: w.start_ms,
            endMs: w.end_ms,
            charStart: w.char_start,
            charEnd: w.char_end,
            score: w.score ?? 1
          }));
          this.callbacks.onWordTimestamps?.(timestamps);
        }
        if (data.chunk_complete) {
          this.callbacks.onChunkComplete?.(
            data.chunk_id ?? 0,
            data.audio_seconds ?? 0,
            data.gen_ms ?? 0
          );
        }
        if (data.generation_started) {
          this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? "");
        }
        if (data.session_closed) {
          this.callbacks.onSessionClosed?.(
            data.total_audio_seconds ?? 0,
            data.total_text_chunks ?? 0,
            data.total_audio_chunks ?? 0
          );
        }
      } catch (e) {
        // Reached for malformed JSON and also for exceptions thrown by a
        // user callback invoked above.
        console.error("[KugelAudio] Failed to parse streaming session message:", e);
      }
    };
    return new Promise((resolve, reject) => {
      // Tracks whether the handshake completed: failures before that point
      // settle the promise, later ones are reported through onError only.
      let opened = false;
      ws.onopen = () => {
        opened = true;
        resolve();
      };
      ws.onerror = (event) => {
        const underlying = event?.error ?? event;
        const err = classifyWsHandshakeError(underlying) ?? new ConnectionError(
          "KugelAudio streaming WebSocket connection error. Check network connectivity."
        );
        if (!opened) reject(err);
        this.callbacks.onError?.(err);
      };
      ws.onclose = (event) => {
        let typedErr = null;
        // Only these application close codes are turned into typed errors.
        if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
          typedErr = classifyWsClose(event.code, event.reason);
          this.callbacks.onError?.(typedErr);
        }
        if (!opened) {
          reject(
            typedErr ?? new ConnectionError(
              `KugelAudio streaming WebSocket closed before ready (code ${event.code}).`
            )
          );
        }
        // Drop the socket and force the settings to be re-sent after a reconnect.
        this.ws = null;
        this.configSent = false;
      };
    });
  }
  /**
   * Send a text chunk to the server (e.g. one LLM output token).
   *
   * The server buffers text across multiple calls and starts generating at
   * natural sentence boundaries automatically — no need to call `flush`.
   *
   * @param text - Raw text or LLM token to append to the server buffer.
   * @param flush - Force immediate generation of whatever is buffered.
   *   **Avoid calling this per-sentence from the client.** Doing so bypasses
   *   the server's semantic chunking, incurs a fresh model prefill cost on
   *   every flush, and makes latency *worse*, not better. Let the server
   *   handle chunking via `chunkLengthSchedule` / `autoMode` instead.
   * @throws KugelAudioError when the socket is missing or not open.
   */
  send(text, flush = false) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("StreamingSession not connected. Call connect() first.");
    }
    const msg = { text, flush };
    if (!this.configSent) {
      // Session settings ride along with the first message only; settings
      // left undefined are omitted from the payload.
      if (this.config.voiceId !== void 0) msg.voice_id = this.config.voiceId;
      if (this.config.modelId !== void 0) msg.model_id = this.config.modelId;
      if (this.config.cfgScale !== void 0) msg.cfg_scale = this.config.cfgScale;
      if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
      if (this.config.maxNewTokens !== void 0) msg.max_new_tokens = this.config.maxNewTokens;
      if (this.config.sampleRate !== void 0) msg.sample_rate = this.config.sampleRate;
      if (this.config.flushTimeoutMs !== void 0) msg.flush_timeout_ms = this.config.flushTimeoutMs;
      if (this.config.maxBufferLength !== void 0) msg.max_buffer_length = this.config.maxBufferLength;
      if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
      if (this.config.language !== void 0) msg.language = this.config.language;
      if (this.config.wordTimestamps) msg.word_timestamps = true;
      if (this.config.autoMode !== void 0) msg.auto_mode = this.config.autoMode;
      if (this.config.chunkLengthSchedule?.length) msg.chunk_length_schedule = this.config.chunkLengthSchedule;
      if (this.config.speed !== void 0) msg.speed = this.config.speed;
      this.configSent = true;
    }
    this.ws.send(JSON.stringify(msg));
  }
  /**
   * End the current session but keep the WebSocket connection open.
   *
   * This allows starting a new session on the same connection, avoiding
   * the overhead of a new WebSocket handshake (~200-300ms). After calling
   * this, optionally call {@link updateConfig} to change voice/model settings,
   * then call {@link send} to start the next session.
   *
   * The returned promise resolves once the server confirms with a
   * `session_closed` message, or after a 15 s **quiet** timeout — i.e. 15 s
   * elapse without *any* server message arriving. The timer resets on every
   * incoming frame so a long final flush that streams audio for tens of
   * seconds is not truncated; only a genuinely silent server trips the fuse.
   * It also resolves if the socket closes first, and resolves immediately
   * when the socket is not open.
   */
  endSession() {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
    const ws = this.ws;
    const QUIET_TIMEOUT_MS = 15e3;
    return new Promise((resolve) => {
      let settled = false;
      let timer;
      // The regular handlers are wrapped for the duration of the wait and
      // put back by done().
      const prevMessage = ws.onmessage;
      const prevClose = ws.onclose;
      const done = () => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        ws.onmessage = prevMessage;
        ws.onclose = prevClose;
        // The next send() must carry the session settings again.
        this.configSent = false;
        resolve();
      };
      const armQuietTimer = () => {
        clearTimeout(timer);
        timer = setTimeout(done, QUIET_TIMEOUT_MS);
      };
      armQuietTimer();
      ws.onmessage = (event) => {
        armQuietTimer();
        // Deliver the frame to the regular handler first so callbacks fire
        // before this promise resolves.
        if (prevMessage) prevMessage.call(ws, event);
        try {
          const raw = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
          if (JSON.parse(raw).session_closed) done();
        } catch {
          // Frames that cannot be decoded here are ignored for the purpose
          // of detecting session_closed.
        }
      };
      ws.onclose = (event) => {
        this.ws = null;
        if (prevClose) prevClose.call(ws, event);
        done();
      };
      ws.send(JSON.stringify({ close: true }));
    });
  }
  /**
   * Update session configuration for the next session.
   *
   * Call this after {@link endSession} and before the next {@link send}
   * to change voice, model, language, or other settings. The given fields
   * are merged into the session's own copy of the configuration, and the
   * merged settings are attached to the next message sent.
   */
  updateConfig(config) {
    Object.assign(this.config, config);
    this.configSent = false;
  }
  /**
   * Close the session and the WebSocket connection.
   *
   * For session reuse without closing the connection, use
   * {@link endSession} instead.
   *
   * The returned promise resolves once the server confirms the close with a
   * `session_closed` message, or after a 15 s **quiet** timeout (no traffic
   * from the server in that window). Audio frames from the server-side
   * final-flush of the still-buffered text are delivered to your callbacks
   * before this promise resolves, and each frame resets the quiet timer.
   */
  async close() {
    await this.endSession();
    if (this.ws) {
      try {
        this.ws.close();
      } catch {
        // Best effort: the socket reference is dropped regardless.
      }
      this.ws = null;
    }
  }
  /** Whether the underlying WebSocket is open. */
  get isConnected() {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }
};
|
|
827
1525
|
var KugelAudio = class _KugelAudio {
|
|
828
1526
|
constructor(options) {
|
|
829
1527
|
if (!options.apiKey) {
|
|
830
|
-
throw new
|
|
1528
|
+
throw new ValidationError(
|
|
1529
|
+
"KugelAudio API key is missing. Set the KUGELAUDIO_API_KEY environment variable or pass { apiKey: ... } to the client. Get a key at https://app.kugelaudio.com/settings/api-keys."
|
|
1530
|
+
);
|
|
831
1531
|
}
|
|
832
|
-
|
|
1532
|
+
const { cleanKey, detectedRegion } = parseApiKey(options.apiKey);
|
|
1533
|
+
this._apiKey = cleanKey;
|
|
833
1534
|
this._isMasterKey = options.isMasterKey || false;
|
|
834
1535
|
this._isToken = options.isToken || false;
|
|
835
1536
|
this._orgId = options.orgId;
|
|
836
|
-
|
|
1537
|
+
if (options.apiUrl) {
|
|
1538
|
+
this._apiUrl = options.apiUrl.replace(/\/$/, "");
|
|
1539
|
+
} else {
|
|
1540
|
+
const effectiveRegion = options.region || detectedRegion;
|
|
1541
|
+
if (!effectiveRegion) {
|
|
1542
|
+
this._apiUrl = DEFAULT_API_URL;
|
|
1543
|
+
} else if (!SUPPORTED_REGIONS.includes(effectiveRegion)) {
|
|
1544
|
+
throw new ValidationError(
|
|
1545
|
+
`Invalid region '${effectiveRegion}'. Must be one of: ${SUPPORTED_REGIONS.join(", ")}.`
|
|
1546
|
+
);
|
|
1547
|
+
} else {
|
|
1548
|
+
this._apiUrl = effectiveRegion === "eu" ? EU_API_URL : DEFAULT_API_URL;
|
|
1549
|
+
}
|
|
1550
|
+
}
|
|
837
1551
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
|
|
838
1552
|
this._timeout = options.timeout || 6e4;
|
|
1553
|
+
this._keepalivePingInterval = options.keepalivePingInterval !== void 0 ? options.keepalivePingInterval : 2e4;
|
|
839
1554
|
this.models = new ModelsResource(this);
|
|
840
1555
|
this.voices = new VoicesResource(this);
|
|
841
1556
|
this.tts = new TTSResource(this);
|
|
@@ -881,6 +1596,10 @@ var KugelAudio = class _KugelAudio {
|
|
|
881
1596
|
get ttsUrl() {
|
|
882
1597
|
return this._ttsUrl;
|
|
883
1598
|
}
|
|
1599
|
+
/** Get keepalive ping interval in milliseconds, or null if disabled. */
|
|
1600
|
+
get keepalivePingInterval() {
|
|
1601
|
+
return this._keepalivePingInterval;
|
|
1602
|
+
}
|
|
884
1603
|
/**
|
|
885
1604
|
* Close the client and release resources.
|
|
886
1605
|
* This closes any pooled WebSocket connections.
|
|
@@ -935,25 +1654,49 @@ var KugelAudio = class _KugelAudio {
|
|
|
935
1654
|
signal: controller.signal
|
|
936
1655
|
});
|
|
937
1656
|
clearTimeout(timeoutId);
|
|
938
|
-
if (response.
|
|
939
|
-
|
|
1657
|
+
if (!response.ok) {
|
|
1658
|
+
const text = await response.text();
|
|
1659
|
+
throw classifyHttpError(response.status, text, response.headers);
|
|
940
1660
|
}
|
|
941
|
-
|
|
942
|
-
|
|
1661
|
+
return await response.json();
|
|
1662
|
+
} catch (error) {
|
|
1663
|
+
clearTimeout(timeoutId);
|
|
1664
|
+
if (error instanceof KugelAudioError) {
|
|
1665
|
+
throw error;
|
|
943
1666
|
}
|
|
944
|
-
if (
|
|
945
|
-
throw new
|
|
1667
|
+
if (error.name === "AbortError") {
|
|
1668
|
+
throw new ConnectionError(
|
|
1669
|
+
`Request to ${method} ${path} timed out after ${this._timeout}ms.`
|
|
1670
|
+
);
|
|
946
1671
|
}
|
|
1672
|
+
throw new ConnectionError(
|
|
1673
|
+
`Could not reach KugelAudio at ${url}: ${error.message}. Check network connectivity.`
|
|
1674
|
+
);
|
|
1675
|
+
}
|
|
1676
|
+
}
|
|
1677
|
+
/**
|
|
1678
|
+
* Make a multipart/form-data request (for file uploads).
|
|
1679
|
+
* @internal Used by VoicesResource for reference file uploads.
|
|
1680
|
+
*/
|
|
1681
|
+
async requestMultipart(method, path, formData) {
|
|
1682
|
+
const url = `${this._apiUrl}${path}`;
|
|
1683
|
+
const headers = {
|
|
1684
|
+
"X-API-Key": this._apiKey,
|
|
1685
|
+
"Authorization": `Bearer ${this._apiKey}`
|
|
1686
|
+
};
|
|
1687
|
+
const controller = new AbortController();
|
|
1688
|
+
const timeoutId = setTimeout(() => controller.abort(), this._timeout);
|
|
1689
|
+
try {
|
|
1690
|
+
const response = await fetch(url, {
|
|
1691
|
+
method,
|
|
1692
|
+
headers,
|
|
1693
|
+
body: formData,
|
|
1694
|
+
signal: controller.signal
|
|
1695
|
+
});
|
|
1696
|
+
clearTimeout(timeoutId);
|
|
947
1697
|
if (!response.ok) {
|
|
948
1698
|
const text = await response.text();
|
|
949
|
-
|
|
950
|
-
try {
|
|
951
|
-
const json = JSON.parse(text);
|
|
952
|
-
message = json.detail || json.error || message;
|
|
953
|
-
} catch {
|
|
954
|
-
message = text || message;
|
|
955
|
-
}
|
|
956
|
-
throw new KugelAudioError(message, response.status);
|
|
1699
|
+
throw classifyHttpError(response.status, text, response.headers);
|
|
957
1700
|
}
|
|
958
1701
|
return await response.json();
|
|
959
1702
|
} catch (error) {
|
|
@@ -962,9 +1705,13 @@ var KugelAudio = class _KugelAudio {
|
|
|
962
1705
|
throw error;
|
|
963
1706
|
}
|
|
964
1707
|
if (error.name === "AbortError") {
|
|
965
|
-
throw new
|
|
1708
|
+
throw new ConnectionError(
|
|
1709
|
+
`Request to ${method} ${path} timed out after ${this._timeout}ms.`
|
|
1710
|
+
);
|
|
966
1711
|
}
|
|
967
|
-
throw new
|
|
1712
|
+
throw new ConnectionError(
|
|
1713
|
+
`Could not reach KugelAudio at ${url}: ${error.message}. Check network connectivity.`
|
|
1714
|
+
);
|
|
968
1715
|
}
|
|
969
1716
|
}
|
|
970
1717
|
};
|
|
@@ -972,12 +1719,18 @@ var KugelAudio = class _KugelAudio {
|
|
|
972
1719
|
0 && (module.exports = {
|
|
973
1720
|
AuthenticationError,
|
|
974
1721
|
ConnectionError,
|
|
1722
|
+
ErrorCodes,
|
|
975
1723
|
InsufficientCreditsError,
|
|
976
1724
|
KugelAudio,
|
|
977
1725
|
KugelAudioError,
|
|
978
1726
|
RateLimitError,
|
|
979
1727
|
ValidationError,
|
|
1728
|
+
WsCloseCodes,
|
|
980
1729
|
base64ToArrayBuffer,
|
|
1730
|
+
classifyHttpError,
|
|
1731
|
+
classifyWsClose,
|
|
1732
|
+
classifyWsFrame,
|
|
1733
|
+
classifyWsHandshakeError,
|
|
981
1734
|
createWavBlob,
|
|
982
1735
|
createWavFile,
|
|
983
1736
|
decodePCM16
|