kugelaudio 0.2.3 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +25 -0
- package/README.md +35 -14
- package/dist/index.d.mts +518 -26
- package/dist/index.d.ts +518 -26
- package/dist/index.js +864 -111
- package/dist/index.mjs +858 -111
- package/package.json +8 -7
- package/src/client.test.ts +548 -0
- package/src/client.ts +885 -103
- package/src/errors.ts +266 -18
- package/src/index.ts +17 -2
- package/src/types.ts +215 -9
- package/src/websocket.ts +38 -18
package/dist/index.mjs
CHANGED
|
@@ -6,49 +6,192 @@ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require
|
|
|
6
6
|
});
|
|
7
7
|
|
|
8
8
|
// src/errors.ts
|
|
9
|
+
var ErrorCodes = {
|
|
10
|
+
UNAUTHORIZED: "UNAUTHORIZED",
|
|
11
|
+
RATE_LIMITED: "RATE_LIMITED",
|
|
12
|
+
INSUFFICIENT_CREDITS: "INSUFFICIENT_CREDITS",
|
|
13
|
+
MODEL_UNAVAILABLE: "MODEL_UNAVAILABLE",
|
|
14
|
+
EMPTY_AUDIO: "EMPTY_AUDIO",
|
|
15
|
+
VALIDATION: "VALIDATION_ERROR",
|
|
16
|
+
INTERNAL: "INTERNAL_ERROR",
|
|
17
|
+
NOT_FOUND: "NOT_FOUND"
|
|
18
|
+
};
|
|
19
|
+
var WsCloseCodes = {
|
|
20
|
+
UNAUTHORIZED: 4001,
|
|
21
|
+
INSUFFICIENT_CREDITS: 4003,
|
|
22
|
+
RATE_LIMITED: 4029,
|
|
23
|
+
MODEL_UNAVAILABLE: 4500
|
|
24
|
+
};
|
|
25
|
+
var API_KEYS_URL = "https://app.kugelaudio.com/settings/api-keys";
|
|
26
|
+
var BILLING_URL = "https://app.kugelaudio.com/billing";
|
|
9
27
|
/**
 * Base class for every error raised by the SDK.
 *
 * @param {string} message - Human-readable description. When
 *   `options.requestId` is set it is appended as ` (request_id: …)`.
 * @param {object} [options]
 * @param {number} [options.statusCode] - Status associated with the failure.
 * @param {string} [options.errorCode] - Machine-readable error code.
 * @param {string} [options.requestId] - Request identifier, if known.
 * @param {number} [options.retryAfter] - Suggested wait before retrying.
 */
var KugelAudioError = class _KugelAudioError extends Error {
  constructor(message, options = {}) {
    const { requestId } = options;
    const text = requestId ? `${message} (request_id: ${requestId})` : message;
    super(text);
    this.name = "KugelAudioError";
    Object.assign(this, {
      statusCode: options.statusCode,
      errorCode: options.errorCode,
      requestId: options.requestId,
      retryAfter: options.retryAfter
    });
    // Keep `instanceof` working even when the class is transpiled to ES5.
    Object.setPrototypeOf(this, _KugelAudioError.prototype);
  }
};
|
|
17
38
|
/**
 * Raised when the API key is rejected.
 *
 * Defaults to status 401 and error code `UNAUTHORIZED`; values supplied in
 * `options` override those defaults. A nullish `message` is replaced with a
 * hint pointing at the API-keys page.
 */
var AuthenticationError = class _AuthenticationError extends KugelAudioError {
  constructor(message, options = {}) {
    const text = message ?? `KugelAudio rejected the API key. Check it is current at ${API_KEYS_URL}.`;
    const merged = { statusCode: 401, errorCode: ErrorCodes.UNAUTHORIZED, ...options };
    super(text, merged);
    this.name = "AuthenticationError";
    Object.setPrototypeOf(this, _AuthenticationError.prototype);
  }
};
|
|
24
48
|
/**
 * Raised when the rate limit is hit.
 *
 * Defaults to status 429 and error code `RATE_LIMITED`; `options` override
 * those defaults. When no message is given, one is generated that mentions
 * `options.retryAfter` if it is truthy.
 */
var RateLimitError = class _RateLimitError extends KugelAudioError {
  constructor(message, options = {}) {
    let text = message;
    if (text === null || text === void 0) {
      if (options.retryAfter) {
        text = `KugelAudio rate limit hit; retry after ${options.retryAfter}s.`;
      } else {
        text = "KugelAudio rate limit hit; retry shortly.";
      }
    }
    super(text, { statusCode: 429, errorCode: ErrorCodes.RATE_LIMITED, ...options });
    this.name = "RateLimitError";
    Object.setPrototypeOf(this, _RateLimitError.prototype);
  }
};
|
|
31
56
|
/**
 * Raised when the account has run out of credits.
 *
 * Defaults to status 402 and error code `INSUFFICIENT_CREDITS`; `options`
 * override those defaults. A nullish `message` is replaced with a hint that
 * points at the billing page.
 */
var InsufficientCreditsError = class _InsufficientCreditsError extends KugelAudioError {
  constructor(message, options = {}) {
    const text = message ?? `Your KugelAudio account is out of credits. Top up at ${BILLING_URL}.`;
    const merged = { statusCode: 402, errorCode: ErrorCodes.INSUFFICIENT_CREDITS, ...options };
    super(text, merged);
    this.name = "InsufficientCreditsError";
    Object.setPrototypeOf(this, _InsufficientCreditsError.prototype);
  }
};
|
|
38
66
|
/**
 * Raised when a request fails validation.
 *
 * Defaults to status 400 and error code `VALIDATION_ERROR`; `options`
 * override those defaults.
 */
var ValidationError = class _ValidationError extends KugelAudioError {
  constructor(message, options = {}) {
    const merged = { statusCode: 400, errorCode: ErrorCodes.VALIDATION, ...options };
    super(message, merged);
    this.name = "ValidationError";
    Object.setPrototypeOf(this, _ValidationError.prototype);
  }
};
|
|
45
73
|
/**
 * Raised when the service cannot be reached or a connection is dropped.
 *
 * Defaults to status 503; `options` override that default.
 *
 * @param {string} [message] - Description of the failure. A nullish value is
 *   replaced with a generic text so that `new ConnectionError()` never
 *   produces an empty message or "undefined (request_id: …)".
 * @param {object} [options] - Same options as {@link KugelAudioError}.
 */
var ConnectionError = class _ConnectionError extends KugelAudioError {
  constructor(message, options = {}) {
    super(
      message ?? "Could not connect to KugelAudio. Check network connectivity.",
      { statusCode: 503, ...options }
    );
    this.name = "ConnectionError";
    Object.setPrototypeOf(this, _ConnectionError.prototype);
  }
};
|
|
80
|
+
/**
 * Pick the most specific SDK error class for a status / error-code pair.
 *
 * Branches are tested in a fixed order (auth, credits, rate limit,
 * validation, unavailable); each matches on either the error code or the
 * numeric status. Anything else becomes a plain `KugelAudioError`.
 *
 * @param {number|undefined} status - Numeric status, if one is known.
 * @param {string|undefined} errorCode - Machine-readable error code, if any.
 * @param {string} message - Server-provided text; an empty value lets the
 *   chosen class (or this function) supply a default.
 * @param {object} [opts] - Extra fields (e.g. `requestId`, `retryAfter`)
 *   forwarded to the error constructor.
 * @returns {KugelAudioError} The constructed error (not thrown).
 */
function build(status, errorCode, message, opts = {}) {
  // Only copy status/code when defined so class-level defaults stay in effect.
  const common = { ...opts };
  if (status !== void 0) common.statusCode = status;
  if (errorCode !== void 0) common.errorCode = errorCode;
  if (errorCode === ErrorCodes.UNAUTHORIZED || status === 401) {
    return new AuthenticationError(message || void 0, common);
  }
  if (errorCode === ErrorCodes.INSUFFICIENT_CREDITS || status === 402) {
    return new InsufficientCreditsError(message || void 0, common);
  }
  if (errorCode === ErrorCodes.RATE_LIMITED || status === 429) {
    return new RateLimitError(message || void 0, common);
  }
  if (errorCode === ErrorCodes.VALIDATION || status === 400) {
    return new ValidationError(message || "Request validation failed.", common);
  }
  if (errorCode === ErrorCodes.MODEL_UNAVAILABLE || status === 503) {
    const detail = message || "service temporarily unavailable";
    return new ConnectionError(
      `KugelAudio is temporarily unavailable: ${detail}. Retry shortly.`,
      common
    );
  }
  // Without a status (WebSocket frames) the old fallback read "HTTP undefined".
  const fallback = status !== void 0 ? `HTTP ${status}` : "KugelAudio reported an unknown error.";
  return new KugelAudioError(message || fallback, common);
}
|
|
105
|
+
/**
 * Read a single header value from either a `Headers`-like object (anything
 * with a `get` method) or a plain record of header names to values.
 *
 * @param {object|null|undefined} headers - Header container; may be absent.
 * @param {string} name - Header name to look up.
 * @returns {*} The header value, or `undefined` when the container is
 *   missing or does not hold the header.
 */
function readHeader(headers, name) {
  // A missing container used to crash on the property access below.
  if (headers === null || headers === void 0) return void 0;
  if (typeof headers.get === "function") {
    return headers.get(name) ?? void 0;
  }
  const wanted = name.toLowerCase();
  const direct = headers[name] ?? headers[wanted];
  if (direct !== null && direct !== void 0) return direct;
  // Header names are case-insensitive; plain records can use any casing.
  for (const key of Object.keys(headers)) {
    const value = headers[key];
    if (key.toLowerCase() === wanted && value !== null && value !== void 0) {
      return value;
    }
  }
  return void 0;
}
|
|
112
|
+
/**
 * Turn a failed HTTP response into a typed SDK error.
 *
 * The body is parsed as JSON when possible; `error_code`, `error`/`detail`
 * and `retry_after` are read from it. `Retry-After` and `x-request-id` are
 * taken from the headers. When no message can be extracted the trimmed raw
 * body text is used.
 *
 * @param {number} status - HTTP status code of the response.
 * @param {string} bodyText - Raw response body (may be empty).
 * @param {object} headers - `Headers`-like object or plain header record.
 * @returns {KugelAudioError} The classified error (not thrown).
 */
function classifyHttpError(status, bodyText, headers) {
  // Structured list entries (objects carrying `msg`) used to render as
  // "[object Object]"; prefer their `msg`, else serialize them.
  const describe = (entry) => {
    if (entry !== null && typeof entry === "object") {
      return typeof entry.msg === "string" ? entry.msg : JSON.stringify(entry);
    }
    return String(entry);
  };
  let errorCode;
  let message = "";
  let retryAfter;
  if (bodyText) {
    try {
      const body = JSON.parse(bodyText);
      if (body && typeof body === "object") {
        errorCode = typeof body.error_code === "string" ? body.error_code : void 0;
        const raw = body.error ?? body.detail;
        if (Array.isArray(raw)) {
          message = raw.map(describe).join("; ");
        } else if (typeof raw === "string") {
          message = raw;
        }
        if (typeof body.retry_after === "number") {
          retryAfter = body.retry_after;
        }
      }
    } catch {
      // Body is not JSON; the raw text is used as the message below.
    }
  }
  if (retryAfter === void 0) {
    const header = readHeader(headers, "Retry-After") ?? readHeader(headers, "retry-after");
    if (header) {
      const seconds = Number(header);
      if (Number.isFinite(seconds)) {
        retryAfter = seconds;
      } else {
        // Retry-After may also be an HTTP-date; convert it to seconds from now.
        const when = Date.parse(String(header));
        if (Number.isFinite(when)) {
          retryAfter = Math.max(0, Math.ceil((when - Date.now()) / 1000));
        }
      }
    }
  }
  const requestId = readHeader(headers, "x-request-id") ?? readHeader(headers, "X-Request-Id");
  if (!message) {
    message = (bodyText || "").trim();
  }
  return build(status, errorCode, message, { requestId, retryAfter });
}
|
|
147
|
+
/**
 * Turn an error frame received over the WebSocket into a typed SDK error.
 *
 * @param {object} data - Parsed frame; `error_code`, `error` and
 *   `retry_after` are read from it.
 * @returns {KugelAudioError} The classified error (not thrown).
 */
function classifyWsFrame(data) {
  const { error_code: errorCode, retry_after: rawRetryAfter } = data;
  const text = data.error ?? "Server reported an error.";
  const extras = {
    retryAfter: typeof rawRetryAfter === "number" ? rawRetryAfter : void 0
  };
  // Frames carry no HTTP status, so classification relies on the error code.
  return build(void 0, errorCode, text, extras);
}
|
|
153
|
+
/**
 * Turn a WebSocket close code / reason into a typed SDK error.
 *
 * @param {number|undefined} code - Close code reported by the socket.
 * @param {string|null|undefined} reason - Close reason text, if any.
 * @returns {KugelAudioError} The classified error (not thrown). Codes other
 *   than the known application codes yield a generic `ConnectionError`.
 */
function classifyWsClose(code, reason) {
  const trimmed = (reason ?? "").trim();
  switch (code) {
    case WsCloseCodes.UNAUTHORIZED: {
      const base = `KugelAudio rejected the API key. Check it is current at ${API_KEYS_URL}.`;
      return new AuthenticationError(trimmed ? `${base} (${trimmed})` : base);
    }
    case WsCloseCodes.INSUFFICIENT_CREDITS:
      return new InsufficientCreditsError();
    case WsCloseCodes.RATE_LIMITED:
      return new RateLimitError();
    case WsCloseCodes.MODEL_UNAVAILABLE: {
      const suffix = trimmed ? ` (${trimmed})` : "";
      return new ConnectionError(
        `KugelAudio model is temporarily unavailable. Retry shortly.${suffix}`
      );
    }
    default: {
      const detail = trimmed || "no reason given";
      const codeStr = code !== void 0 ? ` (code ${code})` : "";
      return new ConnectionError(
        `KugelAudio WebSocket closed by server: ${detail}${codeStr}.`
      );
    }
  }
}
|
|
178
|
+
/**
 * Try to derive a typed SDK error from a WebSocket handshake failure.
 *
 * The status is taken from a numeric `statusCode` property, or else parsed
 * out of a message of the form "Unexpected server response: NNN".
 *
 * @param {*} err - Value delivered by the socket's error event.
 * @returns {KugelAudioError|null} A classified error, or `null` when no
 *   status can be determined.
 */
function classifyWsHandshakeError(err) {
  if (!err || typeof err !== "object") return null;
  const text = typeof err.message === "string" ? err.message : void 0;
  let status = typeof err.statusCode === "number" ? err.statusCode : void 0;
  if (status === void 0 && text !== void 0) {
    const hit = text.match(/Unexpected server response:\s*(\d{3})/i);
    if (hit) status = Number(hit[1]);
  }
  if (status === void 0) return null;
  // A 403 during the handshake is reported as an authentication failure.
  if (status === 403) return new AuthenticationError();
  return build(status, void 0, text !== void 0 ? text : "");
}
|
|
52
195
|
|
|
53
196
|
// src/utils.ts
|
|
54
197
|
function base64ToArrayBuffer(base64) {
|
|
@@ -108,21 +251,26 @@ function createWavBlob(audio, sampleRate) {
|
|
|
108
251
|
|
|
109
252
|
// src/websocket.ts
|
|
110
253
|
var _cachedWs = null;
|
|
254
|
+
/**
 * Report whether the code is running under Node.js, judged by the presence
 * of a string `process.versions.node`.
 *
 * @returns {boolean}
 */
function isNodeJs() {
  if (typeof process === "undefined") return false;
  const { versions } = process;
  return !!versions && typeof versions.node === "string";
}
|
|
111
257
|
function getWebSocket() {
|
|
112
258
|
if (_cachedWs) return _cachedWs;
|
|
259
|
+
if (isNodeJs()) {
|
|
260
|
+
try {
|
|
261
|
+
const _require = typeof __require !== "undefined" ? __require : Function('return typeof require !== "undefined" ? require : undefined')();
|
|
262
|
+
if (_require) {
|
|
263
|
+
const ws = _require("ws");
|
|
264
|
+
_cachedWs = ws.default || ws;
|
|
265
|
+
return _cachedWs;
|
|
266
|
+
}
|
|
267
|
+
} catch {
|
|
268
|
+
}
|
|
269
|
+
}
|
|
113
270
|
if (typeof globalThis !== "undefined" && typeof globalThis.WebSocket !== "undefined") {
|
|
114
271
|
_cachedWs = globalThis.WebSocket;
|
|
115
272
|
return _cachedWs;
|
|
116
273
|
}
|
|
117
|
-
try {
|
|
118
|
-
const _require = typeof __require !== "undefined" ? __require : Function('return typeof require !== "undefined" ? require : undefined')();
|
|
119
|
-
if (_require) {
|
|
120
|
-
const ws = _require("ws");
|
|
121
|
-
_cachedWs = ws.default || ws;
|
|
122
|
-
return _cachedWs;
|
|
123
|
-
}
|
|
124
|
-
} catch {
|
|
125
|
-
}
|
|
126
274
|
throw new Error(
|
|
127
275
|
'WebSocket not available. In Node.js, install the "ws" package: npm install ws'
|
|
128
276
|
);
|
|
@@ -130,11 +278,32 @@ function getWebSocket() {
|
|
|
130
278
|
|
|
131
279
|
// src/client.ts
|
|
132
280
|
var DEFAULT_API_URL = "https://api.kugelaudio.com";
|
|
281
|
+
var EU_API_URL = "https://api.eu.kugelaudio.com";
|
|
282
|
+
var SUPPORTED_REGIONS = ["eu", "us", "global"];
|
|
283
|
+
var REGION_PREFIXES = ["eu-", "us-", "global-"];
|
|
284
|
+
/**
 * Split an optional region prefix off an API key.
 *
 * @param {string} apiKey - Key as supplied by the caller.
 * @returns {{cleanKey: string, detectedRegion?: string}} The key without its
 *   prefix plus the region named by the prefix (without the trailing dash);
 *   `detectedRegion` is absent when no known prefix matches.
 */
function parseApiKey(apiKey) {
  const matched = REGION_PREFIXES.find((candidate) => apiKey.startsWith(candidate));
  if (matched === void 0) {
    return { cleanKey: apiKey };
  }
  return {
    cleanKey: apiKey.slice(matched.length),
    detectedRegion: matched.slice(0, -1)
  };
}
|
|
133
292
|
/**
 * Open a WebSocket to `url` using whichever implementation
 * `getWebSocket()` resolves for the current runtime.
 */
function createWs(url) {
  const SocketCtor = getWebSocket();
  const socket = new SocketCtor(url);
  return socket;
}
|
|
137
296
|
var WS_OPEN = 1;
|
|
297
|
+
// Set once the latency warning has been printed so it appears at most once.
var _languageWarningLogged = false;
/**
 * Print a one-time console warning when text normalization is enabled but no
 * language was supplied.
 *
 * @param {string|undefined} language - Language option of the request.
 * @param {boolean|undefined} normalize - Normalization option; `undefined`
 *   counts as enabled.
 */
function warnIfNoLanguage(language, normalize) {
  if (_languageWarningLogged) return;
  if (language) return;
  if (normalize !== void 0 && !normalize) return;
  _languageWarningLogged = true;
  console.warn(
    "[KugelAudio] No 'language' set with normalization enabled \u2014 the server will auto-detect the language, adding ~60-150ms to TTFA. Set language (e.g., language: 'en') for optimal latency."
  );
}
|
|
138
307
|
var ModelsResource = class {
|
|
139
308
|
constructor(client) {
|
|
140
309
|
this.client = client;
|
|
@@ -168,42 +337,177 @@ var VoicesResource = class {
|
|
|
168
337
|
params.set("include_public", String(options.includePublic));
|
|
169
338
|
}
|
|
170
339
|
if (options?.limit) params.set("limit", String(options.limit));
|
|
340
|
+
if (options?.offset) params.set("offset", String(options.offset));
|
|
171
341
|
const query = params.toString();
|
|
172
342
|
const path = query ? `/v1/voices?${query}` : "/v1/voices";
|
|
173
343
|
const response = await this.client.request("GET", path);
|
|
174
|
-
return
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
344
|
+
return {
|
|
345
|
+
voices: response.voices.map((v) => ({
|
|
346
|
+
id: v.id,
|
|
347
|
+
name: v.name,
|
|
348
|
+
description: v.description,
|
|
349
|
+
category: v.category,
|
|
350
|
+
sex: v.sex,
|
|
351
|
+
age: v.age,
|
|
352
|
+
supportedLanguages: v.supported_languages || [],
|
|
353
|
+
sampleText: v.sample_text,
|
|
354
|
+
avatarUrl: v.avatar_url,
|
|
355
|
+
sampleUrl: v.sample_url,
|
|
356
|
+
isPublic: v.is_public || false,
|
|
357
|
+
verified: v.verified || false
|
|
358
|
+
})),
|
|
359
|
+
total: response.total,
|
|
360
|
+
limit: response.limit,
|
|
361
|
+
offset: response.offset
|
|
362
|
+
};
|
|
188
363
|
}
|
|
189
364
|
/**
|
|
190
365
|
* Get a specific voice by ID.
|
|
191
366
|
*/
|
|
192
367
|
async get(voiceId) {
|
|
193
368
|
const v = await this.client.request("GET", `/v1/voices/${voiceId}`);
|
|
369
|
+
return this.mapVoiceDetail(v);
|
|
370
|
+
}
|
|
371
|
+
/**
|
|
372
|
+
* Create a new voice.
|
|
373
|
+
*/
|
|
374
|
+
async create(options) {
|
|
375
|
+
const metadata = {
|
|
376
|
+
name: options.name,
|
|
377
|
+
sex: options.sex,
|
|
378
|
+
description: options.description ?? "",
|
|
379
|
+
category: options.category ?? "conversational",
|
|
380
|
+
age: options.age ?? "middle_age",
|
|
381
|
+
quality: options.quality ?? "mid",
|
|
382
|
+
supported_languages: options.supportedLanguages ?? ["en"],
|
|
383
|
+
is_public: options.isPublic ?? false,
|
|
384
|
+
sample_text: options.sampleText ?? ""
|
|
385
|
+
};
|
|
386
|
+
const formData = new FormData();
|
|
387
|
+
formData.append(
|
|
388
|
+
"metadata",
|
|
389
|
+
new Blob([JSON.stringify(metadata)], { type: "application/json" })
|
|
390
|
+
);
|
|
391
|
+
if (options.referenceFiles) {
|
|
392
|
+
for (const file of options.referenceFiles) {
|
|
393
|
+
formData.append("files", file);
|
|
394
|
+
}
|
|
395
|
+
}
|
|
396
|
+
const v = await this.client.requestMultipart("POST", "/v1/voices", formData);
|
|
397
|
+
return this.mapVoiceDetail(v);
|
|
398
|
+
}
|
|
399
|
+
/**
|
|
400
|
+
* Update an existing voice. Only provided fields are updated.
|
|
401
|
+
*/
|
|
402
|
+
async update(voiceId, options) {
|
|
403
|
+
const payload = {};
|
|
404
|
+
if (options.name !== void 0) payload.name = options.name;
|
|
405
|
+
if (options.description !== void 0) payload.description = options.description;
|
|
406
|
+
if (options.category !== void 0) payload.category = options.category;
|
|
407
|
+
if (options.age !== void 0) payload.age = options.age;
|
|
408
|
+
if (options.sex !== void 0) payload.sex = options.sex;
|
|
409
|
+
if (options.quality !== void 0) payload.quality = options.quality;
|
|
410
|
+
if (options.supportedLanguages !== void 0) payload.supported_languages = options.supportedLanguages;
|
|
411
|
+
if (options.isPublic !== void 0) payload.is_public = options.isPublic;
|
|
412
|
+
if (options.sampleText !== void 0) payload.sample_text = options.sampleText;
|
|
413
|
+
const v = await this.client.request("PATCH", `/v1/voices/${voiceId}`, payload);
|
|
414
|
+
return this.mapVoiceDetail(v);
|
|
415
|
+
}
|
|
416
|
+
/**
|
|
417
|
+
* Delete a voice.
|
|
418
|
+
*/
|
|
419
|
+
async delete(voiceId) {
|
|
420
|
+
await this.client.request("DELETE", `/v1/voices/${voiceId}`);
|
|
421
|
+
}
|
|
422
|
+
// -- Reference management --
|
|
423
|
+
/**
|
|
424
|
+
* List reference audio files for a voice.
|
|
425
|
+
*/
|
|
426
|
+
async listReferences(voiceId) {
|
|
427
|
+
const response = await this.client.request(
|
|
428
|
+
"GET",
|
|
429
|
+
`/v1/voices/${voiceId}/references`
|
|
430
|
+
);
|
|
431
|
+
return response.references.map((r) => this.mapVoiceReference(r));
|
|
432
|
+
}
|
|
433
|
+
/**
|
|
434
|
+
* Upload a reference audio file to a voice.
|
|
435
|
+
*
|
|
436
|
+
* @param voiceId - Voice ID
|
|
437
|
+
* @param file - Audio file (File in browser, Blob in Node.js)
|
|
438
|
+
* @param referenceText - Optional transcript of the reference audio
|
|
439
|
+
*/
|
|
440
|
+
async addReference(voiceId, file, referenceText) {
|
|
441
|
+
const formData = new FormData();
|
|
442
|
+
formData.append("file", file);
|
|
443
|
+
if (referenceText) {
|
|
444
|
+
formData.append("reference_text", referenceText);
|
|
445
|
+
}
|
|
446
|
+
const r = await this.client.requestMultipart(
|
|
447
|
+
"POST",
|
|
448
|
+
`/v1/voices/${voiceId}/references`,
|
|
449
|
+
formData
|
|
450
|
+
);
|
|
451
|
+
return this.mapVoiceReference(r);
|
|
452
|
+
}
|
|
453
|
+
/**
|
|
454
|
+
* Delete a reference audio file from a voice.
|
|
455
|
+
*/
|
|
456
|
+
async deleteReference(voiceId, referenceId) {
|
|
457
|
+
await this.client.request(
|
|
458
|
+
"DELETE",
|
|
459
|
+
`/v1/voices/${voiceId}/references/${referenceId}`
|
|
460
|
+
);
|
|
461
|
+
}
|
|
462
|
+
// -- Publishing --
|
|
463
|
+
/**
|
|
464
|
+
* Request publication of a voice. Sets it as public and marks it
|
|
465
|
+
* as pending verification by an admin.
|
|
466
|
+
*/
|
|
467
|
+
async publish(voiceId) {
|
|
468
|
+
const v = await this.client.request("POST", `/v1/voices/${voiceId}/publish`);
|
|
469
|
+
return this.mapVoiceDetail(v);
|
|
470
|
+
}
|
|
471
|
+
// -- Sample generation --
|
|
472
|
+
/**
|
|
473
|
+
* Trigger sample audio generation for a voice.
|
|
474
|
+
*/
|
|
475
|
+
async generateSample(voiceId) {
|
|
476
|
+
const v = await this.client.request(
|
|
477
|
+
"POST",
|
|
478
|
+
`/v1/voices/${voiceId}/generate-sample`
|
|
479
|
+
);
|
|
480
|
+
return this.mapVoiceDetail(v);
|
|
481
|
+
}
|
|
482
|
+
// -- Helpers --
|
|
483
|
+
mapVoiceDetail(v) {
|
|
194
484
|
return {
|
|
195
485
|
id: v.id,
|
|
196
486
|
name: v.name,
|
|
197
|
-
description: v.description,
|
|
198
|
-
|
|
199
|
-
|
|
487
|
+
description: v.description ?? "",
|
|
488
|
+
generativeVoiceDescription: v.generative_voice_description ?? "",
|
|
489
|
+
supportedLanguages: v.supported_languages ?? [],
|
|
490
|
+
category: v.category ?? "cloned",
|
|
200
491
|
age: v.age,
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
492
|
+
sex: v.sex,
|
|
493
|
+
quality: v.quality ?? "mid",
|
|
494
|
+
isPublic: v.is_public ?? false,
|
|
495
|
+
verified: v.verified ?? false,
|
|
496
|
+
pendingVerification: v.pending_verification ?? false,
|
|
204
497
|
sampleUrl: v.sample_url,
|
|
205
|
-
|
|
206
|
-
|
|
498
|
+
avatarUrl: v.avatar_url,
|
|
499
|
+
sampleText: v.sample_text ?? ""
|
|
500
|
+
};
|
|
501
|
+
}
|
|
502
|
+
mapVoiceReference(r) {
|
|
503
|
+
return {
|
|
504
|
+
id: r.id,
|
|
505
|
+
voiceId: r.voice_id,
|
|
506
|
+
name: r.name ?? "",
|
|
507
|
+
referenceText: r.reference_text ?? "",
|
|
508
|
+
s3Path: r.s3_path ?? "",
|
|
509
|
+
audioUrl: r.audio_url,
|
|
510
|
+
isGenerated: r.is_generated ?? false
|
|
207
511
|
};
|
|
208
512
|
}
|
|
209
513
|
};
|
|
@@ -215,6 +519,7 @@ var TTSResource = class {
|
|
|
215
519
|
this.wsUrl = null;
|
|
216
520
|
this.pendingRequests = /* @__PURE__ */ new Map();
|
|
217
521
|
this.requestCounter = 0;
|
|
522
|
+
this.keepaliveTimer = null;
|
|
218
523
|
}
|
|
219
524
|
/**
|
|
220
525
|
* Pre-establish WebSocket connection for faster first request.
|
|
@@ -278,6 +583,63 @@ var TTSResource = class {
|
|
|
278
583
|
wordTimestamps: allTimestamps
|
|
279
584
|
};
|
|
280
585
|
}
|
|
586
|
+
/**
|
|
587
|
+
* Stream audio and return a Node.js Readable stream of raw PCM16 binary data.
|
|
588
|
+
*
|
|
589
|
+
* **Node.js only** — this method requires the `stream` built-in module and is
|
|
590
|
+
* intended for server-side integrations such as Vapi custom TTS endpoints,
|
|
591
|
+
* Express/Fastify handlers, or any pipeline that expects a Node.js `Readable`.
|
|
592
|
+
*
|
|
593
|
+
* Compared to manually wiring `onChunk` to a `Readable`, this method avoids
|
|
594
|
+
* a common race-condition: the stream object is created and returned **before**
|
|
595
|
+
* any chunks arrive, so the caller can safely pipe or attach listeners before
|
|
596
|
+
* the first audio byte is pushed.
|
|
597
|
+
*
|
|
598
|
+
* @example Vapi custom TTS endpoint
|
|
599
|
+
* ```typescript
|
|
600
|
+
* app.post('/synthesize', (req, res) => {
|
|
601
|
+
* res.setHeader('Content-Type', 'audio/pcm');
|
|
602
|
+
* res.setHeader('Transfer-Encoding', 'chunked');
|
|
603
|
+
*
|
|
604
|
+
* const readable = client.tts.toReadable({
|
|
605
|
+
* text: req.body.message.text,
|
|
606
|
+
* modelId: 'kugel-1-turbo',
|
|
607
|
+
* sampleRate: req.body.message.sampleRate,
|
|
608
|
+
* language: 'en',
|
|
609
|
+
* });
|
|
610
|
+
*
|
|
611
|
+
* readable.pipe(res);
|
|
612
|
+
* });
|
|
613
|
+
* ```
|
|
614
|
+
*
|
|
615
|
+
* @param options - TTS generation options (same as `stream()`)
|
|
616
|
+
* @param reuseConnection - Reuse the pooled WebSocket connection (default: true)
|
|
617
|
+
* @returns Node.js Readable stream emitting raw PCM16 binary Buffer chunks
|
|
618
|
+
*/
|
|
619
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
620
|
+
toReadable(options, reuseConnection = true) {
|
|
621
|
+
const { Readable } = __require("stream");
|
|
622
|
+
const readable = new Readable({ read() {
|
|
623
|
+
} });
|
|
624
|
+
this.stream(
|
|
625
|
+
options,
|
|
626
|
+
{
|
|
627
|
+
onChunk: (chunk) => {
|
|
628
|
+
readable.push(Buffer.from(chunk.audio, "base64"));
|
|
629
|
+
},
|
|
630
|
+
onFinal: () => {
|
|
631
|
+
readable.push(null);
|
|
632
|
+
},
|
|
633
|
+
onError: (error) => {
|
|
634
|
+
readable.destroy(error);
|
|
635
|
+
}
|
|
636
|
+
},
|
|
637
|
+
reuseConnection
|
|
638
|
+
).catch((error) => {
|
|
639
|
+
readable.destroy(error);
|
|
640
|
+
});
|
|
641
|
+
return readable;
|
|
642
|
+
}
|
|
281
643
|
/**
|
|
282
644
|
* Build the WebSocket URL with appropriate auth param.
|
|
283
645
|
*/
|
|
@@ -319,10 +681,17 @@ var TTSResource = class {
|
|
|
319
681
|
this.wsConnection = ws;
|
|
320
682
|
this.wsUrl = url;
|
|
321
683
|
this.setupMessageHandler(ws);
|
|
684
|
+
this.startKeepalive(ws);
|
|
322
685
|
resolve(ws);
|
|
323
686
|
};
|
|
324
|
-
ws.onerror = () => {
|
|
325
|
-
|
|
687
|
+
ws.onerror = (event) => {
|
|
688
|
+
const underlying = event?.error ?? event;
|
|
689
|
+
const typed = classifyWsHandshakeError(underlying);
|
|
690
|
+
reject(
|
|
691
|
+
typed ?? new ConnectionError(
|
|
692
|
+
`Could not establish KugelAudio WebSocket connection to ${url}. Check network connectivity.`
|
|
693
|
+
)
|
|
694
|
+
);
|
|
326
695
|
};
|
|
327
696
|
});
|
|
328
697
|
}
|
|
@@ -337,7 +706,7 @@ var TTSResource = class {
|
|
|
337
706
|
const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
|
|
338
707
|
if (!pending) return;
|
|
339
708
|
if (data.error) {
|
|
340
|
-
const error = this.parseError(data
|
|
709
|
+
const error = this.parseError(data);
|
|
341
710
|
pending.callbacks.onError?.(error);
|
|
342
711
|
this.pendingRequests.delete(requestId);
|
|
343
712
|
pending.reject(error);
|
|
@@ -350,7 +719,6 @@ var TTSResource = class {
|
|
|
350
719
|
totalSamples: data.total_samples,
|
|
351
720
|
durationMs: data.dur_ms,
|
|
352
721
|
generationMs: data.gen_ms,
|
|
353
|
-
ttfaMs: data.ttfa_ms,
|
|
354
722
|
rtf: data.rtf,
|
|
355
723
|
error: data.error
|
|
356
724
|
};
|
|
@@ -387,20 +755,23 @@ var TTSResource = class {
|
|
|
387
755
|
}
|
|
388
756
|
};
|
|
389
757
|
ws.onclose = (event) => {
|
|
758
|
+
this.stopKeepalive();
|
|
390
759
|
this.wsConnection = null;
|
|
391
760
|
this.wsUrl = null;
|
|
392
761
|
for (const [id, pending] of this.pendingRequests) {
|
|
393
762
|
pending.callbacks.onClose?.();
|
|
394
|
-
if (event.code === 4001) {
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
pending.reject(
|
|
763
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
764
|
+
const error = classifyWsClose(event.code, event.reason);
|
|
765
|
+
pending.callbacks.onError?.(error);
|
|
766
|
+
pending.reject(error);
|
|
398
767
|
}
|
|
399
768
|
this.pendingRequests.delete(id);
|
|
400
769
|
}
|
|
401
770
|
};
|
|
402
771
|
ws.onerror = () => {
|
|
403
|
-
const error = new
|
|
772
|
+
const error = new ConnectionError(
|
|
773
|
+
"KugelAudio WebSocket connection error. Check network connectivity."
|
|
774
|
+
);
|
|
404
775
|
for (const [id, pending] of this.pendingRequests) {
|
|
405
776
|
pending.callbacks.onError?.(error);
|
|
406
777
|
pending.reject(error);
|
|
@@ -426,6 +797,7 @@ var TTSResource = class {
|
|
|
426
797
|
* Stream with connection pooling (fast path).
|
|
427
798
|
*/
|
|
428
799
|
async streamWithPooling(options, callbacks) {
|
|
800
|
+
warnIfNoLanguage(options.language, options.normalize);
|
|
429
801
|
const ws = await this.getConnection();
|
|
430
802
|
const requestId = ++this.requestCounter;
|
|
431
803
|
return new Promise((resolve, reject) => {
|
|
@@ -436,11 +808,14 @@ var TTSResource = class {
|
|
|
436
808
|
model_id: options.modelId || "kugel-1-turbo",
|
|
437
809
|
voice_id: options.voiceId,
|
|
438
810
|
cfg_scale: options.cfgScale ?? 2,
|
|
811
|
+
...options.temperature !== void 0 && { temperature: options.temperature },
|
|
439
812
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
440
813
|
sample_rate: options.sampleRate ?? 24e3,
|
|
441
814
|
normalize: options.normalize ?? true,
|
|
442
815
|
...options.language && { language: options.language },
|
|
443
|
-
...options.wordTimestamps && { word_timestamps: true }
|
|
816
|
+
...options.wordTimestamps && { word_timestamps: true },
|
|
817
|
+
...options.speed !== void 0 && { speed: options.speed },
|
|
818
|
+
...options.projectId !== void 0 && { project_id: options.projectId }
|
|
444
819
|
}));
|
|
445
820
|
});
|
|
446
821
|
}
|
|
@@ -448,6 +823,7 @@ var TTSResource = class {
|
|
|
448
823
|
* Stream without connection pooling (original behavior).
|
|
449
824
|
*/
|
|
450
825
|
streamWithoutPooling(options, callbacks) {
|
|
826
|
+
warnIfNoLanguage(options.language, options.normalize);
|
|
451
827
|
return new Promise((resolve, reject) => {
|
|
452
828
|
const url = this.buildWsUrl();
|
|
453
829
|
const ws = createWs(url);
|
|
@@ -462,7 +838,9 @@ var TTSResource = class {
|
|
|
462
838
|
sample_rate: options.sampleRate ?? 24e3,
|
|
463
839
|
normalize: options.normalize ?? true,
|
|
464
840
|
...options.language && { language: options.language },
|
|
465
|
-
...options.wordTimestamps && { word_timestamps: true }
|
|
841
|
+
...options.wordTimestamps && { word_timestamps: true },
|
|
842
|
+
...options.speed !== void 0 && { speed: options.speed },
|
|
843
|
+
...options.projectId !== void 0 && { project_id: options.projectId }
|
|
466
844
|
}));
|
|
467
845
|
};
|
|
468
846
|
ws.onmessage = (event) => {
|
|
@@ -470,7 +848,7 @@ var TTSResource = class {
|
|
|
470
848
|
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
471
849
|
const data = JSON.parse(messageData);
|
|
472
850
|
if (data.error) {
|
|
473
|
-
const error = this.parseError(data
|
|
851
|
+
const error = this.parseError(data);
|
|
474
852
|
callbacks.onError?.(error);
|
|
475
853
|
ws.close();
|
|
476
854
|
reject(error);
|
|
@@ -483,7 +861,6 @@ var TTSResource = class {
|
|
|
483
861
|
totalSamples: data.total_samples,
|
|
484
862
|
durationMs: data.dur_ms,
|
|
485
863
|
generationMs: data.gen_ms,
|
|
486
|
-
ttfaMs: data.ttfa_ms,
|
|
487
864
|
rtf: data.rtf,
|
|
488
865
|
error: data.error
|
|
489
866
|
};
|
|
@@ -519,25 +896,54 @@ var TTSResource = class {
|
|
|
519
896
|
console.error("Failed to parse WebSocket message:", e);
|
|
520
897
|
}
|
|
521
898
|
};
|
|
522
|
-
ws.onerror = () => {
|
|
523
|
-
const
|
|
899
|
+
ws.onerror = (event) => {
|
|
900
|
+
const underlying = event?.error ?? event;
|
|
901
|
+
const error = classifyWsHandshakeError(underlying) ?? new ConnectionError(
|
|
902
|
+
"KugelAudio WebSocket connection error. Check network connectivity."
|
|
903
|
+
);
|
|
524
904
|
callbacks.onError?.(error);
|
|
525
905
|
reject(error);
|
|
526
906
|
};
|
|
527
907
|
ws.onclose = (event) => {
|
|
528
908
|
callbacks.onClose?.();
|
|
529
|
-
if (event.code === 4001) {
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
reject(
|
|
909
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
910
|
+
const error = classifyWsClose(event.code, event.reason);
|
|
911
|
+
callbacks.onError?.(error);
|
|
912
|
+
reject(error);
|
|
533
913
|
}
|
|
534
914
|
};
|
|
535
915
|
});
|
|
536
916
|
}
|
|
917
|
+
/**
|
|
918
|
+
* Start periodic keepalive pings on the pooled connection.
|
|
919
|
+
* Uses the ws package's ping() in Node.js; silently skips in browsers
|
|
920
|
+
* where WebSocket doesn't expose a ping method.
|
|
921
|
+
*/
|
|
922
|
+
startKeepalive(ws) {
|
|
923
|
+
this.stopKeepalive();
|
|
924
|
+
const intervalMs = this.client.keepalivePingInterval;
|
|
925
|
+
if (intervalMs == null || intervalMs <= 0) return;
|
|
926
|
+
this.keepaliveTimer = setInterval(() => {
|
|
927
|
+
if (this.wsConnection !== ws || ws.readyState !== WS_OPEN) {
|
|
928
|
+
this.stopKeepalive();
|
|
929
|
+
return;
|
|
930
|
+
}
|
|
931
|
+
if (typeof ws.ping === "function") {
|
|
932
|
+
ws.ping();
|
|
933
|
+
}
|
|
934
|
+
}, intervalMs);
|
|
935
|
+
}
|
|
936
|
+
stopKeepalive() {
|
|
937
|
+
if (this.keepaliveTimer !== null) {
|
|
938
|
+
clearInterval(this.keepaliveTimer);
|
|
939
|
+
this.keepaliveTimer = null;
|
|
940
|
+
}
|
|
941
|
+
}
|
|
537
942
|
/**
|
|
538
943
|
* Close the pooled WebSocket connection.
|
|
539
944
|
*/
|
|
540
945
|
close() {
|
|
946
|
+
this.stopKeepalive();
|
|
541
947
|
if (this.wsConnection) {
|
|
542
948
|
try {
|
|
543
949
|
this.wsConnection.close();
|
|
@@ -547,15 +953,39 @@ var TTSResource = class {
|
|
|
547
953
|
this.wsUrl = null;
|
|
548
954
|
}
|
|
549
955
|
}
|
|
550
|
-
parseError(
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
956
|
+
parseError(data) {
|
|
957
|
+
return classifyWsFrame(data);
|
|
958
|
+
}
|
|
959
|
+
/**
|
|
960
|
+
* Create a streaming session for LLM integration.
|
|
961
|
+
*
|
|
962
|
+
* The session connects to `/ws/tts/stream` and keeps a persistent
|
|
963
|
+
* connection across multiple {@link StreamingSession.send} calls.
|
|
964
|
+
* The server auto-chunks text at sentence boundaries — no client-side
|
|
965
|
+
* flushing required.
|
|
966
|
+
*
|
|
967
|
+
* @param config - Session configuration (voice, model, chunking strategy).
|
|
968
|
+
* @param callbacks - Callbacks for audio chunks and session lifecycle events.
|
|
969
|
+
* @returns A {@link StreamingSession} instance. Call `.connect()` before sending.
|
|
970
|
+
*
|
|
971
|
+
* @example
|
|
972
|
+
* ```typescript
|
|
973
|
+
* const session = client.tts.streamingSession(
|
|
974
|
+
* { voiceId: 123, autoMode: true, chunkLengthSchedule: [50, 100, 150, 250] },
|
|
975
|
+
* { onChunk: (chunk) => playAudio(chunk.audio) },
|
|
976
|
+
* );
|
|
977
|
+
*
|
|
978
|
+
* session.connect();
|
|
979
|
+
*
|
|
980
|
+
* for await (const token of llmStream) {
|
|
981
|
+
* session.send(token);
|
|
982
|
+
* }
|
|
983
|
+
*
|
|
984
|
+
* await session.close();
|
|
985
|
+
* ```
|
|
986
|
+
*/
|
|
987
|
+
streamingSession(config, callbacks) {
|
|
988
|
+
return new StreamingSession(this.client, config, callbacks);
|
|
559
989
|
}
|
|
560
990
|
/**
|
|
561
991
|
* Create a multi-context session for concurrent TTS streams.
|
|
@@ -575,7 +1005,7 @@ var TTSResource = class {
|
|
|
575
1005
|
* console.log(`Audio from ${chunk.contextId}`);
|
|
576
1006
|
* playAudio(chunk.audio);
|
|
577
1007
|
* },
|
|
578
|
-
*
|
|
1008
|
+
* onContextClosed: (contextId) => {
|
|
579
1009
|
* console.log(`${contextId} finished`);
|
|
580
1010
|
* },
|
|
581
1011
|
* });
|
|
@@ -614,6 +1044,11 @@ var MultiContextSession = class {
|
|
|
614
1044
|
}
|
|
615
1045
|
/**
|
|
616
1046
|
* Connect to the multi-context WebSocket endpoint.
|
|
1047
|
+
*
|
|
1048
|
+
* The returned promise resolves once the WebSocket is OPEN so callers can
|
|
1049
|
+
* ``await session.connect(callbacks)`` before invoking
|
|
1050
|
+
* {@link createContext} / {@link send}. Pre-open errors reject with the
|
|
1051
|
+
* typed error.
|
|
617
1052
|
*/
|
|
618
1053
|
connect(callbacks) {
|
|
619
1054
|
this.callbacks = callbacks;
|
|
@@ -628,9 +1063,8 @@ var MultiContextSession = class {
|
|
|
628
1063
|
}
|
|
629
1064
|
const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
|
|
630
1065
|
this.ws = createWs(url);
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
this.ws.onmessage = (event) => {
|
|
1066
|
+
const ws = this.ws;
|
|
1067
|
+
ws.onmessage = (event) => {
|
|
634
1068
|
try {
|
|
635
1069
|
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
636
1070
|
const data = JSON.parse(messageData);
|
|
@@ -661,9 +1095,6 @@ var MultiContextSession = class {
|
|
|
661
1095
|
};
|
|
662
1096
|
this.callbacks.onChunk?.(chunk);
|
|
663
1097
|
}
|
|
664
|
-
if (data.is_final) {
|
|
665
|
-
this.callbacks.onContextFinal?.(data.context_id);
|
|
666
|
-
}
|
|
667
1098
|
if (data.context_closed) {
|
|
668
1099
|
this.contexts.delete(data.context_id);
|
|
669
1100
|
this.callbacks.onContextClosed?.(data.context_id);
|
|
@@ -679,19 +1110,38 @@ var MultiContextSession = class {
|
|
|
679
1110
|
console.error("Failed to parse WebSocket message:", e);
|
|
680
1111
|
}
|
|
681
1112
|
};
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
1113
|
+
return new Promise((resolve, reject) => {
|
|
1114
|
+
let opened = false;
|
|
1115
|
+
ws.onopen = () => {
|
|
1116
|
+
opened = true;
|
|
1117
|
+
resolve();
|
|
1118
|
+
};
|
|
1119
|
+
ws.onerror = (event) => {
|
|
1120
|
+
const underlying = event?.error ?? event;
|
|
1121
|
+
const err = classifyWsHandshakeError(underlying) ?? new ConnectionError(
|
|
1122
|
+
"KugelAudio multi-context WebSocket connection error. Check network connectivity."
|
|
1123
|
+
);
|
|
1124
|
+
if (!opened) reject(err);
|
|
1125
|
+
this.callbacks.onError?.(err);
|
|
1126
|
+
};
|
|
1127
|
+
ws.onclose = (event) => {
|
|
1128
|
+
let typedErr = null;
|
|
1129
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
1130
|
+
typedErr = classifyWsClose(event.code, event.reason);
|
|
1131
|
+
this.callbacks.onError?.(typedErr);
|
|
1132
|
+
}
|
|
1133
|
+
if (!opened) {
|
|
1134
|
+
reject(
|
|
1135
|
+
typedErr ?? new ConnectionError(
|
|
1136
|
+
`KugelAudio multi-context WebSocket closed before ready (code ${event.code}).`
|
|
1137
|
+
)
|
|
1138
|
+
);
|
|
1139
|
+
}
|
|
1140
|
+
this.ws = null;
|
|
1141
|
+
this.isStarted = false;
|
|
1142
|
+
this.contexts.clear();
|
|
1143
|
+
};
|
|
1144
|
+
});
|
|
695
1145
|
}
|
|
696
1146
|
/**
|
|
697
1147
|
* Create a new context with optional voice settings.
|
|
@@ -705,10 +1155,13 @@ var MultiContextSession = class {
|
|
|
705
1155
|
context_id: contextId
|
|
706
1156
|
};
|
|
707
1157
|
if (!this.isStarted) {
|
|
1158
|
+
warnIfNoLanguage(this.config.language, this.config.normalize);
|
|
708
1159
|
if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
|
|
709
1160
|
if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
|
|
1161
|
+
if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
|
|
710
1162
|
if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
|
|
711
1163
|
if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
|
|
1164
|
+
if (this.config.language) msg.language = this.config.language;
|
|
712
1165
|
if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
|
|
713
1166
|
}
|
|
714
1167
|
const voiceId = options?.voiceId || this.config.defaultVoiceId;
|
|
@@ -795,18 +1248,274 @@ var MultiContextSession = class {
|
|
|
795
1248
|
return this.ws !== null && this.ws.readyState === WS_OPEN;
|
|
796
1249
|
}
|
|
797
1250
|
};
|
|
1251
|
+
/**
 * Normalise a WebSocket message payload to a string.
 *
 * Strings pass through unchanged, Node.js `Buffer` payloads are decoded with
 * `toString()`, and anything else is coerced with `String()`. The `Buffer`
 * check is guarded so runtimes without a `Buffer` global do not raise a
 * ReferenceError.
 */
function streamingFrameToText(payload) {
  if (typeof payload === "string") return payload;
  if (typeof Buffer !== "undefined" && payload instanceof Buffer) return payload.toString();
  return String(payload);
}

/**
 * Persistent text-to-speech streaming session over `/ws/tts/stream`.
 *
 * Text is pushed with {@link send}; audio and lifecycle events are delivered
 * through the callbacks object passed to the constructor.
 */
var StreamingSession = class {
  /**
   * @param client - Client providing `ttsUrl`, `apiKey`, `isToken` and `isMasterKey`.
   * @param config - Session configuration; sent to the server with the first
   *   `send()` of each session.
   * @param callbacks - Optional handlers: `onChunk`, `onWordTimestamps`,
   *   `onChunkComplete`, `onGenerationStarted`, `onSessionClosed`, `onError`.
   */
  constructor(client, config, callbacks) {
    this.ws = null;
    this.configSent = false;
    this.client = client;
    this.config = config;
    this.callbacks = callbacks;
  }

  /**
   * Open the WebSocket connection and authenticate.
   *
   * The returned promise resolves once the WebSocket is OPEN, so callers can
   * `await session.connect()` and then `send()` without racing the
   * handshake. Pre-open errors (network failure, 4001 unauthorized, …) reject
   * the promise with the typed error.
   */
  connect() {
    const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
    let authParam;
    if (this.client.isToken) {
      authParam = "token";
    } else if (this.client.isMasterKey) {
      authParam = "master_key";
    } else {
      authParam = "api_key";
    }
    // Percent-encode the credential so reserved characters cannot corrupt the query string.
    const credential = encodeURIComponent(this.client.apiKey);
    const url = `${wsUrl}/ws/tts/stream?${authParam}=${credential}`;
    const ws = createWs(url);
    this.ws = ws;
    // A fresh socket has never received the session config.
    this.configSent = false;

    ws.onmessage = (event) => {
      try {
        const data = JSON.parse(streamingFrameToText(event.data));
        if (data.error) {
          this.callbacks.onError?.(new KugelAudioError(data.error));
          return;
        }
        if (data.audio) {
          const chunk = {
            audio: data.audio,
            encoding: data.enc || "pcm_s16le",
            index: data.idx,
            sampleRate: data.sr,
            samples: data.samples
          };
          this.callbacks.onChunk?.(chunk);
        }
        if (data.word_timestamps) {
          const timestamps = data.word_timestamps.map((w) => ({
            word: w.word,
            startMs: w.start_ms,
            endMs: w.end_ms,
            charStart: w.char_start,
            charEnd: w.char_end,
            score: w.score ?? 1
          }));
          this.callbacks.onWordTimestamps?.(timestamps);
        }
        if (data.chunk_complete) {
          this.callbacks.onChunkComplete?.(
            data.chunk_id ?? 0,
            data.audio_seconds ?? 0,
            data.gen_ms ?? 0
          );
        }
        if (data.generation_started) {
          this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? "");
        }
        if (data.session_closed) {
          this.callbacks.onSessionClosed?.(
            data.total_audio_seconds ?? 0,
            data.total_text_chunks ?? 0,
            data.total_audio_chunks ?? 0
          );
        }
      } catch (e) {
        // Also reached when a user callback throws, not only on malformed JSON.
        console.error("[KugelAudio] Failed to parse streaming session message:", e);
      }
    };

    return new Promise((resolve, reject) => {
      let opened = false;
      ws.onopen = () => {
        opened = true;
        resolve();
      };
      ws.onerror = (event) => {
        const underlying = event?.error ?? event;
        const err = classifyWsHandshakeError(underlying) ?? new ConnectionError(
          "KugelAudio streaming WebSocket connection error. Check network connectivity."
        );
        if (!opened) reject(err);
        this.callbacks.onError?.(err);
      };
      ws.onclose = (event) => {
        let typedErr = null;
        // Application close codes: 4001 unauthorized, 4003 insufficient
        // credits, 4029 rate limited, 4500 model unavailable.
        if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
          typedErr = classifyWsClose(event.code, event.reason);
          this.callbacks.onError?.(typedErr);
        }
        if (!opened) {
          reject(
            typedErr ?? new ConnectionError(
              `KugelAudio streaming WebSocket closed before ready (code ${event.code}).`
            )
          );
        }
        // Close events arrive asynchronously. If connect() has already
        // installed a newer socket, leave that connection's state alone.
        if (this.ws === null || this.ws === ws) {
          this.ws = null;
          this.configSent = false;
        }
      };
    });
  }

  /**
   * Send a text chunk to the server (e.g. one LLM output token).
   *
   * The server buffers text across multiple calls and starts generating at
   * natural sentence boundaries automatically — no need to call `flush`.
   * The first message of each session also carries the session config.
   *
   * @param text - Raw text or LLM token to append to the server buffer.
   * @param flush - Force immediate generation of whatever is buffered.
   *   **Avoid calling this per-sentence from the client.** Doing so bypasses
   *   the server's semantic chunking, incurs a fresh model prefill cost on
   *   every flush, and makes latency *worse*, not better. Let the server
   *   handle chunking via `chunkLengthSchedule` / `autoMode` instead.
   * @throws KugelAudioError when the WebSocket is not open.
   */
  send(text, flush = false) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("StreamingSession not connected. Call connect() first.");
    }
    const msg = { text, flush };
    const includeConfig = !this.configSent;
    if (includeConfig) {
      const cfg = this.config;
      if (cfg.voiceId !== undefined) msg.voice_id = cfg.voiceId;
      if (cfg.modelId !== undefined) msg.model_id = cfg.modelId;
      if (cfg.cfgScale !== undefined) msg.cfg_scale = cfg.cfgScale;
      if (cfg.temperature !== undefined) msg.temperature = cfg.temperature;
      if (cfg.maxNewTokens !== undefined) msg.max_new_tokens = cfg.maxNewTokens;
      if (cfg.sampleRate !== undefined) msg.sample_rate = cfg.sampleRate;
      if (cfg.flushTimeoutMs !== undefined) msg.flush_timeout_ms = cfg.flushTimeoutMs;
      if (cfg.maxBufferLength !== undefined) msg.max_buffer_length = cfg.maxBufferLength;
      if (cfg.normalize !== undefined) msg.normalize = cfg.normalize;
      if (cfg.language !== undefined) msg.language = cfg.language;
      if (cfg.wordTimestamps) msg.word_timestamps = true;
      if (cfg.autoMode !== undefined) msg.auto_mode = cfg.autoMode;
      if (cfg.chunkLengthSchedule?.length) msg.chunk_length_schedule = cfg.chunkLengthSchedule;
      if (cfg.speed !== undefined) msg.speed = cfg.speed;
    }
    this.ws.send(JSON.stringify(msg));
    // Mark the config as delivered only after the send call returned, so a
    // failed first send does not silently drop the configuration.
    if (includeConfig) this.configSent = true;
  }

  /**
   * End the current session but keep the WebSocket connection open.
   *
   * This allows starting a new session on the same connection, avoiding
   * the overhead of a new WebSocket handshake (~200-300ms). After calling
   * this, optionally call {@link updateConfig} to change voice/model settings,
   * then call {@link send} to start the next session.
   *
   * The returned promise resolves once the server confirms with a
   * `session_closed` message, when the socket closes, or after a 15 s
   * **quiet** timeout — i.e. 15 s elapse without *any* server message
   * arriving. The timer resets on every incoming frame so a long final flush
   * that streams audio for tens of seconds is not truncated; only a genuinely
   * silent server trips the fuse. Resolves immediately when the socket is not
   * open.
   */
  endSession() {
    if (!this.ws || this.ws.readyState !== WS_OPEN) return Promise.resolve();
    const ws = this.ws;
    const QUIET_TIMEOUT_MS = 15e3;
    return new Promise((resolve) => {
      let settled = false;
      let timer;
      const prevMessage = ws.onmessage;
      const prevClose = ws.onclose;
      const done = () => {
        if (settled) return;
        settled = true;
        clearTimeout(timer);
        // Hand the socket back to the handlers that were installed before.
        ws.onmessage = prevMessage;
        ws.onclose = prevClose;
        // Do not reset the flag of a newer connection that replaced this one.
        if (this.ws === null || this.ws === ws) this.configSent = false;
        resolve();
      };
      const armQuietTimer = () => {
        clearTimeout(timer);
        timer = setTimeout(done, QUIET_TIMEOUT_MS);
      };
      armQuietTimer();
      ws.onmessage = (event) => {
        armQuietTimer();
        if (prevMessage) prevMessage.call(ws, event);
        try {
          if (JSON.parse(streamingFrameToText(event.data)).session_closed) done();
        } catch {
          // Frames that are not valid JSON cannot be the confirmation; the
          // regular message handler above has already dealt with them.
        }
      };
      ws.onclose = (event) => {
        if (this.ws === ws) this.ws = null;
        if (prevClose) prevClose.call(ws, event);
        done();
      };
      ws.send(JSON.stringify({ close: true }));
    });
  }

  /**
   * Update session configuration for the next session.
   *
   * Call this after {@link endSession} and before the next {@link send}
   * to change voice, model, language, or other settings. The merged settings
   * are stored in a new object, so the object originally passed to the
   * constructor is not modified.
   *
   * @param config - Partial configuration whose keys override current values.
   */
  updateConfig(config) {
    this.config = { ...this.config, ...config };
    this.configSent = false;
  }

  /**
   * Close the session and the WebSocket connection.
   *
   * For session reuse without closing the connection, use
   * {@link endSession} instead.
   *
   * The returned promise resolves once the server confirms the close with a
   * `session_closed` message, or after a 15 s **quiet** timeout (no traffic
   * from the server in that window). Audio frames from the server-side
   * final-flush of the still-buffered text are delivered to your callbacks
   * before this promise resolves, and each frame resets the quiet timer.
   */
  async close() {
    await this.endSession();
    const ws = this.ws;
    if (ws) {
      try {
        ws.close();
      } catch {
        // Best-effort shutdown: the socket is being discarded either way.
      }
      this.ws = null;
    }
  }

  /** Whether the underlying WebSocket is open. */
  get isConnected() {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }
};
|
|
798
1490
|
var KugelAudio = class _KugelAudio {
|
|
799
1491
|
constructor(options) {
|
|
800
1492
|
if (!options.apiKey) {
|
|
801
|
-
throw new
|
|
1493
|
+
throw new ValidationError(
|
|
1494
|
+
"KugelAudio API key is missing. Set the KUGELAUDIO_API_KEY environment variable or pass { apiKey: ... } to the client. Get a key at https://app.kugelaudio.com/settings/api-keys."
|
|
1495
|
+
);
|
|
802
1496
|
}
|
|
803
|
-
|
|
1497
|
+
const { cleanKey, detectedRegion } = parseApiKey(options.apiKey);
|
|
1498
|
+
this._apiKey = cleanKey;
|
|
804
1499
|
this._isMasterKey = options.isMasterKey || false;
|
|
805
1500
|
this._isToken = options.isToken || false;
|
|
806
1501
|
this._orgId = options.orgId;
|
|
807
|
-
|
|
1502
|
+
if (options.apiUrl) {
|
|
1503
|
+
this._apiUrl = options.apiUrl.replace(/\/$/, "");
|
|
1504
|
+
} else {
|
|
1505
|
+
const effectiveRegion = options.region || detectedRegion;
|
|
1506
|
+
if (!effectiveRegion) {
|
|
1507
|
+
this._apiUrl = DEFAULT_API_URL;
|
|
1508
|
+
} else if (!SUPPORTED_REGIONS.includes(effectiveRegion)) {
|
|
1509
|
+
throw new ValidationError(
|
|
1510
|
+
`Invalid region '${effectiveRegion}'. Must be one of: ${SUPPORTED_REGIONS.join(", ")}.`
|
|
1511
|
+
);
|
|
1512
|
+
} else {
|
|
1513
|
+
this._apiUrl = effectiveRegion === "eu" ? EU_API_URL : DEFAULT_API_URL;
|
|
1514
|
+
}
|
|
1515
|
+
}
|
|
808
1516
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
|
|
809
1517
|
this._timeout = options.timeout || 6e4;
|
|
1518
|
+
this._keepalivePingInterval = options.keepalivePingInterval !== void 0 ? options.keepalivePingInterval : 2e4;
|
|
810
1519
|
this.models = new ModelsResource(this);
|
|
811
1520
|
this.voices = new VoicesResource(this);
|
|
812
1521
|
this.tts = new TTSResource(this);
|
|
@@ -852,6 +1561,10 @@ var KugelAudio = class _KugelAudio {
|
|
|
852
1561
|
get ttsUrl() {
|
|
853
1562
|
return this._ttsUrl;
|
|
854
1563
|
}
|
|
1564
|
+
/** Get keepalive ping interval in milliseconds, or null if disabled. */
|
|
1565
|
+
get keepalivePingInterval() {
|
|
1566
|
+
return this._keepalivePingInterval;
|
|
1567
|
+
}
|
|
855
1568
|
/**
|
|
856
1569
|
* Close the client and release resources.
|
|
857
1570
|
* This closes any pooled WebSocket connections.
|
|
@@ -906,25 +1619,49 @@ var KugelAudio = class _KugelAudio {
|
|
|
906
1619
|
signal: controller.signal
|
|
907
1620
|
});
|
|
908
1621
|
clearTimeout(timeoutId);
|
|
909
|
-
if (response.
|
|
910
|
-
|
|
1622
|
+
if (!response.ok) {
|
|
1623
|
+
const text = await response.text();
|
|
1624
|
+
throw classifyHttpError(response.status, text, response.headers);
|
|
911
1625
|
}
|
|
912
|
-
|
|
913
|
-
|
|
1626
|
+
return await response.json();
|
|
1627
|
+
} catch (error) {
|
|
1628
|
+
clearTimeout(timeoutId);
|
|
1629
|
+
if (error instanceof KugelAudioError) {
|
|
1630
|
+
throw error;
|
|
914
1631
|
}
|
|
915
|
-
if (
|
|
916
|
-
throw new
|
|
1632
|
+
if (error.name === "AbortError") {
|
|
1633
|
+
throw new ConnectionError(
|
|
1634
|
+
`Request to ${method} ${path} timed out after ${this._timeout}ms.`
|
|
1635
|
+
);
|
|
917
1636
|
}
|
|
1637
|
+
throw new ConnectionError(
|
|
1638
|
+
`Could not reach KugelAudio at ${url}: ${error.message}. Check network connectivity.`
|
|
1639
|
+
);
|
|
1640
|
+
}
|
|
1641
|
+
}
|
|
1642
|
+
/**
|
|
1643
|
+
* Make a multipart/form-data request (for file uploads).
|
|
1644
|
+
* @internal Used by VoicesResource for reference file uploads.
|
|
1645
|
+
*/
|
|
1646
|
+
async requestMultipart(method, path, formData) {
|
|
1647
|
+
const url = `${this._apiUrl}${path}`;
|
|
1648
|
+
const headers = {
|
|
1649
|
+
"X-API-Key": this._apiKey,
|
|
1650
|
+
"Authorization": `Bearer ${this._apiKey}`
|
|
1651
|
+
};
|
|
1652
|
+
const controller = new AbortController();
|
|
1653
|
+
const timeoutId = setTimeout(() => controller.abort(), this._timeout);
|
|
1654
|
+
try {
|
|
1655
|
+
const response = await fetch(url, {
|
|
1656
|
+
method,
|
|
1657
|
+
headers,
|
|
1658
|
+
body: formData,
|
|
1659
|
+
signal: controller.signal
|
|
1660
|
+
});
|
|
1661
|
+
clearTimeout(timeoutId);
|
|
918
1662
|
if (!response.ok) {
|
|
919
1663
|
const text = await response.text();
|
|
920
|
-
|
|
921
|
-
try {
|
|
922
|
-
const json = JSON.parse(text);
|
|
923
|
-
message = json.detail || json.error || message;
|
|
924
|
-
} catch {
|
|
925
|
-
message = text || message;
|
|
926
|
-
}
|
|
927
|
-
throw new KugelAudioError(message, response.status);
|
|
1664
|
+
throw classifyHttpError(response.status, text, response.headers);
|
|
928
1665
|
}
|
|
929
1666
|
return await response.json();
|
|
930
1667
|
} catch (error) {
|
|
@@ -933,21 +1670,31 @@ var KugelAudio = class _KugelAudio {
|
|
|
933
1670
|
throw error;
|
|
934
1671
|
}
|
|
935
1672
|
if (error.name === "AbortError") {
|
|
936
|
-
throw new
|
|
1673
|
+
throw new ConnectionError(
|
|
1674
|
+
`Request to ${method} ${path} timed out after ${this._timeout}ms.`
|
|
1675
|
+
);
|
|
937
1676
|
}
|
|
938
|
-
throw new
|
|
1677
|
+
throw new ConnectionError(
|
|
1678
|
+
`Could not reach KugelAudio at ${url}: ${error.message}. Check network connectivity.`
|
|
1679
|
+
);
|
|
939
1680
|
}
|
|
940
1681
|
}
|
|
941
1682
|
};
|
|
942
1683
|
export {
|
|
943
1684
|
AuthenticationError,
|
|
944
1685
|
ConnectionError,
|
|
1686
|
+
ErrorCodes,
|
|
945
1687
|
InsufficientCreditsError,
|
|
946
1688
|
KugelAudio,
|
|
947
1689
|
KugelAudioError,
|
|
948
1690
|
RateLimitError,
|
|
949
1691
|
ValidationError,
|
|
1692
|
+
WsCloseCodes,
|
|
950
1693
|
base64ToArrayBuffer,
|
|
1694
|
+
classifyHttpError,
|
|
1695
|
+
classifyWsClose,
|
|
1696
|
+
classifyWsFrame,
|
|
1697
|
+
classifyWsHandshakeError,
|
|
951
1698
|
createWavBlob,
|
|
952
1699
|
createWavFile,
|
|
953
1700
|
decodePCM16
|