kugelaudio 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +37 -13
- package/dist/index.d.mts +518 -26
- package/dist/index.d.ts +518 -26
- package/dist/index.js +864 -112
- package/dist/index.mjs +858 -112
- package/package.json +9 -8
- package/src/client.test.ts +548 -0
- package/src/client.ts +885 -103
- package/src/errors.ts +266 -18
- package/src/index.ts +17 -2
- package/src/types.ts +215 -8
- package/src/websocket.ts +38 -18
package/dist/index.mjs
CHANGED
|
@@ -6,49 +6,192 @@ var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require
|
|
|
6
6
|
});
|
|
7
7
|
|
|
8
8
|
// src/errors.ts
|
|
9
|
+
/**
 * Machine-readable error codes. `build()` compares a server-supplied
 * `error_code` against these values to choose an error class.
 * Frozen so a consumer cannot accidentally mutate the shared table.
 */
var ErrorCodes = Object.freeze({
  UNAUTHORIZED: "UNAUTHORIZED",
  RATE_LIMITED: "RATE_LIMITED",
  INSUFFICIENT_CREDITS: "INSUFFICIENT_CREDITS",
  MODEL_UNAVAILABLE: "MODEL_UNAVAILABLE",
  EMPTY_AUDIO: "EMPTY_AUDIO",
  VALIDATION: "VALIDATION_ERROR",
  INTERNAL: "INTERNAL_ERROR",
  NOT_FOUND: "NOT_FOUND"
});

/**
 * WebSocket close codes that `classifyWsClose()` maps to typed errors.
 * Frozen for the same reason as `ErrorCodes`.
 */
var WsCloseCodes = Object.freeze({
  UNAUTHORIZED: 4001,
  INSUFFICIENT_CREDITS: 4003,
  RATE_LIMITED: 4029,
  MODEL_UNAVAILABLE: 4500
});

// Links embedded in the default authentication / billing error messages.
var API_KEYS_URL = "https://app.kugelaudio.com/settings/api-keys";
var BILLING_URL = "https://app.kugelaudio.com/billing";
|
|
9
27
|
/**
 * Base class for every error raised by the SDK.
 *
 * Instance fields: `statusCode`, `errorCode`, `requestId`, `retryAfter`
 * (each `undefined` when not supplied). When a `requestId` is given it is
 * appended to the message as ` (request_id: …)`.
 */
var KugelAudioError = class _KugelAudioError extends Error {
  /**
   * @param {string} message - Human-readable description.
   * @param {object|number} [options] - `{ statusCode, errorCode, requestId, retryAfter }`.
   *   A bare number is accepted as the status code, matching the previous
   *   `(message, statusCode)` signature so older call sites keep working.
   */
  constructor(message, options = {}) {
    let opts;
    if (typeof options === "number") {
      opts = { statusCode: options };
    } else {
      // `null` bypasses the default parameter; treat it like "no options".
      opts = options ?? {};
    }
    super(opts.requestId ? `${message} (request_id: ${opts.requestId})` : message);
    this.name = "KugelAudioError";
    this.statusCode = opts.statusCode;
    this.errorCode = opts.errorCode;
    this.requestId = opts.requestId;
    this.retryAfter = opts.retryAfter;
    // Keeps `instanceof` working when the class is down-levelled.
    Object.setPrototypeOf(this, _KugelAudioError.prototype);
  }
};
|
|
17
38
|
/**
 * Raised when the API key is rejected. Defaults to status 401 and
 * `ErrorCodes.UNAUTHORIZED`; caller-supplied options override both.
 */
var AuthenticationError = class _AuthenticationError extends KugelAudioError {
  /**
   * @param {string} [message] - Falls back to a hint pointing at the API-keys page.
   * @param {object} [options] - Extra fields forwarded to `KugelAudioError`.
   */
  constructor(message, options = {}) {
    const fallbackText = `KugelAudio rejected the API key. Check it is current at ${API_KEYS_URL}.`;
    const merged = { statusCode: 401, errorCode: ErrorCodes.UNAUTHORIZED, ...options };
    super(message ?? fallbackText, merged);
    this.name = "AuthenticationError";
    Object.setPrototypeOf(this, _AuthenticationError.prototype);
  }
};
|
|
24
48
|
/**
 * Raised when the rate limit is hit. Defaults to status 429 and
 * `ErrorCodes.RATE_LIMITED`; caller-supplied options override both.
 */
var RateLimitError = class _RateLimitError extends KugelAudioError {
  /**
   * @param {string} [message] - When omitted, a default is generated that
   *   mentions `options.retryAfter` if it is truthy.
   * @param {object} [options] - Extra fields forwarded to `KugelAudioError`.
   */
  constructor(message, options = {}) {
    let text = message;
    if (text === null || text === void 0) {
      if (options.retryAfter) {
        text = `KugelAudio rate limit hit; retry after ${options.retryAfter}s.`;
      } else {
        text = "KugelAudio rate limit hit; retry shortly.";
      }
    }
    const merged = { statusCode: 429, errorCode: ErrorCodes.RATE_LIMITED, ...options };
    super(text, merged);
    this.name = "RateLimitError";
    Object.setPrototypeOf(this, _RateLimitError.prototype);
  }
};
|
|
31
56
|
/**
 * Raised when the account has no credits left. Defaults to status 402 and
 * `ErrorCodes.INSUFFICIENT_CREDITS`; caller-supplied options override both.
 */
var InsufficientCreditsError = class _InsufficientCreditsError extends KugelAudioError {
  /**
   * @param {string} [message] - Falls back to a hint pointing at the billing page.
   * @param {object} [options] - Extra fields forwarded to `KugelAudioError`.
   */
  constructor(message, options = {}) {
    const fallbackText = `Your KugelAudio account is out of credits. Top up at ${BILLING_URL}.`;
    const merged = { statusCode: 402, errorCode: ErrorCodes.INSUFFICIENT_CREDITS, ...options };
    super(message ?? fallbackText, merged);
    this.name = "InsufficientCreditsError";
    Object.setPrototypeOf(this, _InsufficientCreditsError.prototype);
  }
};
|
|
38
66
|
/**
 * Raised for rejected request parameters. Defaults to status 400 and
 * `ErrorCodes.VALIDATION`; caller-supplied options override both.
 */
var ValidationError = class _ValidationError extends KugelAudioError {
  /**
   * @param {string} message - Description of what failed validation.
   * @param {object} [options] - Extra fields forwarded to `KugelAudioError`.
   */
  constructor(message, options = {}) {
    const merged = { statusCode: 400, errorCode: ErrorCodes.VALIDATION, ...options };
    super(message, merged);
    this.name = "ValidationError";
    Object.setPrototypeOf(this, _ValidationError.prototype);
  }
};
|
|
45
73
|
/**
 * Raised when the service cannot be reached or drops the connection.
 * Defaults to status 503; caller-supplied options override it.
 */
var ConnectionError = class _ConnectionError extends KugelAudioError {
  /**
   * @param {string} [message] - Description of the failure. A generic default
   *   is used when omitted, so `new ConnectionError()` never produces an empty
   *   message or the literal text "undefined".
   * @param {object} [options] - Extra fields forwarded to `KugelAudioError`.
   */
  constructor(message, options = {}) {
    super(
      message ?? "Could not connect to KugelAudio. Check network connectivity.",
      { statusCode: 503, ...options }
    );
    this.name = "ConnectionError";
    Object.setPrototypeOf(this, _ConnectionError.prototype);
  }
};
|
|
80
|
+
/**
 * Pick the error class for a failure and construct it.
 *
 * Rules are tried in order; each matches on either the machine-readable
 * `errorCode` or the numeric `status`: auth (401) → credits (402) →
 * rate limit (429) → validation (400) → unavailable (503) → generic.
 *
 * @param {number|undefined} status - HTTP status, if there is one.
 * @param {string|undefined} errorCode - Server `error_code`, if any.
 * @param {string} message - Server-supplied text; an empty value lets the
 *   chosen class (or this function) supply a default.
 * @param {object} [opts] - Extra fields (e.g. `requestId`, `retryAfter`).
 * @returns {KugelAudioError} An instance of the matching subclass.
 */
function build(status, errorCode, message, opts = {}) {
  const common = { ...opts };
  // Only set these when known so the subclass defaults are not overwritten
  // with `undefined` by the options spread.
  if (status !== void 0) common.statusCode = status;
  if (errorCode !== void 0) common.errorCode = errorCode;

  const text = message || void 0;
  if (errorCode === ErrorCodes.UNAUTHORIZED || status === 401) {
    return new AuthenticationError(text, common);
  }
  if (errorCode === ErrorCodes.INSUFFICIENT_CREDITS || status === 402) {
    return new InsufficientCreditsError(text, common);
  }
  if (errorCode === ErrorCodes.RATE_LIMITED || status === 429) {
    return new RateLimitError(text, common);
  }
  if (errorCode === ErrorCodes.VALIDATION || status === 400) {
    return new ValidationError(message || "Request validation failed.", common);
  }
  if (errorCode === ErrorCodes.MODEL_UNAVAILABLE || status === 503) {
    const detail = message || "service temporarily unavailable";
    return new ConnectionError(
      `KugelAudio is temporarily unavailable: ${detail}. Retry shortly.`,
      common
    );
  }

  // Generic fallback. There may be no HTTP status at all (WebSocket frames),
  // so never interpolate an undefined status into the message.
  let fallback;
  if (status !== void 0) {
    fallback = `HTTP ${status}`;
  } else if (errorCode !== void 0) {
    fallback = `KugelAudio error (${errorCode}).`;
  } else {
    fallback = "Unknown KugelAudio error.";
  }
  return new KugelAudioError(message || fallback, common);
}
|
|
105
|
+
/**
 * Read one header from either an object exposing `get(name)` or a plain
 * key/value record.
 *
 * @param {object|null|undefined} headers - Header container; may be absent.
 * @param {string} name - Header name.
 * @returns {*} The header value, or `undefined` when it is missing or no
 *   headers were supplied.
 */
function readHeader(headers, name) {
  // Callers may have no headers at all; that simply means "not present".
  if (headers === null || headers === void 0) return void 0;
  if (typeof headers.get === "function") {
    return headers.get(name) ?? void 0;
  }
  if (typeof headers !== "object") return void 0;

  const wanted = name.toLowerCase();
  const direct = headers[name] ?? headers[wanted];
  if (direct !== null && direct !== void 0) return direct;

  // Plain records can carry any casing ("X-Request-Id", "X-REQUEST-ID", …).
  for (const key of Object.keys(headers)) {
    const value = headers[key];
    if (key.toLowerCase() === wanted && value !== null && value !== void 0) {
      return value;
    }
  }
  return void 0;
}
|
|
112
|
+
/**
 * Turn a failed HTTP response into a typed SDK error.
 *
 * Reads `error_code`, `error`/`detail` and `retry_after` from a JSON body
 * when there is one, falls back to the `Retry-After` header and the raw body
 * text, picks up `x-request-id`, and delegates class selection to `build()`.
 *
 * @param {number} status - HTTP status code.
 * @param {string} bodyText - Raw response body (may be empty or non-JSON).
 * @param {object} [headers] - Response headers; may be absent.
 * @returns {KugelAudioError} Whatever `build()` returns.
 */
function classifyHttpError(status, bodyText, headers) {
  // One entry of an array-valued `detail`: keep strings, prefer a textual
  // `msg`/`message` field on objects, otherwise serialise — never emit
  // "[object Object]".
  const describe = (item) => {
    if (typeof item === "string") return item;
    if (item && typeof item === "object") {
      if (typeof item.msg === "string") return item.msg;
      if (typeof item.message === "string") return item.message;
      try {
        return JSON.stringify(item);
      } catch {
        return String(item);
      }
    }
    return String(item);
  };
  // Tolerate a missing header container without relying on the helper to.
  const header = (name) => headers === null || headers === void 0 ? void 0 : readHeader(headers, name);

  let errorCode;
  let message = "";
  let retryAfter;

  if (bodyText) {
    try {
      const body = JSON.parse(bodyText);
      if (body && typeof body === "object") {
        if (typeof body.error_code === "string") errorCode = body.error_code;
        const raw = body.error ?? body.detail;
        if (Array.isArray(raw)) {
          message = raw.map(describe).join("; ");
        } else if (typeof raw === "string") {
          message = raw;
        }
        if (typeof body.retry_after === "number") retryAfter = body.retry_after;
      }
    } catch {
      // Body is not JSON; the raw text is used as the message below.
    }
  }

  if (retryAfter === void 0) {
    const value = header("Retry-After") ?? header("retry-after");
    if (value) {
      const seconds = Number(value);
      if (Number.isFinite(seconds)) {
        retryAfter = seconds;
      } else {
        // Retry-After may also be an HTTP-date; convert it to a delay.
        const when = Date.parse(String(value));
        if (Number.isFinite(when)) {
          retryAfter = Math.max(0, Math.ceil((when - Date.now()) / 1e3));
        }
      }
    }
  }

  const requestId = header("x-request-id") ?? header("X-Request-Id");
  if (!message) {
    message = (bodyText || "").trim();
  }
  return build(status, errorCode, message, { requestId, retryAfter });
}
|
|
147
|
+
/**
 * Turn an error frame received over the WebSocket into a typed SDK error.
 *
 * Fields are type-checked the same way `classifyHttpError()` checks a JSON
 * body, so a non-string `error` (e.g. `true` or an object) cannot leak into
 * the message as "true" / "[object Object]".
 *
 * @param {object} data - Parsed frame; reads `error_code`, `error`, `retry_after`.
 * @returns {KugelAudioError} Whatever `build()` returns (no HTTP status is passed).
 */
function classifyWsFrame(data) {
  const errorCode = typeof data.error_code === "string" ? data.error_code : void 0;
  const message = typeof data.error === "string" ? data.error : "Server reported an error.";
  const retryAfter = typeof data.retry_after === "number" ? data.retry_after : void 0;
  return build(void 0, errorCode, message, { retryAfter });
}
|
|
153
|
+
/**
 * Turn a WebSocket close event into a typed SDK error.
 *
 * @param {number|undefined} code - Close code from the close event.
 * @param {*} [reason] - Close reason; coerced to a string and trimmed.
 * @returns {KugelAudioError} Authentication / credits / rate-limit errors for
 *   the matching `WsCloseCodes`, otherwise a `ConnectionError`.
 */
function classifyWsClose(code, reason) {
  // Coerce first: a non-string reason (e.g. a Buffer) has no `.trim()`.
  const reasonTxt = String(reason ?? "").trim();

  switch (code) {
    case WsCloseCodes.UNAUTHORIZED: {
      const base = `KugelAudio rejected the API key. Check it is current at ${API_KEYS_URL}.`;
      return new AuthenticationError(reasonTxt ? `${base} (${reasonTxt})` : base);
    }
    case WsCloseCodes.INSUFFICIENT_CREDITS:
      return new InsufficientCreditsError();
    case WsCloseCodes.RATE_LIMITED:
      return new RateLimitError();
    case WsCloseCodes.MODEL_UNAVAILABLE: {
      const suffix = reasonTxt ? ` (${reasonTxt})` : "";
      // Carry the machine-readable code, as `build()` does for the same condition.
      return new ConnectionError(
        `KugelAudio model is temporarily unavailable. Retry shortly.${suffix}`,
        { errorCode: ErrorCodes.MODEL_UNAVAILABLE }
      );
    }
    default: {
      const detail = reasonTxt || "no reason given";
      const codeStr = code !== void 0 ? ` (code ${code})` : "";
      return new ConnectionError(
        `KugelAudio WebSocket closed by server: ${detail}${codeStr}.`
      );
    }
  }
}
|
|
178
|
+
/**
 * Try to derive a typed SDK error from a failed WebSocket handshake.
 *
 * The HTTP status is taken from `err.statusCode`, or parsed out of a message
 * of the form "Unexpected server response: NNN".
 *
 * @param {*} err - The error (or event) reported by the socket.
 * @returns {KugelAudioError|null} A typed error, or `null` when no HTTP
 *   status can be determined (the caller then uses a generic error).
 */
function classifyWsHandshakeError(err) {
  if (!err || typeof err !== "object") return null;

  const rawMessage = typeof err.message === "string" ? err.message : "";
  let status;
  if (typeof err.statusCode === "number") {
    status = err.statusCode;
  } else {
    const found = rawMessage.match(/Unexpected server response:\s*(\d{3})/i);
    if (found) status = Number(found[1]);
  }
  if (status === void 0) return null;

  // A 403 during the handshake is reported as an authentication failure
  // with the default, actionable message.
  if (status === 403) {
    return new AuthenticationError();
  }

  // When the message is only the "Unexpected server response: NNN"
  // boilerplate it adds nothing beyond the status; pass an empty message so
  // the chosen error class supplies its own default text (as the 403 branch
  // already does).
  const isBoilerplate = /^\s*Unexpected server response:\s*\d{3}\s*$/i.test(rawMessage);
  return build(status, void 0, isBoilerplate ? "" : rawMessage);
}
|
|
52
195
|
|
|
53
196
|
// src/utils.ts
|
|
54
197
|
function base64ToArrayBuffer(base64) {
|
|
@@ -108,33 +251,61 @@ function createWavBlob(audio, sampleRate) {
|
|
|
108
251
|
|
|
109
252
|
// src/websocket.ts
|
|
110
253
|
// WebSocket constructor resolved by getWebSocket(); null until first use.
var _cachedWs = null;

/**
 * Report whether the code is running under Node.js, i.e. a `process` global
 * exists whose `versions.node` is a string.
 *
 * @returns {boolean}
 */
function isNodeJs() {
  if (typeof process === "undefined") return false;
  const runtimeVersions = process.versions;
  if (!runtimeVersions) return false;
  return typeof runtimeVersions.node === "string";
}
|
|
111
257
|
/**
 * Resolve (and cache) the WebSocket constructor to use.
 *
 * Order: cached value → under Node.js, the `ws` package if it can be
 * required → `globalThis.WebSocket` → otherwise throw.
 *
 * @returns {Function} A WebSocket constructor.
 * @throws {Error} When no implementation is available.
 */
function getWebSocket() {
  if (_cachedWs) return _cachedWs;

  if (isNodeJs()) {
    try {
      // Use the bundler's require shim when present; otherwise probe for a
      // global `require` without referencing it statically.
      let load;
      if (typeof __require !== "undefined") {
        load = __require;
      } else {
        load = Function('return typeof require !== "undefined" ? require : undefined')();
      }
      if (load) {
        const wsModule = load("ws");
        _cachedWs = wsModule.default || wsModule;
        return _cachedWs;
      }
    } catch {
      // Best effort: `ws` missing or not loadable — fall through to the global.
    }
  }

  const hasGlobalWs = typeof globalThis !== "undefined" && typeof globalThis.WebSocket !== "undefined";
  if (!hasGlobalWs) {
    throw new Error(
      'WebSocket not available. In Node.js, install the "ws" package: npm install ws'
    );
  }
  _cachedWs = globalThis.WebSocket;
  return _cachedWs;
}
|
|
130
278
|
|
|
131
279
|
// src/client.ts
|
|
132
|
-
var
|
|
280
|
+
// Base API URL per region.
var REGION_URLS = {
  eu: "https://api.kugelaudio.com",
  us: "https://us-api.kugelaudio.com",
  global: "https://global-api.kugelaudio.com"
};

// Key prefixes that encode a region ("<region>-<key>").
var REGION_PREFIXES = ["eu-", "us-", "global-"];

/**
 * Split an optional region prefix off an API key.
 *
 * @param {string} apiKey - Key, optionally prefixed with "eu-", "us-" or "global-".
 * @returns {{cleanKey: string, detectedRegion?: string}} The key without its
 *   prefix, plus the region name when a known prefix was present.
 */
function parseApiKey(apiKey) {
  const matched = REGION_PREFIXES.find((candidate) => apiKey.startsWith(candidate));
  if (matched === void 0) {
    return { cleanKey: apiKey };
  }
  const cleanKey = apiKey.slice(matched.length);
  // Drop the trailing "-" to obtain the region name.
  const detectedRegion = matched.slice(0, -1);
  return { cleanKey, detectedRegion };
}
|
|
133
294
|
/**
 * Open a WebSocket to `url` using the constructor chosen by getWebSocket().
 *
 * @param {string} url - WebSocket URL.
 * @returns {object} The newly constructed socket.
 */
function createWs(url) {
  const SocketCtor = getWebSocket();
  const socket = new SocketCtor(url);
  return socket;
}

// `readyState` value compared against to decide whether a socket is open.
var WS_OPEN = 1;
|
|
299
|
+
// Set once the warning below has been printed, so it is logged only once.
var _languageWarningLogged = false;

/**
 * Log a one-time latency warning when no language is set while text
 * normalization is enabled.
 *
 * @param {string|undefined} language - Requested language, if any.
 * @param {boolean|null|undefined} normalize - Normalization flag. `null` and
 *   `undefined` both mean "use the default", which is enabled — the same
 *   `?? true` rule the request payload applies.
 */
function warnIfNoLanguage(language, normalize) {
  const normEnabled = normalize ?? true;
  if (language || !normEnabled || _languageWarningLogged) return;
  _languageWarningLogged = true;
  console.warn(
    "[KugelAudio] No 'language' set with normalization enabled \u2014 the server will auto-detect the language, adding ~60-150ms to TTFA. Set language (e.g., language: 'en') for optimal latency."
  );
}
|
|
138
309
|
var ModelsResource = class {
|
|
139
310
|
constructor(client) {
|
|
140
311
|
this.client = client;
|
|
@@ -168,42 +339,177 @@ var VoicesResource = class {
|
|
|
168
339
|
params.set("include_public", String(options.includePublic));
|
|
169
340
|
}
|
|
170
341
|
if (options?.limit) params.set("limit", String(options.limit));
|
|
342
|
+
if (options?.offset) params.set("offset", String(options.offset));
|
|
171
343
|
const query = params.toString();
|
|
172
344
|
const path = query ? `/v1/voices?${query}` : "/v1/voices";
|
|
173
345
|
const response = await this.client.request("GET", path);
|
|
174
|
-
return
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
346
|
+
return {
|
|
347
|
+
voices: response.voices.map((v) => ({
|
|
348
|
+
id: v.id,
|
|
349
|
+
name: v.name,
|
|
350
|
+
description: v.description,
|
|
351
|
+
category: v.category,
|
|
352
|
+
sex: v.sex,
|
|
353
|
+
age: v.age,
|
|
354
|
+
supportedLanguages: v.supported_languages || [],
|
|
355
|
+
sampleText: v.sample_text,
|
|
356
|
+
avatarUrl: v.avatar_url,
|
|
357
|
+
sampleUrl: v.sample_url,
|
|
358
|
+
isPublic: v.is_public || false,
|
|
359
|
+
verified: v.verified || false
|
|
360
|
+
})),
|
|
361
|
+
total: response.total,
|
|
362
|
+
limit: response.limit,
|
|
363
|
+
offset: response.offset
|
|
364
|
+
};
|
|
188
365
|
}
|
|
189
366
|
/**
|
|
190
367
|
* Get a specific voice by ID.
|
|
191
368
|
*/
|
|
192
369
|
async get(voiceId) {
|
|
193
370
|
const v = await this.client.request("GET", `/v1/voices/${voiceId}`);
|
|
371
|
+
return this.mapVoiceDetail(v);
|
|
372
|
+
}
|
|
373
|
+
/**
|
|
374
|
+
* Create a new voice.
|
|
375
|
+
*/
|
|
376
|
+
async create(options) {
|
|
377
|
+
const metadata = {
|
|
378
|
+
name: options.name,
|
|
379
|
+
sex: options.sex,
|
|
380
|
+
description: options.description ?? "",
|
|
381
|
+
category: options.category ?? "conversational",
|
|
382
|
+
age: options.age ?? "middle_age",
|
|
383
|
+
quality: options.quality ?? "mid",
|
|
384
|
+
supported_languages: options.supportedLanguages ?? ["en"],
|
|
385
|
+
is_public: options.isPublic ?? false,
|
|
386
|
+
sample_text: options.sampleText ?? ""
|
|
387
|
+
};
|
|
388
|
+
const formData = new FormData();
|
|
389
|
+
formData.append(
|
|
390
|
+
"metadata",
|
|
391
|
+
new Blob([JSON.stringify(metadata)], { type: "application/json" })
|
|
392
|
+
);
|
|
393
|
+
if (options.referenceFiles) {
|
|
394
|
+
for (const file of options.referenceFiles) {
|
|
395
|
+
formData.append("files", file);
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
const v = await this.client.requestMultipart("POST", "/v1/voices", formData);
|
|
399
|
+
return this.mapVoiceDetail(v);
|
|
400
|
+
}
|
|
401
|
+
/**
|
|
402
|
+
* Update an existing voice. Only provided fields are updated.
|
|
403
|
+
*/
|
|
404
|
+
async update(voiceId, options) {
|
|
405
|
+
const payload = {};
|
|
406
|
+
if (options.name !== void 0) payload.name = options.name;
|
|
407
|
+
if (options.description !== void 0) payload.description = options.description;
|
|
408
|
+
if (options.category !== void 0) payload.category = options.category;
|
|
409
|
+
if (options.age !== void 0) payload.age = options.age;
|
|
410
|
+
if (options.sex !== void 0) payload.sex = options.sex;
|
|
411
|
+
if (options.quality !== void 0) payload.quality = options.quality;
|
|
412
|
+
if (options.supportedLanguages !== void 0) payload.supported_languages = options.supportedLanguages;
|
|
413
|
+
if (options.isPublic !== void 0) payload.is_public = options.isPublic;
|
|
414
|
+
if (options.sampleText !== void 0) payload.sample_text = options.sampleText;
|
|
415
|
+
const v = await this.client.request("PATCH", `/v1/voices/${voiceId}`, payload);
|
|
416
|
+
return this.mapVoiceDetail(v);
|
|
417
|
+
}
|
|
418
|
+
/**
|
|
419
|
+
* Delete a voice.
|
|
420
|
+
*/
|
|
421
|
+
async delete(voiceId) {
|
|
422
|
+
await this.client.request("DELETE", `/v1/voices/${voiceId}`);
|
|
423
|
+
}
|
|
424
|
+
// -- Reference management --
|
|
425
|
+
/**
|
|
426
|
+
* List reference audio files for a voice.
|
|
427
|
+
*/
|
|
428
|
+
async listReferences(voiceId) {
|
|
429
|
+
const response = await this.client.request(
|
|
430
|
+
"GET",
|
|
431
|
+
`/v1/voices/${voiceId}/references`
|
|
432
|
+
);
|
|
433
|
+
return response.references.map((r) => this.mapVoiceReference(r));
|
|
434
|
+
}
|
|
435
|
+
/**
|
|
436
|
+
* Upload a reference audio file to a voice.
|
|
437
|
+
*
|
|
438
|
+
* @param voiceId - Voice ID
|
|
439
|
+
* @param file - Audio file (File in browser, Blob in Node.js)
|
|
440
|
+
* @param referenceText - Optional transcript of the reference audio
|
|
441
|
+
*/
|
|
442
|
+
async addReference(voiceId, file, referenceText) {
|
|
443
|
+
const formData = new FormData();
|
|
444
|
+
formData.append("file", file);
|
|
445
|
+
if (referenceText) {
|
|
446
|
+
formData.append("reference_text", referenceText);
|
|
447
|
+
}
|
|
448
|
+
const r = await this.client.requestMultipart(
|
|
449
|
+
"POST",
|
|
450
|
+
`/v1/voices/${voiceId}/references`,
|
|
451
|
+
formData
|
|
452
|
+
);
|
|
453
|
+
return this.mapVoiceReference(r);
|
|
454
|
+
}
|
|
455
|
+
/**
|
|
456
|
+
* Delete a reference audio file from a voice.
|
|
457
|
+
*/
|
|
458
|
+
async deleteReference(voiceId, referenceId) {
|
|
459
|
+
await this.client.request(
|
|
460
|
+
"DELETE",
|
|
461
|
+
`/v1/voices/${voiceId}/references/${referenceId}`
|
|
462
|
+
);
|
|
463
|
+
}
|
|
464
|
+
// -- Publishing --
|
|
465
|
+
/**
|
|
466
|
+
* Request publication of a voice. Sets it as public and marks it
|
|
467
|
+
* as pending verification by an admin.
|
|
468
|
+
*/
|
|
469
|
+
async publish(voiceId) {
|
|
470
|
+
const v = await this.client.request("POST", `/v1/voices/${voiceId}/publish`);
|
|
471
|
+
return this.mapVoiceDetail(v);
|
|
472
|
+
}
|
|
473
|
+
// -- Sample generation --
|
|
474
|
+
/**
|
|
475
|
+
* Trigger sample audio generation for a voice.
|
|
476
|
+
*/
|
|
477
|
+
async generateSample(voiceId) {
|
|
478
|
+
const v = await this.client.request(
|
|
479
|
+
"POST",
|
|
480
|
+
`/v1/voices/${voiceId}/generate-sample`
|
|
481
|
+
);
|
|
482
|
+
return this.mapVoiceDetail(v);
|
|
483
|
+
}
|
|
484
|
+
// -- Helpers --
|
|
485
|
+
mapVoiceDetail(v) {
|
|
194
486
|
return {
|
|
195
487
|
id: v.id,
|
|
196
488
|
name: v.name,
|
|
197
|
-
description: v.description,
|
|
198
|
-
|
|
199
|
-
|
|
489
|
+
description: v.description ?? "",
|
|
490
|
+
generativeVoiceDescription: v.generative_voice_description ?? "",
|
|
491
|
+
supportedLanguages: v.supported_languages ?? [],
|
|
492
|
+
category: v.category ?? "cloned",
|
|
200
493
|
age: v.age,
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
494
|
+
sex: v.sex,
|
|
495
|
+
quality: v.quality ?? "mid",
|
|
496
|
+
isPublic: v.is_public ?? false,
|
|
497
|
+
verified: v.verified ?? false,
|
|
498
|
+
pendingVerification: v.pending_verification ?? false,
|
|
204
499
|
sampleUrl: v.sample_url,
|
|
205
|
-
|
|
206
|
-
|
|
500
|
+
avatarUrl: v.avatar_url,
|
|
501
|
+
sampleText: v.sample_text ?? ""
|
|
502
|
+
};
|
|
503
|
+
}
|
|
504
|
+
mapVoiceReference(r) {
|
|
505
|
+
return {
|
|
506
|
+
id: r.id,
|
|
507
|
+
voiceId: r.voice_id,
|
|
508
|
+
name: r.name ?? "",
|
|
509
|
+
referenceText: r.reference_text ?? "",
|
|
510
|
+
s3Path: r.s3_path ?? "",
|
|
511
|
+
audioUrl: r.audio_url,
|
|
512
|
+
isGenerated: r.is_generated ?? false
|
|
207
513
|
};
|
|
208
514
|
}
|
|
209
515
|
};
|
|
@@ -215,6 +521,7 @@ var TTSResource = class {
|
|
|
215
521
|
this.wsUrl = null;
|
|
216
522
|
this.pendingRequests = /* @__PURE__ */ new Map();
|
|
217
523
|
this.requestCounter = 0;
|
|
524
|
+
this.keepaliveTimer = null;
|
|
218
525
|
}
|
|
219
526
|
/**
|
|
220
527
|
* Pre-establish WebSocket connection for faster first request.
|
|
@@ -278,6 +585,63 @@ var TTSResource = class {
|
|
|
278
585
|
wordTimestamps: allTimestamps
|
|
279
586
|
};
|
|
280
587
|
}
|
|
588
|
+
/**
|
|
589
|
+
* Stream audio and return a Node.js Readable stream of raw PCM16 binary data.
|
|
590
|
+
*
|
|
591
|
+
* **Node.js only** — this method requires the `stream` built-in module and is
|
|
592
|
+
* intended for server-side integrations such as Vapi custom TTS endpoints,
|
|
593
|
+
* Express/Fastify handlers, or any pipeline that expects a Node.js `Readable`.
|
|
594
|
+
*
|
|
595
|
+
* Compared to manually wiring `onChunk` to a `Readable`, this method avoids
|
|
596
|
+
* a common race-condition: the stream object is created and returned **before**
|
|
597
|
+
* any chunks arrive, so the caller can safely pipe or attach listeners before
|
|
598
|
+
* the first audio byte is pushed.
|
|
599
|
+
*
|
|
600
|
+
* @example Vapi custom TTS endpoint
|
|
601
|
+
* ```typescript
|
|
602
|
+
* app.post('/synthesize', (req, res) => {
|
|
603
|
+
* res.setHeader('Content-Type', 'audio/pcm');
|
|
604
|
+
* res.setHeader('Transfer-Encoding', 'chunked');
|
|
605
|
+
*
|
|
606
|
+
* const readable = client.tts.toReadable({
|
|
607
|
+
* text: req.body.message.text,
|
|
608
|
+
* modelId: 'kugel-1-turbo',
|
|
609
|
+
* sampleRate: req.body.message.sampleRate,
|
|
610
|
+
* language: 'en',
|
|
611
|
+
* });
|
|
612
|
+
*
|
|
613
|
+
* readable.pipe(res);
|
|
614
|
+
* });
|
|
615
|
+
* ```
|
|
616
|
+
*
|
|
617
|
+
* @param options - TTS generation options (same as `stream()`)
|
|
618
|
+
* @param reuseConnection - Reuse the pooled WebSocket connection (default: true)
|
|
619
|
+
* @returns Node.js Readable stream emitting raw PCM16 binary Buffer chunks
|
|
620
|
+
*/
|
|
621
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
622
|
+
toReadable(options, reuseConnection = true) {
|
|
623
|
+
const { Readable } = __require("stream");
|
|
624
|
+
const readable = new Readable({ read() {
|
|
625
|
+
} });
|
|
626
|
+
this.stream(
|
|
627
|
+
options,
|
|
628
|
+
{
|
|
629
|
+
onChunk: (chunk) => {
|
|
630
|
+
readable.push(Buffer.from(chunk.audio, "base64"));
|
|
631
|
+
},
|
|
632
|
+
onFinal: () => {
|
|
633
|
+
readable.push(null);
|
|
634
|
+
},
|
|
635
|
+
onError: (error) => {
|
|
636
|
+
readable.destroy(error);
|
|
637
|
+
}
|
|
638
|
+
},
|
|
639
|
+
reuseConnection
|
|
640
|
+
).catch((error) => {
|
|
641
|
+
readable.destroy(error);
|
|
642
|
+
});
|
|
643
|
+
return readable;
|
|
644
|
+
}
|
|
281
645
|
/**
|
|
282
646
|
* Build the WebSocket URL with appropriate auth param.
|
|
283
647
|
*/
|
|
@@ -319,10 +683,17 @@ var TTSResource = class {
|
|
|
319
683
|
this.wsConnection = ws;
|
|
320
684
|
this.wsUrl = url;
|
|
321
685
|
this.setupMessageHandler(ws);
|
|
686
|
+
this.startKeepalive(ws);
|
|
322
687
|
resolve(ws);
|
|
323
688
|
};
|
|
324
|
-
ws.onerror = () => {
|
|
325
|
-
|
|
689
|
+
ws.onerror = (event) => {
|
|
690
|
+
const underlying = event?.error ?? event;
|
|
691
|
+
const typed = classifyWsHandshakeError(underlying);
|
|
692
|
+
reject(
|
|
693
|
+
typed ?? new ConnectionError(
|
|
694
|
+
`Could not establish KugelAudio WebSocket connection to ${url}. Check network connectivity.`
|
|
695
|
+
)
|
|
696
|
+
);
|
|
326
697
|
};
|
|
327
698
|
});
|
|
328
699
|
}
|
|
@@ -337,7 +708,7 @@ var TTSResource = class {
|
|
|
337
708
|
const [requestId, pending] = [...this.pendingRequests.entries()][0] || [];
|
|
338
709
|
if (!pending) return;
|
|
339
710
|
if (data.error) {
|
|
340
|
-
const error = this.parseError(data
|
|
711
|
+
const error = this.parseError(data);
|
|
341
712
|
pending.callbacks.onError?.(error);
|
|
342
713
|
this.pendingRequests.delete(requestId);
|
|
343
714
|
pending.reject(error);
|
|
@@ -350,7 +721,6 @@ var TTSResource = class {
|
|
|
350
721
|
totalSamples: data.total_samples,
|
|
351
722
|
durationMs: data.dur_ms,
|
|
352
723
|
generationMs: data.gen_ms,
|
|
353
|
-
ttfaMs: data.ttfa_ms,
|
|
354
724
|
rtf: data.rtf,
|
|
355
725
|
error: data.error
|
|
356
726
|
};
|
|
@@ -387,20 +757,23 @@ var TTSResource = class {
|
|
|
387
757
|
}
|
|
388
758
|
};
|
|
389
759
|
ws.onclose = (event) => {
|
|
760
|
+
this.stopKeepalive();
|
|
390
761
|
this.wsConnection = null;
|
|
391
762
|
this.wsUrl = null;
|
|
392
763
|
for (const [id, pending] of this.pendingRequests) {
|
|
393
764
|
pending.callbacks.onClose?.();
|
|
394
|
-
if (event.code === 4001) {
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
pending.reject(
|
|
765
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
766
|
+
const error = classifyWsClose(event.code, event.reason);
|
|
767
|
+
pending.callbacks.onError?.(error);
|
|
768
|
+
pending.reject(error);
|
|
398
769
|
}
|
|
399
770
|
this.pendingRequests.delete(id);
|
|
400
771
|
}
|
|
401
772
|
};
|
|
402
773
|
ws.onerror = () => {
|
|
403
|
-
const error = new
|
|
774
|
+
const error = new ConnectionError(
|
|
775
|
+
"KugelAudio WebSocket connection error. Check network connectivity."
|
|
776
|
+
);
|
|
404
777
|
for (const [id, pending] of this.pendingRequests) {
|
|
405
778
|
pending.callbacks.onError?.(error);
|
|
406
779
|
pending.reject(error);
|
|
@@ -426,6 +799,7 @@ var TTSResource = class {
|
|
|
426
799
|
* Stream with connection pooling (fast path).
|
|
427
800
|
*/
|
|
428
801
|
async streamWithPooling(options, callbacks) {
|
|
802
|
+
warnIfNoLanguage(options.language, options.normalize);
|
|
429
803
|
const ws = await this.getConnection();
|
|
430
804
|
const requestId = ++this.requestCounter;
|
|
431
805
|
return new Promise((resolve, reject) => {
|
|
@@ -436,11 +810,14 @@ var TTSResource = class {
|
|
|
436
810
|
model_id: options.modelId || "kugel-1-turbo",
|
|
437
811
|
voice_id: options.voiceId,
|
|
438
812
|
cfg_scale: options.cfgScale ?? 2,
|
|
813
|
+
...options.temperature !== void 0 && { temperature: options.temperature },
|
|
439
814
|
max_new_tokens: options.maxNewTokens ?? 2048,
|
|
440
815
|
sample_rate: options.sampleRate ?? 24e3,
|
|
441
816
|
normalize: options.normalize ?? true,
|
|
442
817
|
...options.language && { language: options.language },
|
|
443
|
-
...options.wordTimestamps && { word_timestamps: true }
|
|
818
|
+
...options.wordTimestamps && { word_timestamps: true },
|
|
819
|
+
...options.speed !== void 0 && { speed: options.speed },
|
|
820
|
+
...options.projectId !== void 0 && { project_id: options.projectId }
|
|
444
821
|
}));
|
|
445
822
|
});
|
|
446
823
|
}
|
|
@@ -448,6 +825,7 @@ var TTSResource = class {
|
|
|
448
825
|
* Stream without connection pooling (original behavior).
|
|
449
826
|
*/
|
|
450
827
|
streamWithoutPooling(options, callbacks) {
|
|
828
|
+
warnIfNoLanguage(options.language, options.normalize);
|
|
451
829
|
return new Promise((resolve, reject) => {
|
|
452
830
|
const url = this.buildWsUrl();
|
|
453
831
|
const ws = createWs(url);
|
|
@@ -462,7 +840,9 @@ var TTSResource = class {
|
|
|
462
840
|
sample_rate: options.sampleRate ?? 24e3,
|
|
463
841
|
normalize: options.normalize ?? true,
|
|
464
842
|
...options.language && { language: options.language },
|
|
465
|
-
...options.wordTimestamps && { word_timestamps: true }
|
|
843
|
+
...options.wordTimestamps && { word_timestamps: true },
|
|
844
|
+
...options.speed !== void 0 && { speed: options.speed },
|
|
845
|
+
...options.projectId !== void 0 && { project_id: options.projectId }
|
|
466
846
|
}));
|
|
467
847
|
};
|
|
468
848
|
ws.onmessage = (event) => {
|
|
@@ -470,7 +850,7 @@ var TTSResource = class {
|
|
|
470
850
|
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
471
851
|
const data = JSON.parse(messageData);
|
|
472
852
|
if (data.error) {
|
|
473
|
-
const error = this.parseError(data
|
|
853
|
+
const error = this.parseError(data);
|
|
474
854
|
callbacks.onError?.(error);
|
|
475
855
|
ws.close();
|
|
476
856
|
reject(error);
|
|
@@ -483,7 +863,6 @@ var TTSResource = class {
|
|
|
483
863
|
totalSamples: data.total_samples,
|
|
484
864
|
durationMs: data.dur_ms,
|
|
485
865
|
generationMs: data.gen_ms,
|
|
486
|
-
ttfaMs: data.ttfa_ms,
|
|
487
866
|
rtf: data.rtf,
|
|
488
867
|
error: data.error
|
|
489
868
|
};
|
|
@@ -519,25 +898,54 @@ var TTSResource = class {
|
|
|
519
898
|
console.error("Failed to parse WebSocket message:", e);
|
|
520
899
|
}
|
|
521
900
|
};
|
|
522
|
-
ws.onerror = () => {
|
|
523
|
-
const
|
|
901
|
+
ws.onerror = (event) => {
|
|
902
|
+
const underlying = event?.error ?? event;
|
|
903
|
+
const error = classifyWsHandshakeError(underlying) ?? new ConnectionError(
|
|
904
|
+
"KugelAudio WebSocket connection error. Check network connectivity."
|
|
905
|
+
);
|
|
524
906
|
callbacks.onError?.(error);
|
|
525
907
|
reject(error);
|
|
526
908
|
};
|
|
527
909
|
ws.onclose = (event) => {
|
|
528
910
|
callbacks.onClose?.();
|
|
529
|
-
if (event.code === 4001) {
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
reject(
|
|
911
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
912
|
+
const error = classifyWsClose(event.code, event.reason);
|
|
913
|
+
callbacks.onError?.(error);
|
|
914
|
+
reject(error);
|
|
533
915
|
}
|
|
534
916
|
};
|
|
535
917
|
});
|
|
536
918
|
}
|
|
919
|
+
/**
|
|
920
|
+
* Start periodic keepalive pings on the pooled connection.
|
|
921
|
+
* Uses the ws package's ping() in Node.js; silently skips in browsers
|
|
922
|
+
* where WebSocket doesn't expose a ping method.
|
|
923
|
+
*/
|
|
924
|
+
startKeepalive(ws) {
|
|
925
|
+
this.stopKeepalive();
|
|
926
|
+
const intervalMs = this.client.keepalivePingInterval;
|
|
927
|
+
if (intervalMs == null || intervalMs <= 0) return;
|
|
928
|
+
this.keepaliveTimer = setInterval(() => {
|
|
929
|
+
if (this.wsConnection !== ws || ws.readyState !== WS_OPEN) {
|
|
930
|
+
this.stopKeepalive();
|
|
931
|
+
return;
|
|
932
|
+
}
|
|
933
|
+
if (typeof ws.ping === "function") {
|
|
934
|
+
ws.ping();
|
|
935
|
+
}
|
|
936
|
+
}, intervalMs);
|
|
937
|
+
}
|
|
938
|
+
stopKeepalive() {
|
|
939
|
+
if (this.keepaliveTimer !== null) {
|
|
940
|
+
clearInterval(this.keepaliveTimer);
|
|
941
|
+
this.keepaliveTimer = null;
|
|
942
|
+
}
|
|
943
|
+
}
|
|
537
944
|
/**
|
|
538
945
|
* Close the pooled WebSocket connection.
|
|
539
946
|
*/
|
|
540
947
|
close() {
|
|
948
|
+
this.stopKeepalive();
|
|
541
949
|
if (this.wsConnection) {
|
|
542
950
|
try {
|
|
543
951
|
this.wsConnection.close();
|
|
@@ -547,15 +955,39 @@ var TTSResource = class {
|
|
|
547
955
|
this.wsUrl = null;
|
|
548
956
|
}
|
|
549
957
|
}
|
|
550
|
-
parseError(
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
958
|
+
parseError(data) {
|
|
959
|
+
return classifyWsFrame(data);
|
|
960
|
+
}
|
|
961
|
+
/**
|
|
962
|
+
* Create a streaming session for LLM integration.
|
|
963
|
+
*
|
|
964
|
+
* The session connects to `/ws/tts/stream` and keeps a persistent
|
|
965
|
+
* connection across multiple {@link StreamingSession.send} calls.
|
|
966
|
+
* The server auto-chunks text at sentence boundaries — no client-side
|
|
967
|
+
* flushing required.
|
|
968
|
+
*
|
|
969
|
+
* @param config - Session configuration (voice, model, chunking strategy).
|
|
970
|
+
* @param callbacks - Callbacks for audio chunks and session lifecycle events.
|
|
971
|
+
* @returns A {@link StreamingSession} instance. Call `.connect()` before sending.
|
|
972
|
+
*
|
|
973
|
+
* @example
|
|
974
|
+
* ```typescript
|
|
975
|
+
* const session = client.tts.streamingSession(
|
|
976
|
+
* { voiceId: 123, autoMode: true, chunkLengthSchedule: [50, 100, 150, 250] },
|
|
977
|
+
* { onChunk: (chunk) => playAudio(chunk.audio) },
|
|
978
|
+
* );
|
|
979
|
+
*
|
|
980
|
+
* session.connect();
|
|
981
|
+
*
|
|
982
|
+
* for await (const token of llmStream) {
|
|
983
|
+
* session.send(token);
|
|
984
|
+
* }
|
|
985
|
+
*
|
|
986
|
+
* await session.close();
|
|
987
|
+
* ```
|
|
988
|
+
*/
|
|
989
|
+
streamingSession(config, callbacks) {
|
|
990
|
+
return new StreamingSession(this.client, config, callbacks);
|
|
559
991
|
}
|
|
560
992
|
/**
|
|
561
993
|
* Create a multi-context session for concurrent TTS streams.
|
|
@@ -575,7 +1007,7 @@ var TTSResource = class {
|
|
|
575
1007
|
* console.log(`Audio from ${chunk.contextId}`);
|
|
576
1008
|
* playAudio(chunk.audio);
|
|
577
1009
|
* },
|
|
578
|
-
*
|
|
1010
|
+
* onContextClosed: (contextId) => {
|
|
579
1011
|
* console.log(`${contextId} finished`);
|
|
580
1012
|
* },
|
|
581
1013
|
* });
|
|
@@ -614,6 +1046,11 @@ var MultiContextSession = class {
|
|
|
614
1046
|
}
|
|
615
1047
|
/**
|
|
616
1048
|
* Connect to the multi-context WebSocket endpoint.
|
|
1049
|
+
*
|
|
1050
|
+
* The returned promise resolves once the WebSocket is OPEN so callers can
|
|
1051
|
+
* ``await session.connect(callbacks)`` before invoking
|
|
1052
|
+
* {@link createContext} / {@link send}. Pre-open errors reject with the
|
|
1053
|
+
* typed error.
|
|
617
1054
|
*/
|
|
618
1055
|
connect(callbacks) {
|
|
619
1056
|
this.callbacks = callbacks;
|
|
@@ -628,9 +1065,8 @@ var MultiContextSession = class {
|
|
|
628
1065
|
}
|
|
629
1066
|
const url = `${wsUrl}/ws/tts/multi?${authParam}=${this.client.apiKey}`;
|
|
630
1067
|
this.ws = createWs(url);
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
this.ws.onmessage = (event) => {
|
|
1068
|
+
const ws = this.ws;
|
|
1069
|
+
ws.onmessage = (event) => {
|
|
634
1070
|
try {
|
|
635
1071
|
const messageData = typeof event.data === "string" ? event.data : event.data instanceof Buffer ? event.data.toString() : String(event.data);
|
|
636
1072
|
const data = JSON.parse(messageData);
|
|
@@ -661,9 +1097,6 @@ var MultiContextSession = class {
|
|
|
661
1097
|
};
|
|
662
1098
|
this.callbacks.onChunk?.(chunk);
|
|
663
1099
|
}
|
|
664
|
-
if (data.is_final) {
|
|
665
|
-
this.callbacks.onContextFinal?.(data.context_id);
|
|
666
|
-
}
|
|
667
1100
|
if (data.context_closed) {
|
|
668
1101
|
this.contexts.delete(data.context_id);
|
|
669
1102
|
this.callbacks.onContextClosed?.(data.context_id);
|
|
@@ -679,19 +1112,38 @@ var MultiContextSession = class {
|
|
|
679
1112
|
console.error("Failed to parse WebSocket message:", e);
|
|
680
1113
|
}
|
|
681
1114
|
};
|
|
682
|
-
|
|
683
|
-
|
|
684
|
-
|
|
685
|
-
|
|
686
|
-
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
|
|
690
|
-
|
|
691
|
-
|
|
692
|
-
|
|
693
|
-
|
|
694
|
-
|
|
1115
|
+
return new Promise((resolve, reject) => {
|
|
1116
|
+
let opened = false;
|
|
1117
|
+
ws.onopen = () => {
|
|
1118
|
+
opened = true;
|
|
1119
|
+
resolve();
|
|
1120
|
+
};
|
|
1121
|
+
ws.onerror = (event) => {
|
|
1122
|
+
const underlying = event?.error ?? event;
|
|
1123
|
+
const err = classifyWsHandshakeError(underlying) ?? new ConnectionError(
|
|
1124
|
+
"KugelAudio multi-context WebSocket connection error. Check network connectivity."
|
|
1125
|
+
);
|
|
1126
|
+
if (!opened) reject(err);
|
|
1127
|
+
this.callbacks.onError?.(err);
|
|
1128
|
+
};
|
|
1129
|
+
ws.onclose = (event) => {
|
|
1130
|
+
let typedErr = null;
|
|
1131
|
+
if (event.code === 4001 || event.code === 4003 || event.code === 4029 || event.code === 4500) {
|
|
1132
|
+
typedErr = classifyWsClose(event.code, event.reason);
|
|
1133
|
+
this.callbacks.onError?.(typedErr);
|
|
1134
|
+
}
|
|
1135
|
+
if (!opened) {
|
|
1136
|
+
reject(
|
|
1137
|
+
typedErr ?? new ConnectionError(
|
|
1138
|
+
`KugelAudio multi-context WebSocket closed before ready (code ${event.code}).`
|
|
1139
|
+
)
|
|
1140
|
+
);
|
|
1141
|
+
}
|
|
1142
|
+
this.ws = null;
|
|
1143
|
+
this.isStarted = false;
|
|
1144
|
+
this.contexts.clear();
|
|
1145
|
+
};
|
|
1146
|
+
});
|
|
695
1147
|
}
|
|
696
1148
|
/**
|
|
697
1149
|
* Create a new context with optional voice settings.
|
|
@@ -705,10 +1157,13 @@ var MultiContextSession = class {
|
|
|
705
1157
|
context_id: contextId
|
|
706
1158
|
};
|
|
707
1159
|
if (!this.isStarted) {
|
|
1160
|
+
warnIfNoLanguage(this.config.language, this.config.normalize);
|
|
708
1161
|
if (this.config.sampleRate) msg.sample_rate = this.config.sampleRate;
|
|
709
1162
|
if (this.config.cfgScale) msg.cfg_scale = this.config.cfgScale;
|
|
1163
|
+
if (this.config.temperature !== void 0) msg.temperature = this.config.temperature;
|
|
710
1164
|
if (this.config.maxNewTokens) msg.max_new_tokens = this.config.maxNewTokens;
|
|
711
1165
|
if (this.config.normalize !== void 0) msg.normalize = this.config.normalize;
|
|
1166
|
+
if (this.config.language) msg.language = this.config.language;
|
|
712
1167
|
if (this.config.inactivityTimeout) msg.inactivity_timeout = this.config.inactivityTimeout;
|
|
713
1168
|
}
|
|
714
1169
|
const voiceId = options?.voiceId || this.config.defaultVoiceId;
|
|
@@ -795,18 +1250,271 @@ var MultiContextSession = class {
|
|
|
795
1250
|
return this.ws !== null && this.ws.readyState === WS_OPEN;
|
|
796
1251
|
}
|
|
797
1252
|
};
|
|
1253
|
+
/**
 * Decode a WebSocket message payload into text.
 *
 * `Buffer` only exists in Node.js, so its use is guarded: referencing it
 * unguarded raises a ReferenceError in environments that do not define it.
 */
function streamingFrameToText(payload) {
  if (typeof payload === "string") return payload;
  if (typeof Buffer !== "undefined" && payload instanceof Buffer) return payload.toString();
  return String(payload);
}
/**
 * Persistent text-to-speech session over the `/ws/tts/stream` WebSocket.
 *
 * Lifecycle: `connect()` → `send()` (repeatedly) → `endSession()` to finish
 * the current session while keeping the socket, or `close()` to finish and
 * close the socket. The session configuration is attached to the first
 * `send()` after connecting, after `endSession()`, or after `updateConfig()`.
 */
var StreamingSession = class {
  /**
   * @param client - Client providing `ttsUrl`, `apiKey`, `isToken` and `isMasterKey`.
   * @param config - Session configuration. Copied, so later `updateConfig()`
   *   calls never mutate the caller's object.
   * @param callbacks - Optional event callbacks; defaults to none.
   */
  constructor(client, config, callbacks) {
    this.ws = null;
    this.configSent = false;
    this.client = client;
    this.config = { ...config };
    // Every handler dereferences this.callbacks, so it must always be an object.
    this.callbacks = callbacks ?? {};
  }
  /**
   * Open the WebSocket connection and authenticate.
   *
   * The returned promise resolves once the WebSocket is OPEN, so callers can
   * ``await session.connect()`` and then ``send()`` without racing the
   * handshake. Errors or a close before the socket opens reject the promise
   * with the typed error; errors after opening are only reported through
   * `callbacks.onError`.
   */
  connect() {
    const wsUrl = this.client.ttsUrl.replace("https://", "wss://").replace("http://", "ws://");
    let authParam = "api_key";
    if (this.client.isToken) {
      authParam = "token";
    } else if (this.client.isMasterKey) {
      authParam = "master_key";
    }
    // Encode the credential: characters such as "+", "/" or "=" would
    // otherwise be misread as query-string syntax.
    const url = `${wsUrl}/ws/tts/stream?${authParam}=${encodeURIComponent(this.client.apiKey)}`;
    const ws = createWs(url);
    this.ws = ws;
    ws.onmessage = (event) => {
      try {
        const data = JSON.parse(streamingFrameToText(event.data));
        if (data.error) {
          // Classify like the rest of the client so callers receive typed
          // errors; fall back to a plain error if no classification is returned.
          const typed = classifyWsFrame(data) ?? new KugelAudioError(data.error);
          this.callbacks.onError?.(typed);
          return;
        }
        if (data.audio) {
          const chunk = {
            audio: data.audio,
            encoding: data.enc || "pcm_s16le",
            index: data.idx,
            sampleRate: data.sr,
            samples: data.samples
          };
          this.callbacks.onChunk?.(chunk);
        }
        if (data.word_timestamps) {
          const timestamps = data.word_timestamps.map((w) => ({
            word: w.word,
            startMs: w.start_ms,
            endMs: w.end_ms,
            charStart: w.char_start,
            charEnd: w.char_end,
            score: w.score ?? 1
          }));
          this.callbacks.onWordTimestamps?.(timestamps);
        }
        if (data.chunk_complete) {
          this.callbacks.onChunkComplete?.(
            data.chunk_id ?? 0,
            data.audio_seconds ?? 0,
            data.gen_ms ?? 0
          );
        }
        if (data.generation_started) {
          this.callbacks.onGenerationStarted?.(data.chunk_id ?? 0, data.text ?? "");
        }
        if (data.session_closed) {
          this.callbacks.onSessionClosed?.(
            data.total_audio_seconds ?? 0,
            data.total_text_chunks ?? 0,
            data.total_audio_chunks ?? 0
          );
        }
      } catch (e) {
        console.error("[KugelAudio] Failed to parse streaming session message:", e);
      }
    };
    return new Promise((resolve, reject) => {
      let opened = false;
      ws.onopen = () => {
        opened = true;
        resolve();
      };
      ws.onerror = (event) => {
        const underlying = event?.error ?? event;
        const err = classifyWsHandshakeError(underlying) ?? new ConnectionError(
          "KugelAudio streaming WebSocket connection error. Check network connectivity."
        );
        if (!opened) reject(err);
        this.callbacks.onError?.(err);
      };
      ws.onclose = (event) => {
        let typedErr = null;
        if ([4001, 4003, 4029, 4500].includes(event.code)) {
          typedErr = classifyWsClose(event.code, event.reason);
          this.callbacks.onError?.(typedErr);
        }
        if (!opened) {
          reject(
            typedErr ?? new ConnectionError(
              `KugelAudio streaming WebSocket closed before ready (code ${event.code}).`
            )
          );
        }
        // Only reset state that still belongs to this socket; a late close
        // of a replaced socket must not clobber a newer connection.
        if (this.ws === ws) {
          this.ws = null;
          this.configSent = false;
        }
      };
    });
  }
  /**
   * Send a text chunk to the server (e.g. one LLM output token).
   *
   * The server buffers text across multiple calls and starts generating at
   * natural sentence boundaries automatically — no need to call `flush`.
   * The first message of a session also carries the session configuration.
   *
   * @param text - Raw text or LLM token to append to the server buffer.
   * @param flush - Force immediate generation of whatever is buffered.
   *   **Avoid calling this per-sentence from the client.** Doing so bypasses
   *   the server's semantic chunking, incurs a fresh model prefill cost on
   *   every flush, and makes latency *worse*, not better. Let the server
   *   handle chunking via `chunkLengthSchedule` / `autoMode` instead.
   * @throws KugelAudioError if the WebSocket is not open.
   */
  send(text, flush = false) {
    if (!this.ws || this.ws.readyState !== WS_OPEN) {
      throw new KugelAudioError("StreamingSession not connected. Call connect() first.");
    }
    const msg = { text, flush };
    if (!this.configSent) {
      const cfg = this.config;
      if (cfg.voiceId !== void 0) msg.voice_id = cfg.voiceId;
      if (cfg.modelId !== void 0) msg.model_id = cfg.modelId;
      if (cfg.cfgScale !== void 0) msg.cfg_scale = cfg.cfgScale;
      if (cfg.temperature !== void 0) msg.temperature = cfg.temperature;
      if (cfg.maxNewTokens !== void 0) msg.max_new_tokens = cfg.maxNewTokens;
      if (cfg.sampleRate !== void 0) msg.sample_rate = cfg.sampleRate;
      if (cfg.flushTimeoutMs !== void 0) msg.flush_timeout_ms = cfg.flushTimeoutMs;
      if (cfg.maxBufferLength !== void 0) msg.max_buffer_length = cfg.maxBufferLength;
      if (cfg.normalize !== void 0) msg.normalize = cfg.normalize;
      if (cfg.language !== void 0) msg.language = cfg.language;
      if (cfg.wordTimestamps) msg.word_timestamps = true;
      if (cfg.autoMode !== void 0) msg.auto_mode = cfg.autoMode;
      if (cfg.chunkLengthSchedule?.length) msg.chunk_length_schedule = cfg.chunkLengthSchedule;
      if (cfg.speed !== void 0) msg.speed = cfg.speed;
      this.configSent = true;
    }
    this.ws.send(JSON.stringify(msg));
  }
  /**
   * End the current session but keep the WebSocket connection open.
   *
   * This allows starting a new session on the same connection, avoiding
   * the overhead of a new WebSocket handshake. After calling this,
   * optionally call {@link updateConfig} to change voice/model settings,
   * then call {@link send} to start the next session.
   *
   * The returned promise resolves once the server confirms with a
   * `session_closed` message, when the socket closes, or after a 15 s
   * **quiet** timeout — i.e. 15 s elapse without *any* server message
   * arriving. The timer resets on every incoming frame so a long final
   * flush is not truncated. Resolves immediately if the socket is not open.
   * Rejects if the close request cannot be sent; the temporary handlers and
   * the timer are removed in that case as well.
   */
  endSession() {
    const ws = this.ws;
    if (!ws || ws.readyState !== WS_OPEN) return Promise.resolve();
    const QUIET_TIMEOUT_MS = 15e3;
    return new Promise((resolve, reject) => {
      let settled = false;
      let timer;
      const prevMessage = ws.onmessage;
      const prevClose = ws.onclose;
      // Undo the temporary handler wrapping exactly once; returns whether
      // this call was the one that settled the operation.
      const restore = () => {
        if (settled) return false;
        settled = true;
        clearTimeout(timer);
        ws.onmessage = prevMessage;
        ws.onclose = prevClose;
        this.configSent = false;
        return true;
      };
      const done = () => {
        if (restore()) resolve();
      };
      const armQuietTimer = () => {
        clearTimeout(timer);
        timer = setTimeout(done, QUIET_TIMEOUT_MS);
      };
      armQuietTimer();
      ws.onmessage = (event) => {
        armQuietTimer();
        // Deliver the frame to the regular handler before checking for the end marker.
        if (prevMessage) prevMessage.call(ws, event);
        try {
          if (JSON.parse(streamingFrameToText(event.data)).session_closed) done();
        } catch {
          // Frames that are not valid JSON cannot be the session_closed marker.
        }
      };
      ws.onclose = (event) => {
        if (this.ws === ws) this.ws = null;
        if (prevClose) prevClose.call(ws, event);
        done();
      };
      try {
        ws.send(JSON.stringify({ close: true }));
      } catch (err) {
        // Don't leave wrapped handlers and a pending timer behind on failure.
        if (restore()) reject(err);
      }
    });
  }
  /**
   * Update session configuration for the next session.
   *
   * Call this after {@link endSession} and before the next {@link send}
   * to change voice, model, language, or other settings. The given fields
   * are merged into the session's own copy of the configuration and the
   * configuration is re-sent with the next {@link send}.
   */
  updateConfig(config) {
    Object.assign(this.config, config);
    this.configSent = false;
  }
  /**
   * Close the session and the WebSocket connection.
   *
   * For session reuse without closing the connection, use
   * {@link endSession} instead.
   *
   * Waits for {@link endSession} first (server `session_closed`, socket
   * close, or the 15 s quiet timeout), so frames from the server's final
   * flush reach the callbacks before the socket is closed. The socket is
   * closed even when ending the session fails; that failure is re-thrown.
   */
  async close() {
    try {
      await this.endSession();
    } finally {
      const ws = this.ws;
      if (ws) {
        try {
          ws.close();
        } catch {
          // Best effort: the socket may already be closing.
        }
        this.ws = null;
        this.configSent = false;
      }
    }
  }
  /** Whether the underlying WebSocket is open. */
  get isConnected() {
    return this.ws !== null && this.ws.readyState === WS_OPEN;
  }
};
|
|
798
1492
|
var KugelAudio = class _KugelAudio {
|
|
799
1493
|
constructor(options) {
|
|
800
1494
|
if (!options.apiKey) {
|
|
801
|
-
throw new
|
|
1495
|
+
throw new ValidationError(
|
|
1496
|
+
"KugelAudio API key is missing. Set the KUGELAUDIO_API_KEY environment variable or pass { apiKey: ... } to the client. Get a key at https://app.kugelaudio.com/settings/api-keys."
|
|
1497
|
+
);
|
|
802
1498
|
}
|
|
803
|
-
|
|
1499
|
+
const { cleanKey, detectedRegion } = parseApiKey(options.apiKey);
|
|
1500
|
+
this._apiKey = cleanKey;
|
|
804
1501
|
this._isMasterKey = options.isMasterKey || false;
|
|
805
1502
|
this._isToken = options.isToken || false;
|
|
806
1503
|
this._orgId = options.orgId;
|
|
807
|
-
|
|
1504
|
+
if (options.apiUrl) {
|
|
1505
|
+
this._apiUrl = options.apiUrl.replace(/\/$/, "");
|
|
1506
|
+
} else {
|
|
1507
|
+
const effectiveRegion = options.region || detectedRegion || "eu";
|
|
1508
|
+
if (!(effectiveRegion in REGION_URLS)) {
|
|
1509
|
+
throw new ValidationError(
|
|
1510
|
+
`Invalid region '${effectiveRegion}'. Must be one of: ${Object.keys(REGION_URLS).join(", ")}.`
|
|
1511
|
+
);
|
|
1512
|
+
}
|
|
1513
|
+
this._apiUrl = REGION_URLS[effectiveRegion];
|
|
1514
|
+
}
|
|
808
1515
|
this._ttsUrl = (options.ttsUrl || this._apiUrl).replace(/\/$/, "");
|
|
809
1516
|
this._timeout = options.timeout || 6e4;
|
|
1517
|
+
this._keepalivePingInterval = options.keepalivePingInterval !== void 0 ? options.keepalivePingInterval : 2e4;
|
|
810
1518
|
this.models = new ModelsResource(this);
|
|
811
1519
|
this.voices = new VoicesResource(this);
|
|
812
1520
|
this.tts = new TTSResource(this);
|
|
@@ -852,6 +1560,10 @@ var KugelAudio = class _KugelAudio {
|
|
|
852
1560
|
get ttsUrl() {
|
|
853
1561
|
return this._ttsUrl;
|
|
854
1562
|
}
|
|
1563
|
+
/** Get keepalive ping interval in milliseconds, or null if disabled. */
|
|
1564
|
+
get keepalivePingInterval() {
|
|
1565
|
+
return this._keepalivePingInterval;
|
|
1566
|
+
}
|
|
855
1567
|
/**
|
|
856
1568
|
* Close the client and release resources.
|
|
857
1569
|
* This closes any pooled WebSocket connections.
|
|
@@ -906,25 +1618,49 @@ var KugelAudio = class _KugelAudio {
|
|
|
906
1618
|
signal: controller.signal
|
|
907
1619
|
});
|
|
908
1620
|
clearTimeout(timeoutId);
|
|
909
|
-
if (response.
|
|
910
|
-
|
|
1621
|
+
if (!response.ok) {
|
|
1622
|
+
const text = await response.text();
|
|
1623
|
+
throw classifyHttpError(response.status, text, response.headers);
|
|
911
1624
|
}
|
|
912
|
-
|
|
913
|
-
|
|
1625
|
+
return await response.json();
|
|
1626
|
+
} catch (error) {
|
|
1627
|
+
clearTimeout(timeoutId);
|
|
1628
|
+
if (error instanceof KugelAudioError) {
|
|
1629
|
+
throw error;
|
|
914
1630
|
}
|
|
915
|
-
if (
|
|
916
|
-
throw new
|
|
1631
|
+
if (error.name === "AbortError") {
|
|
1632
|
+
throw new ConnectionError(
|
|
1633
|
+
`Request to ${method} ${path} timed out after ${this._timeout}ms.`
|
|
1634
|
+
);
|
|
917
1635
|
}
|
|
1636
|
+
throw new ConnectionError(
|
|
1637
|
+
`Could not reach KugelAudio at ${url}: ${error.message}. Check network connectivity.`
|
|
1638
|
+
);
|
|
1639
|
+
}
|
|
1640
|
+
}
|
|
1641
|
+
/**
|
|
1642
|
+
* Make a multipart/form-data request (for file uploads).
|
|
1643
|
+
* @internal Used by VoicesResource for reference file uploads.
|
|
1644
|
+
*/
|
|
1645
|
+
async requestMultipart(method, path, formData) {
|
|
1646
|
+
const url = `${this._apiUrl}${path}`;
|
|
1647
|
+
const headers = {
|
|
1648
|
+
"X-API-Key": this._apiKey,
|
|
1649
|
+
"Authorization": `Bearer ${this._apiKey}`
|
|
1650
|
+
};
|
|
1651
|
+
const controller = new AbortController();
|
|
1652
|
+
const timeoutId = setTimeout(() => controller.abort(), this._timeout);
|
|
1653
|
+
try {
|
|
1654
|
+
const response = await fetch(url, {
|
|
1655
|
+
method,
|
|
1656
|
+
headers,
|
|
1657
|
+
body: formData,
|
|
1658
|
+
signal: controller.signal
|
|
1659
|
+
});
|
|
1660
|
+
clearTimeout(timeoutId);
|
|
918
1661
|
if (!response.ok) {
|
|
919
1662
|
const text = await response.text();
|
|
920
|
-
|
|
921
|
-
try {
|
|
922
|
-
const json = JSON.parse(text);
|
|
923
|
-
message = json.detail || json.error || message;
|
|
924
|
-
} catch {
|
|
925
|
-
message = text || message;
|
|
926
|
-
}
|
|
927
|
-
throw new KugelAudioError(message, response.status);
|
|
1663
|
+
throw classifyHttpError(response.status, text, response.headers);
|
|
928
1664
|
}
|
|
929
1665
|
return await response.json();
|
|
930
1666
|
} catch (error) {
|
|
@@ -933,21 +1669,31 @@ var KugelAudio = class _KugelAudio {
|
|
|
933
1669
|
throw error;
|
|
934
1670
|
}
|
|
935
1671
|
if (error.name === "AbortError") {
|
|
936
|
-
throw new
|
|
1672
|
+
throw new ConnectionError(
|
|
1673
|
+
`Request to ${method} ${path} timed out after ${this._timeout}ms.`
|
|
1674
|
+
);
|
|
937
1675
|
}
|
|
938
|
-
throw new
|
|
1676
|
+
throw new ConnectionError(
|
|
1677
|
+
`Could not reach KugelAudio at ${url}: ${error.message}. Check network connectivity.`
|
|
1678
|
+
);
|
|
939
1679
|
}
|
|
940
1680
|
}
|
|
941
1681
|
};
|
|
942
1682
|
export {
|
|
943
1683
|
AuthenticationError,
|
|
944
1684
|
ConnectionError,
|
|
1685
|
+
ErrorCodes,
|
|
945
1686
|
InsufficientCreditsError,
|
|
946
1687
|
KugelAudio,
|
|
947
1688
|
KugelAudioError,
|
|
948
1689
|
RateLimitError,
|
|
949
1690
|
ValidationError,
|
|
1691
|
+
WsCloseCodes,
|
|
950
1692
|
base64ToArrayBuffer,
|
|
1693
|
+
classifyHttpError,
|
|
1694
|
+
classifyWsClose,
|
|
1695
|
+
classifyWsFrame,
|
|
1696
|
+
classifyWsHandshakeError,
|
|
951
1697
|
createWavBlob,
|
|
952
1698
|
createWavFile,
|
|
953
1699
|
decodePCM16
|